summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/.gitignore1
-rw-r--r--arch/powerpc/kernel/85xx_entry_mapping.S (renamed from arch/powerpc/kernel/fsl_booke_entry_mapping.S)8
-rw-r--r--arch/powerpc/kernel/Makefile118
-rw-r--r--arch/powerpc/kernel/align.c97
-rw-r--r--arch/powerpc/kernel/asm-offsets.c276
-rw-r--r--arch/powerpc/kernel/audit.c15
-rw-r--r--arch/powerpc/kernel/audit_32.h7
-rw-r--r--arch/powerpc/kernel/btext.c406
-rw-r--r--arch/powerpc/kernel/cacheinfo.c180
-rw-r--r--arch/powerpc/kernel/compat_audit.c15
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S28
-rw-r--r--arch/powerpc/kernel/cpu_setup_e500.S (renamed from arch/powerpc/kernel/cpu_setup_fsl_booke.S)23
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S219
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.c288
-rw-r--r--arch/powerpc/kernel/cpu_specs.h29
-rw-r--r--arch/powerpc/kernel/cpu_specs_40x.h280
-rw-r--r--arch/powerpc/kernel/cpu_specs_44x.h304
-rw-r--r--arch/powerpc/kernel/cpu_specs_47x.h74
-rw-r--r--arch/powerpc/kernel/cpu_specs_85xx.h57
-rw-r--r--arch/powerpc/kernel/cpu_specs_8xx.h23
-rw-r--r--arch/powerpc/kernel/cpu_specs_book3s_32.h605
-rw-r--r--arch/powerpc/kernel/cpu_specs_book3s_64.h496
-rw-r--r--arch/powerpc/kernel/cpu_specs_e500mc.h75
-rw-r--r--arch/powerpc/kernel/cputable.c2152
-rw-r--r--arch/powerpc/kernel/crash_dump.c56
-rw-r--r--arch/powerpc/kernel/dawr.c33
-rw-r--r--arch/powerpc/kernel/dbell.c67
-rw-r--r--arch/powerpc/kernel/dma-iommu.c165
-rw-r--r--arch/powerpc/kernel/dma-mask.c1
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c1
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c170
-rw-r--r--arch/powerpc/kernel/early_32.c1
-rw-r--r--arch/powerpc/kernel/eeh.c589
-rw-r--r--arch/powerpc/kernel/eeh_cache.c21
-rw-r--r--arch/powerpc/kernel/eeh_dev.c67
-rw-r--r--arch/powerpc/kernel/eeh_driver.c220
-rw-r--r--arch/powerpc/kernel/eeh_event.c2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c191
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c25
-rw-r--r--arch/powerpc/kernel/entry_32.S1357
-rw-r--r--arch/powerpc/kernel/entry_64.S1354
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S8
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c3
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S484
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2934
-rw-r--r--arch/powerpc/kernel/fadump.c474
-rw-r--r--arch/powerpc/kernel/firmware.c25
-rw-r--r--arch/powerpc/kernel/fpu.S48
-rw-r--r--arch/powerpc/kernel/head_32.h272
-rw-r--r--arch/powerpc/kernel/head_40x.S626
-rw-r--r--arch/powerpc/kernel/head_44x.S133
-rw-r--r--arch/powerpc/kernel/head_64.S325
-rw-r--r--arch/powerpc/kernel/head_85xx.S (renamed from arch/powerpc/kernel/head_fsl_booke.S)235
-rw-r--r--arch/powerpc/kernel/head_8xx.S701
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S (renamed from arch/powerpc/kernel/head_32.S)734
-rw-r--r--arch/powerpc/kernel/head_booke.h334
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c424
-rw-r--r--arch/powerpc/kernel/hw_breakpoint_constraints.c158
-rw-r--r--arch/powerpc/kernel/idle.c51
-rw-r--r--arch/powerpc/kernel/idle_64e.S (renamed from arch/powerpc/kernel/idle_book3e.S)10
-rw-r--r--arch/powerpc/kernel/idle_6xx.S13
-rw-r--r--arch/powerpc/kernel/idle_85xx.S (renamed from arch/powerpc/kernel/idle_e500.S)15
-rw-r--r--arch/powerpc/kernel/idle_book3s.S172
-rw-r--r--arch/powerpc/kernel/idle_power4.S83
-rw-r--r--arch/powerpc/kernel/ima_arch.c14
-rw-r--r--arch/powerpc/kernel/interrupt.c505
-rw-r--r--arch/powerpc/kernel/interrupt_64.S772
-rw-r--r--arch/powerpc/kernel/io-workarounds.c18
-rw-r--r--arch/powerpc/kernel/io.c12
-rw-r--r--arch/powerpc/kernel/iomap.c166
-rw-r--r--arch/powerpc/kernel/iommu.c445
-rw-r--r--arch/powerpc/kernel/irq.c570
-rw-r--r--arch/powerpc/kernel/irq_64.c522
-rw-r--r--arch/powerpc/kernel/isa-bridge.c191
-rw-r--r--arch/powerpc/kernel/jump_label.c7
-rw-r--r--arch/powerpc/kernel/kdebugfs.c14
-rw-r--r--arch/powerpc/kernel/kgdb.c49
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c17
-rw-r--r--arch/powerpc/kernel/kprobes.c317
-rw-r--r--arch/powerpc/kernel/kvm.c11
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S10
-rw-r--r--arch/powerpc/kernel/legacy_serial.c66
-rw-r--r--arch/powerpc/kernel/mce.c189
-rw-r--r--arch/powerpc/kernel/mce_power.c267
-rw-r--r--arch/powerpc/kernel/misc.S10
-rw-r--r--arch/powerpc/kernel/misc_32.S112
-rw-r--r--arch/powerpc/kernel/misc_64.S59
-rw-r--r--arch/powerpc/kernel/module.c43
-rw-r--r--arch/powerpc/kernel/module.lds8
-rw-r--r--arch/powerpc/kernel/module_32.c130
-rw-r--r--arch/powerpc/kernel/module_64.c783
-rw-r--r--arch/powerpc/kernel/nvram_64.c24
-rw-r--r--arch/powerpc/kernel/of_platform.c18
-rw-r--r--arch/powerpc/kernel/optprobes.c171
-rw-r--r--arch/powerpc/kernel/optprobes_head.S68
-rw-r--r--arch/powerpc/kernel/paca.c68
-rw-r--r--arch/powerpc/kernel/pci-common.c238
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c6
-rw-r--r--arch/powerpc/kernel/pci_32.c44
-rw-r--r--arch/powerpc/kernel/pci_64.c75
-rw-r--r--arch/powerpc/kernel/pci_dn.c137
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c5
-rw-r--r--arch/powerpc/kernel/pmc.c2
-rw-r--r--arch/powerpc/kernel/ppc32.h60
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S61
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c16
-rw-r--r--arch/powerpc/kernel/process.c1053
-rw-r--r--arch/powerpc/kernel/prom.c220
-rw-r--r--arch/powerpc/kernel/prom_entry_64.S87
-rw-r--r--arch/powerpc/kernel/prom_init.c260
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh21
-rw-r--r--arch/powerpc/kernel/ptrace.c3468
-rw-r--r--arch/powerpc/kernel/ptrace/Makefile21
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-adv.c494
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-altivec.c115
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-decl.h183
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-fpu.c58
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c298
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-novsx.c64
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-spe.c60
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-tm.c788
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-view.c953
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-vsx.c148
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace.c447
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace32.c (renamed from arch/powerpc/kernel/ptrace32.c)21
-rw-r--r--arch/powerpc/kernel/reloc_32.S2
-rw-r--r--arch/powerpc/kernel/reloc_64.S67
-rw-r--r--arch/powerpc/kernel/rtas-proc.c120
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c9
-rw-r--r--arch/powerpc/kernel/rtas.c1843
-rw-r--r--arch/powerpc/kernel/rtas_entry.S176
-rw-r--r--arch/powerpc/kernel/rtas_flash.c63
-rw-r--r--arch/powerpc/kernel/rtas_pci.c21
-rw-r--r--arch/powerpc/kernel/rtasd.c62
-rw-r--r--arch/powerpc/kernel/secure_boot.c18
-rw-r--r--arch/powerpc/kernel/security.c506
-rw-r--r--arch/powerpc/kernel/secvar-ops.c10
-rw-r--r--arch/powerpc/kernel/secvar-sysfs.c175
-rw-r--r--arch/powerpc/kernel/setup-common.c230
-rw-r--r--arch/powerpc/kernel/setup.h18
-rw-r--r--arch/powerpc/kernel/setup_32.c40
-rw-r--r--arch/powerpc/kernel/setup_64.c390
-rw-r--r--arch/powerpc/kernel/signal.c245
-rw-r--r--arch/powerpc/kernel/signal.h186
-rw-r--r--arch/powerpc/kernel/signal_32.c1057
-rw-r--r--arch/powerpc/kernel/signal_64.c449
-rw-r--r--arch/powerpc/kernel/smp.c874
-rw-r--r--arch/powerpc/kernel/stacktrace.c174
-rw-r--r--arch/powerpc/kernel/static_call.c37
-rw-r--r--arch/powerpc/kernel/switch.S258
-rw-r--r--arch/powerpc/kernel/swsusp_32.S9
-rw-r--r--arch/powerpc/kernel/swsusp_64.c7
-rw-r--r--arch/powerpc/kernel/swsusp_85xx.S (renamed from arch/powerpc/kernel/swsusp_booke.S)0
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S19
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c110
-rw-r--r--arch/powerpc/kernel/syscall.c189
-rw-r--r--arch/powerpc/kernel/syscalls.c87
-rw-r--r--arch/powerpc/kernel/syscalls/Makefile55
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl105
-rw-r--r--arch/powerpc/kernel/syscalls/syscallhdr.sh37
-rw-r--r--arch/powerpc/kernel/syscalls/syscalltbl.sh36
-rw-r--r--arch/powerpc/kernel/sysfs.c572
-rw-r--r--arch/powerpc/kernel/systbl.S40
-rw-r--r--arch/powerpc/kernel/systbl.c46
-rw-r--r--arch/powerpc/kernel/systbl_chk.sh30
-rw-r--r--arch/powerpc/kernel/tau_6xx.c154
-rw-r--r--arch/powerpc/kernel/time.c550
-rw-r--r--arch/powerpc/kernel/tm.S83
-rw-r--r--arch/powerpc/kernel/trace/Makefile16
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c1065
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S95
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64.S64
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.c846
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg_entry.S (renamed from arch/powerpc/kernel/trace/ftrace_64_pg.S)67
-rw-r--r--arch/powerpc/kernel/trace/ftrace_entry.S (renamed from arch/powerpc/kernel/trace/ftrace_64_mprofile.S)293
-rw-r--r--arch/powerpc/kernel/traps.c563
-rw-r--r--arch/powerpc/kernel/ucall.S2
-rw-r--r--arch/powerpc/kernel/udbg.c4
-rw-r--r--arch/powerpc/kernel/udbg_16550.c44
-rw-r--r--arch/powerpc/kernel/uprobes.c20
-rw-r--r--arch/powerpc/kernel/vdso.c798
-rw-r--r--arch/powerpc/kernel/vdso/.gitignore5
-rw-r--r--arch/powerpc/kernel/vdso/Makefile118
-rw-r--r--arch/powerpc/kernel/vdso/cacheflush.S (renamed from arch/powerpc/kernel/vdso64/cacheflush.S)53
-rw-r--r--arch/powerpc/kernel/vdso/datapage.S (renamed from arch/powerpc/kernel/vdso32/datapage.S)46
-rwxr-xr-xarch/powerpc/kernel/vdso/gen_vdso32_offsets.sh16
-rwxr-xr-xarch/powerpc/kernel/vdso/gen_vdso64_offsets.sh16
-rw-r--r--arch/powerpc/kernel/vdso/getcpu.S (renamed from arch/powerpc/kernel/vdso32/getcpu.S)27
-rw-r--r--arch/powerpc/kernel/vdso/gettimeofday.S133
-rw-r--r--arch/powerpc/kernel/vdso/note.S (renamed from arch/powerpc/kernel/vdso32/note.S)0
-rw-r--r--arch/powerpc/kernel/vdso/sigtramp32.S (renamed from arch/powerpc/kernel/vdso32/sigtramp.S)0
-rw-r--r--arch/powerpc/kernel/vdso/sigtramp64.S (renamed from arch/powerpc/kernel/vdso64/sigtramp.S)22
-rw-r--r--arch/powerpc/kernel/vdso/vdso32.lds.S (renamed from arch/powerpc/kernel/vdso32/vdso32.lds.S)74
-rw-r--r--arch/powerpc/kernel/vdso/vdso64.lds.S (renamed from arch/powerpc/kernel/vdso64/vdso64.lds.S)71
-rw-r--r--arch/powerpc/kernel/vdso/vgettimeofday.c49
-rw-r--r--arch/powerpc/kernel/vdso32/.gitignore2
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile67
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S81
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S290
-rw-r--r--arch/powerpc/kernel/vdso32_wrapper.S (renamed from arch/powerpc/kernel/vdso32/vdso32_wrapper.S)2
-rw-r--r--arch/powerpc/kernel/vdso64/.gitignore2
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile47
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S84
-rw-r--r--arch/powerpc/kernel/vdso64/getcpu.S33
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S289
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64_wrapper.S (renamed from arch/powerpc/kernel/vdso64/vdso64_wrapper.S)2
-rw-r--r--arch/powerpc/kernel/vecemu.c20
-rw-r--r--arch/powerpc/kernel/vector.S55
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S207
-rw-r--r--arch/powerpc/kernel/watchdog.c274
211 files changed, 25260 insertions, 23279 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index 67ebd3003c05..d71179d3ffe9 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S
index 8bccce6544b5..dedc17fac8f8 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/85xx_entry_mapping.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* 1. Find the index of the entry we're executing in */
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
@@ -85,7 +85,7 @@ skpinv: addi r6,r6,1 /* Increment */
addi r6,r6,10
slw r6,r8,r6 /* convert to mask */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r7
mfspr r8,SPRN_MAS3
@@ -117,7 +117,7 @@ skpinv: addi r6,r6,1 /* Increment */
xori r6,r4,1
slwi r6,r6,5 /* setup new context with other address space */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r7,r9,0,20,31
addi r7,r7,(2f - 1b)
@@ -207,7 +207,7 @@ next_tlb_setup:
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r6,r9,0,20,31
addi r6,r6,(2f - 1b)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 157b0147921f..2919433be355 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -13,14 +11,16 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
-CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -fno-stack-protector
CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
@@ -34,6 +34,19 @@ KASAN_SANITIZE_early_32.o := n
KASAN_SANITIZE_cputable.o := n
KASAN_SANITIZE_prom_init.o := n
KASAN_SANITIZE_btext.o := n
+KASAN_SANITIZE_paca.o := n
+KASAN_SANITIZE_setup_64.o := n
+KASAN_SANITIZE_mce.o := n
+KASAN_SANITIZE_mce_power.o := n
+KASAN_SANITIZE_udbg.o := n
+KASAN_SANITIZE_udbg_16550.o := n
+
+# we have to be particularly careful in ppc64 to exclude code that
+# runs with translations off, as we cannot access the shadow with
+# translations off. However, ppc32 can sanitize this.
+ifdef CONFIG_PPC64
+KASAN_SANITIZE_traps.o := n
+endif
ifdef CONFIG_KASAN
CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
@@ -41,71 +54,87 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y := cputable.o ptrace.o syscalls.o \
- irq.o align.o signal_32.o pmc.o vdso.o \
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+# Remove stack protector to avoid triggering unneeded stack canary
+# checks due to randomize_kstack_offset.
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
+CFLAGS_syscall.o += -fno-stack-protector
+#endif
+
+obj-y := cputable.o syscalls.o switch.o \
+ irq.o align.o signal_$(BITS).o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
-obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
- signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o note.o
-obj-$(CONFIG_VDSO32) += vdso32/
+ of_platform.o prom_parse.o firmware.o \
+ hw_breakpoint_constraints.o interrupt.o \
+ kdebugfs.o stacktrace.o syscall.o
+obj-y += ptrace/
+obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\
+ paca.o nvram_64.o note.o
+obj-$(CONFIG_PPC32) += sys_ppc32.o
+obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
+obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_DAWR) += dawr.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
-obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
-obj-$(CONFIG_PPC64) += vdso64/
+obj-$(CONFIG_PPC64) += vdso64_wrapper.o
obj-$(CONFIG_ALTIVEC) += vecemu.o
-obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o
+obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS) += rtas_entry.o rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o
-obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
-ifdef CONFIG_PPC32
-obj-$(CONFIG_E500) += idle_e500.o
-endif
+obj-$(CONFIG_PPC_85xx) += idle_85xx.o
obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
-ifdef CONFIG_FSL_BOOKE
-obj-$(CONFIG_HIBERNATION) += swsusp_booke.o
+ifdef CONFIG_PPC_85xx
+obj-$(CONFIG_HIBERNATION) += swsusp_85xx.o
else
obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o
endif
obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
obj-$(CONFIG_MODULES) += module.o module_$(BITS).o
obj-$(CONFIG_44x) += cpu_setup_44x.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
+obj-$(CONFIG_PPC_E500) += cpu_setup_e500.o
obj-$(CONFIG_PPC_DOORBELL) += dbell.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-y := head_$(BITS).o
-extra-$(CONFIG_40x) := head_40x.o
-extra-$(CONFIG_44x) := head_44x.o
-extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
-extra-$(CONFIG_PPC_8xx) := head_8xx.o
+obj-$(CONFIG_PPC64) += head_64.o
+obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o
+obj-$(CONFIG_40x) += head_40x.o
+obj-$(CONFIG_44x) += head_44x.o
+obj-$(CONFIG_PPC_8xx) += head_8xx.o
+obj-$(CONFIG_PPC_85xx) += head_85xx.o
extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o static_call.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
@@ -115,7 +144,6 @@ obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
@@ -137,9 +165,6 @@ endif
obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
-obj-$(CONFIG_PPC64) += $(obj64-y)
-obj-$(CONFIG_PPC32) += $(obj32-y)
-
ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),)
obj-y += ppc_save_regs.o
endif
@@ -156,13 +181,19 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
GCOV_PROFILE_kprobes.o := n
KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
GCOV_PROFILE_kprobes-ftrace.o := n
KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
+GCOV_PROFILE_syscall_64.o := n
+KCOV_INSTRUMENT_syscall_64.o := n
+UBSAN_SANITIZE_syscall_64.o := n
UBSAN_SANITIZE_vdso.o := n
# Necessary for booting with kcov enabled on book3e machines
@@ -170,12 +201,18 @@ KCOV_INSTRUMENT_cputable.o := n
KCOV_INSTRUMENT_setup_64.o := n
KCOV_INSTRUMENT_paca.o := n
-extra-$(CONFIG_PPC_FPU) += fpu.o
-extra-$(CONFIG_ALTIVEC) += vector.o
-extra-$(CONFIG_PPC64) += entry_64.o
-extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+CFLAGS_setup_64.o += -fno-stack-protector
+CFLAGS_paca.o += -fno-stack-protector
+
+obj-$(CONFIG_PPC_FPU) += fpu.o
+obj-$(CONFIG_ALTIVEC) += vector.o
-extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
+obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
quiet_cmd_prom_init_check = PROMCHK $@
cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
@@ -185,3 +222,10 @@ $(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
targets += prom_init_check
clean-files := vmlinux.lds
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg
+$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg
+
+# for cleaning
+subdir- += vdso
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 92045ed64976..3e37ece06739 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -24,6 +24,7 @@
#include <asm/disassemble.h>
#include <asm/cpu_has_feature.h>
#include <asm/sstep.h>
+#include <asm/inst.h>
struct aligninfo {
unsigned char len;
@@ -104,9 +105,8 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- unsigned int instr)
+ ppc_inst_t ppc_instr)
{
- int ret;
union {
u64 ll;
u32 w[2];
@@ -115,8 +115,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
} data, temp;
unsigned char __user *p, *addr;
unsigned long *evr = &current->thread.evr[reg];
- unsigned int nb, flags;
+ unsigned int nb, flags, instr;
+ instr = ppc_inst_val(ppc_instr);
instr = (instr >> 1) & 0x1f;
/* DAR has the operand effective address */
@@ -125,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
nb = spe_aligninfo[instr].len;
flags = spe_aligninfo[instr].flags;
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok(addr, nb)))
- return -EFAULT;
-
/* userland only */
if (unlikely(!user_mode(regs)))
return 0;
@@ -167,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
} else {
temp.ll = data.ll = 0;
- ret = 0;
p = addr;
+ if (!user_read_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __get_user_inatomic(temp.v[0], p++);
- ret |= __get_user_inatomic(temp.v[1], p++);
- ret |= __get_user_inatomic(temp.v[2], p++);
- ret |= __get_user_inatomic(temp.v[3], p++);
- /* fall through */
+ unsafe_get_user(temp.v[0], p++, Efault_read);
+ unsafe_get_user(temp.v[1], p++, Efault_read);
+ unsafe_get_user(temp.v[2], p++, Efault_read);
+ unsafe_get_user(temp.v[3], p++, Efault_read);
+ fallthrough;
case 4:
- ret |= __get_user_inatomic(temp.v[4], p++);
- ret |= __get_user_inatomic(temp.v[5], p++);
- /* fall through */
+ unsafe_get_user(temp.v[4], p++, Efault_read);
+ unsafe_get_user(temp.v[5], p++, Efault_read);
+ fallthrough;
case 2:
- ret |= __get_user_inatomic(temp.v[6], p++);
- ret |= __get_user_inatomic(temp.v[7], p++);
- if (unlikely(ret))
- return -EFAULT;
+ unsafe_get_user(temp.v[6], p++, Efault_read);
+ unsafe_get_user(temp.v[7], p++, Efault_read);
}
+ user_read_access_end();
switch (instr) {
case EVLDD:
@@ -253,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
/* Store result to memory or update registers */
if (flags & ST) {
- ret = 0;
p = addr;
+
+ if (!user_write_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __put_user_inatomic(data.v[0], p++);
- ret |= __put_user_inatomic(data.v[1], p++);
- ret |= __put_user_inatomic(data.v[2], p++);
- ret |= __put_user_inatomic(data.v[3], p++);
- /* fall through */
+ unsafe_put_user(data.v[0], p++, Efault_write);
+ unsafe_put_user(data.v[1], p++, Efault_write);
+ unsafe_put_user(data.v[2], p++, Efault_write);
+ unsafe_put_user(data.v[3], p++, Efault_write);
+ fallthrough;
case 4:
- ret |= __put_user_inatomic(data.v[4], p++);
- ret |= __put_user_inatomic(data.v[5], p++);
- /* fall through */
+ unsafe_put_user(data.v[4], p++, Efault_write);
+ unsafe_put_user(data.v[5], p++, Efault_write);
+ fallthrough;
case 2:
- ret |= __put_user_inatomic(data.v[6], p++);
- ret |= __put_user_inatomic(data.v[7], p++);
+ unsafe_put_user(data.v[6], p++, Efault_write);
+ unsafe_put_user(data.v[7], p++, Efault_write);
}
- if (unlikely(ret))
- return -EFAULT;
+ user_write_access_end();
} else {
*evr = data.w[0];
regs->gpr[reg] = data.w[1];
}
return 1;
+
+Efault_read:
+ user_read_access_end();
+ return -EFAULT;
+
+Efault_write:
+ user_write_access_end();
+ return -EFAULT;
}
#endif /* CONFIG_SPE */
@@ -293,28 +300,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr;
+ ppc_inst_t instr;
struct instruction_op op;
int r, type;
- /*
- * We require a complete register set, if not, then our assembly
- * is broken
- */
- CHECK_FULL_REGS(regs);
+ if (is_kernel_addr(regs->nip))
+ r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+ else
+ r = __get_user_instr(instr, (void __user *)regs->nip);
- if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+ if (unlikely(r))
return -EFAULT;
if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
/* We don't handle PPC little-endian any more... */
if (cpu_has_feature(CPU_FTR_PPC_LE))
return -EIO;
- instr = swab32(instr);
+ instr = ppc_inst_swab(instr);
}
#ifdef CONFIG_SPE
- if ((instr >> 26) == 0x4) {
- int reg = (instr >> 21) & 0x1f;
+ if (ppc_inst_primary_opcode(instr) == 0x4) {
+ int reg = (ppc_inst_val(instr) >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
@@ -331,7 +337,7 @@ int fix_alignment(struct pt_regs *regs)
* when pasting to a co-processor. Furthermore, paste_last is the
* synchronisation point for preceding copy/paste sequences.
*/
- if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
+ if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
return -EIO;
r = analyse_instr(&op, regs, instr);
@@ -343,6 +349,7 @@ int fix_alignment(struct pt_regs *regs)
if (op.type != CACHEOP + DCBZ)
return -EINVAL;
PPC_WARN_ALIGNMENT(dcbz, regs);
+ WARN_ON_ONCE(!user_mode(regs));
r = emulate_dcbz(op.ea, regs);
} else {
if (type == LARX || type == STCX)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 3d47aec7becf..9f14d95b8b32 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -9,8 +9,6 @@
* #defines from the assembly-language output.
*/
-#define GENERATING_ASM_OFFSETS /* asm/smp.h */
-
#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -30,7 +28,6 @@
#include <asm/io.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
@@ -57,12 +54,12 @@
#endif
#ifdef CONFIG_PPC32
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
#include "head_booke.h"
#endif
#endif
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
#include "../mm/mmu_decl.h"
#endif
@@ -70,8 +67,12 @@
#include <asm/fixmap.h>
#endif
+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
#define STACK_PT_REGS_OFFSET(sym, val) \
- DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
+ DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val))
int main(void)
{
@@ -83,22 +84,17 @@ int main(void)
OFFSET(PACA_CANARY, paca_struct, canary);
#endif
#endif
- OFFSET(MMCONTEXTID, mm_struct, context.id);
-#ifdef CONFIG_PPC64
- DEFINE(SIGSEGV, SIGSEGV);
- DEFINE(NMI_MASK, NMI_MASK);
-#else
- OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
+#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_RTAS
OFFSET(RTAS_SP, thread_struct, rtas_sp);
#endif
#endif /* CONFIG_PPC64 */
OFFSET(TASK_STACK, task_struct, stack);
#ifdef CONFIG_SMP
- OFFSET(TASK_CPU, task_struct, cpu);
+ OFFSET(TASK_CPU, task_struct, thread_info.cpu);
#endif
-#ifdef CONFIG_LIVEPATCH
+#ifdef CONFIG_LIVEPATCH_64
OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
#endif
@@ -107,15 +103,16 @@ int main(void)
#ifdef CONFIG_BOOKE
OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
#endif
+#ifdef CONFIG_PPC_FPU
OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
+#endif
OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
#ifdef CONFIG_ALTIVEC
OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
- OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
OFFSET(THREAD_USED_VR, thread_struct, used_vr);
OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
@@ -127,25 +124,35 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
#else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
+ OFFSET(SRR0, thread_struct, srr0);
+ OFFSET(SRR1, thread_struct, srr1);
+ OFFSET(DAR, thread_struct, dar);
+ OFFSET(DSISR, thread_struct, dsisr);
+#ifdef CONFIG_PPC_BOOK3S_32
+ OFFSET(THR0, thread_struct, r0);
+ OFFSET(THR3, thread_struct, r3);
+ OFFSET(THR4, thread_struct, r4);
+ OFFSET(THR5, thread_struct, r5);
+ OFFSET(THR6, thread_struct, r6);
+ OFFSET(THR8, thread_struct, r8);
+ OFFSET(THR9, thread_struct, r9);
+ OFFSET(THR11, thread_struct, r11);
+ OFFSET(THLR, thread_struct, lr);
+ OFFSET(THCTR, thread_struct, ctr);
+ OFFSET(THSR0, thread_struct, sr0);
+#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
- OFFSET(THREAD_SPEFSCR, thread_struct, spefscr);
OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0);
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
- OFFSET(KUAP, thread_struct, kuap);
-#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -155,28 +162,21 @@ int main(void)
OFFSET(THREAD_TM_TAR, thread_struct, tm_tar);
OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
+ OFFSET(THREAD_TM_AMR, thread_struct, tm_amr);
OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
- /* Local pt_regs on stack for Transactional Memory funcs. */
- DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
- sizeof(struct pt_regs) + 16);
+ /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */
+ DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16);
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
- OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
- OFFSET(TI_PREEMPT, thread_info, preempt_count);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
- OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page);
- OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size);
- OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size);
- OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page);
/* paca */
- DEFINE(PACA_SIZE, sizeof(struct paca_struct));
OFFSET(PACAPACAINDEX, paca_struct, paca_index);
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
@@ -185,23 +185,20 @@ int main(void)
offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
OFFSET(PACAR1, paca_struct, saved_r1);
+#ifndef CONFIG_PPC_KERNEL_PCREL
OFFSET(PACATOC, paca_struct, kernel_toc);
+#endif
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+ OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+ OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
-#ifdef CONFIG_PPC_BOOK3S
- OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
- OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
- OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
- DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
-#endif /* CONFIG_PPC_MM_SLICES */
-#endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
OFFSET(PACAPGD, paca_struct, pgd);
OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd);
OFFSET(PACA_EXGEN, paca_struct, exgen);
@@ -217,36 +214,23 @@ int main(void)
OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next);
OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max);
OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
#ifdef CONFIG_PPC_BOOK3S_64
- OFFSET(PACASLBCACHE, paca_struct, slb_cache);
- OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
- OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
-#else
- OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp);
-#endif /* CONFIG_PPC_MM_SLICES */
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
- OFFSET(PACA_EXSLB, paca_struct, exslb);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
-#ifdef CONFIG_PPC_PSERIES
- OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
-#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
#endif
- OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
- OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
@@ -262,21 +246,14 @@ int main(void)
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
- OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime);
- OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
- OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
- OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
#endif
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime);
- OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user);
- OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime);
- OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime);
-#endif
#endif /* CONFIG_PPC64 */
/* RTAS */
@@ -285,7 +262,7 @@ int main(void)
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
- DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+ DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE);
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -300,9 +277,6 @@ int main(void)
STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
-#ifndef CONFIG_PPC64
- STACK_PT_REGS_OFFSET(GPR14, gpr[14]);
-#endif /* CONFIG_PPC64 */
/*
* Note: these symbols include _ because they overlap with special
* register names
@@ -314,53 +288,39 @@ int main(void)
STACK_PT_REGS_OFFSET(_CCR, ccr);
STACK_PT_REGS_OFFSET(_XER, xer);
STACK_PT_REGS_OFFSET(_DAR, dar);
+ STACK_PT_REGS_OFFSET(_DEAR, dear);
STACK_PT_REGS_OFFSET(_DSISR, dsisr);
+ STACK_PT_REGS_OFFSET(_ESR, esr);
STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3);
STACK_PT_REGS_OFFSET(RESULT, result);
STACK_PT_REGS_OFFSET(_TRAP, trap);
-#ifndef CONFIG_PPC64
- /*
- * The PowerPC 400-class & Book-E processors have neither the DAR
- * nor the DSISR SPRs. Hence, we overload them to hold the similar
- * DEAR and ESR SPRs for such processors. For critical interrupts
- * we use them to hold SRR0 and SRR1.
- */
- STACK_PT_REGS_OFFSET(_DEAR, dar);
- STACK_PT_REGS_OFFSET(_ESR, dsisr);
-#else /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC64
STACK_PT_REGS_OFFSET(SOFTE, softe);
STACK_PT_REGS_OFFSET(_PPR, ppr);
-#endif /* CONFIG_PPC64 */
+#endif
-#ifdef CONFIG_PPC_KUAP
- STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#ifdef CONFIG_PPC_PKEY
+ STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
+ STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
#endif
-#if defined(CONFIG_PPC32)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
- DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ STACK_PT_REGS_OFFSET(MAS0, mas0);
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
- DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
- DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
- DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
- DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
- DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
- DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
- DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
- DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
- DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
- DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
- DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
- DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
- DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
-#endif
+ STACK_PT_REGS_OFFSET(MMUCR, mas0);
+ STACK_PT_REGS_OFFSET(MAS1, mas1);
+ STACK_PT_REGS_OFFSET(MAS2, mas2);
+ STACK_PT_REGS_OFFSET(MAS3, mas3);
+ STACK_PT_REGS_OFFSET(MAS6, mas6);
+ STACK_PT_REGS_OFFSET(MAS7, mas7);
+ STACK_PT_REGS_OFFSET(_SRR0, srr0);
+ STACK_PT_REGS_OFFSET(_SRR1, srr1);
+ STACK_PT_REGS_OFFSET(_CSRR0, csrr0);
+ STACK_PT_REGS_OFFSET(_CSRR1, csrr1);
+ STACK_PT_REGS_OFFSET(_DSRR0, dsrr0);
+ STACK_PT_REGS_OFFSET(_DSRR1, dsrr1);
#endif
-#ifndef CONFIG_PPC64
- OFFSET(MM_PGD, mm_struct, pgd);
-#endif /* ! CONFIG_PPC64 */
-
/* About the CPU features table */
OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
@@ -376,56 +336,23 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
- OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp);
- OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec);
- OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs);
- OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count);
- OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest);
- OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime);
- OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
- OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
- OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
- OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
- OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
- OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
- OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
- OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
- OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
- OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
+ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
+ OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
#ifdef CONFIG_PPC64
- OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
-#endif
- OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
- OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
- OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
- OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
- /* timeval/timezone offsets for use by vdso */
- OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
- OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
-
- /* Other bits used by the vdso */
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+ OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+ OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+ OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+ OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map);
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map);
+#else
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map);
+#endif
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
-#else
- DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
-#endif
- DEFINE(PTE_SIZE, sizeof(pte_t));
-
#ifdef CONFIG_KVM
OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
@@ -453,7 +380,7 @@ int main(void)
OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2);
OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3);
#endif
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry);
OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr);
OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit);
@@ -492,23 +419,18 @@ int main(void)
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets);
OFFSET(KVM_SDR1, kvm, arch.sdr1);
OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
- OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits);
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
- OFFSET(KVM_RADIX, kvm, arch.radix);
- OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
- OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -523,20 +445,18 @@ int main(void)
OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
- OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
- OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+ OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+ OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
- OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
- OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
- OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+ OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
+ OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
- OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
@@ -545,7 +465,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
- OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
@@ -561,8 +480,6 @@ int main(void)
OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr);
OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop);
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
- OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
- OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
@@ -657,13 +574,8 @@ int main(void)
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
- HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
- HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
- HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
- HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
- HSTATE_FIELD(HSTATE_TID, tid);
HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
@@ -690,8 +602,6 @@ int main(void)
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
- OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
- OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -728,7 +638,7 @@ int main(void)
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
OFFSET(TLBCAM_MAS0, tlbcam, MAS0);
OFFSET(TLBCAM_MAS1, tlbcam, MAS1);
@@ -749,17 +659,6 @@ int main(void)
OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
#endif
-#ifdef CONFIG_KVM_XICS
- DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
- arch.xive_saved_state));
- DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
- arch.xive_cam_word));
- DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
- DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
- DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
- DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
-#endif
-
#ifdef CONFIG_KVM_EXIT_TIMING
OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
@@ -768,11 +667,14 @@ int main(void)
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
- DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
#ifdef CONFIG_PPC_8xx
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
#endif
+#ifdef CONFIG_XMON
+ DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index a2dddd7f3d09..92298d6a3a37 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -4,6 +4,8 @@
#include <linux/audit.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
@@ -41,21 +43,22 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_PPC64
- extern int ppc32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_PPC)
return ppc32_classify_syscall(syscall);
#endif
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 0;
+ return AUDITSC_NATIVE;
}
}
diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h
new file mode 100644
index 000000000000..c6c79c3041ab
--- /dev/null
+++ b/arch/powerpc/kernel/audit_32.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __AUDIT_32_H__
+#define __AUDIT_32_H__
+
+extern int ppc32_classify_syscall(unsigned);
+
+#endif
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6dfceaa820e4..7f63f1cdc6c3 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -8,14 +8,15 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/font.h>
#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
#include <asm/sections.h>
-#include <asm/prom.h>
#include <asm/btext.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/udbg.h>
@@ -26,7 +27,7 @@
static void scrollscreen(void);
#endif
-#define __force_data __attribute__((__section__(".data")))
+#define __force_data __section(".data")
static int g_loc_X __force_data;
static int g_loc_Y __force_data;
@@ -41,12 +42,7 @@ static unsigned char *logicalDisplayBase __force_data;
unsigned long disp_BAT[2] __initdata = {0, 0};
-#define cmapsz (16*256)
-
-static unsigned char vga_font[cmapsz];
-
-int boot_text_mapped __force_data = 0;
-int force_printk_to_btext = 0;
+static int boot_text_mapped __force_data;
extern void rmci_on(void);
extern void rmci_off(void);
@@ -74,7 +70,7 @@ static inline void rmci_maybe_off(void)
* the display during identify_machine() and MMU_Init()
*
* The display is mapped to virtual address 0xD0000000, rather
- * than 1:1, because some some CHRP machines put the frame buffer
+ * than 1:1, because some CHRP machines put the frame buffer
* in the region starting at 0xC0000000 (PAGE_OFFSET).
* This mapping is temporary and will disappear as soon as the
* setup done by MMU_Init() is applied.
@@ -95,19 +91,10 @@ void __init btext_prepare_BAT(void)
boot_text_mapped = 0;
return;
}
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
- /* 603, 604, G3, G4, ... */
- lowbits = addr & ~0xFF000000UL;
- addr &= 0xFF000000UL;
- disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
- disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
- } else {
- /* 601 */
- lowbits = addr & ~0xFF800000UL;
- addr &= 0xFF800000UL;
- disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
- disp_BAT[1] = addr | BL_8M | 0x40;
- }
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
logicalDisplayBase = (void *) (vaddr + lowbits);
}
#endif
@@ -170,7 +157,7 @@ void btext_map(void)
boot_text_mapped = 1;
}
-static int btext_initialize(struct device_node *np)
+static int __init btext_initialize(struct device_node *np)
{
unsigned int width, height, depth, pitch;
unsigned long address = 0;
@@ -245,13 +232,15 @@ int __init btext_find_display(int allow_nonstdout)
return rc;
for_each_node_by_type(np, "display") {
- if (of_get_property(np, "linux,opened", NULL)) {
+ if (of_property_read_bool(np, "linux,opened")) {
printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
@@ -299,7 +288,7 @@ void btext_update_display(unsigned long phys, int width, int height,
}
EXPORT_SYMBOL(btext_update_display);
-void btext_clearscreen(void)
+void __init btext_clearscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -317,7 +306,7 @@ void btext_clearscreen(void)
rmci_maybe_off();
}
-void btext_flushscreen(void)
+void __init btext_flushscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -336,7 +325,7 @@ void btext_flushscreen(void)
__asm__ __volatile__ ("sync" ::: "memory");
}
-void btext_flushline(void)
+void __init btext_flushline(void)
{
unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -415,7 +404,7 @@ static unsigned int expand_bits_16[4] = {
};
-static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0xFFFFFFFFUL;
@@ -436,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
}
}
-static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0xFFFFFFFFUL;
@@ -454,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
}
}
-static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0x0F0F0F0FUL;
@@ -473,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
static noinline void draw_byte(unsigned char c, long locX, long locY)
{
unsigned char *base = calc_base(locX << 3, locY << 4);
- unsigned char *font = &vga_font[((unsigned int)c) * 16];
+ unsigned int font_index = c * 16;
+ const unsigned char *font = font_sun_8x16.data + font_index;
int rb = dispDeviceRowBytes;
rmci_maybe_on();
@@ -551,7 +541,7 @@ void btext_drawstring(const char *c)
btext_drawchar(*c++);
}
-void btext_drawtext(const char *c, unsigned int len)
+void __init btext_drawtext(const char *c, unsigned int len)
{
if (!boot_text_mapped)
return;
@@ -559,7 +549,7 @@ void btext_drawtext(const char *c, unsigned int len)
btext_drawchar(*c++);
}
-void btext_drawhex(unsigned long v)
+void __init btext_drawhex(unsigned long v)
{
if (!boot_text_mapped)
return;
@@ -591,349 +581,3 @@ void __init udbg_init_btext(void)
*/
udbg_putc = btext_drawchar;
}
-
-static unsigned char vga_font[cmapsz] = {
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
-0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
-0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
-0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
-0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
-0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
-0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
-0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
-0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
-0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
-0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
-0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
-0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
-0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
-0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
-0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
-0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
-0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
-0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
-0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
-0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
-0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
-0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
-0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
-0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
-0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
-0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
-0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
-0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
-0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
-0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
-0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
-0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
-0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
-0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
-0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
-0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
-0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
-0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
-0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
-0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
-0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
-0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
-0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
-0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
-0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
-0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
-0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
-0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
-0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
-0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
-0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
-0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
-0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
-0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
-0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
-0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
-0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
-0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
-0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
-0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
-0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
-0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
-0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
-0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
-0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
-0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00,
-};
-
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 470336277c67..f502337dd37d 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -7,6 +7,8 @@
* Author: Nathan Lynch
*/
+#define pr_fmt(fmt) "cacheinfo: " fmt
+
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
@@ -16,7 +18,6 @@
#include <linux/of.h>
#include <linux/percpu.h>
#include <linux/slab.h>
-#include <asm/prom.h>
#include <asm/cputhreads.h>
#include <asm/smp.h>
@@ -118,6 +119,7 @@ struct cache {
struct cpumask shared_cpu_map; /* online CPUs using this cache */
int type; /* split cache disambiguation */
int level; /* level not explicit in device tree */
+ int group_id; /* id of the group of threads that share this cache */
struct list_head list; /* global list of cache objects */
struct cache *next_local; /* next cache of >= level */
};
@@ -140,22 +142,24 @@ static const char *cache_type_string(const struct cache *cache)
}
static void cache_init(struct cache *cache, int type, int level,
- struct device_node *ofnode)
+ struct device_node *ofnode, int group_id)
{
cache->type = type;
cache->level = level;
cache->ofnode = of_node_get(ofnode);
+ cache->group_id = group_id;
INIT_LIST_HEAD(&cache->list);
list_add(&cache->list, &cache_list);
}
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+ struct device_node *ofnode, int group_id)
{
struct cache *cache;
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache)
- cache_init(cache, type, level, ofnode);
+ cache_init(cache, type, level, ofnode, group_id);
return cache;
}
@@ -166,7 +170,7 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+ "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n",
iter->ofnode,
cache_type_string(iter),
cache->ofnode,
@@ -178,7 +182,7 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+ pr_debug("freeing L%d %s cache for %pOFP\n", cache->level,
cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
@@ -193,7 +197,7 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %pOF(%s)\n",
+ "CPU %i already accounted in %pOFP(%s)\n",
cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
@@ -307,20 +311,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
return cache;
list_for_each_entry(iter, &cache_list, list)
- if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ if (iter->ofnode == cache->ofnode &&
+ iter->group_id == cache->group_id &&
+ iter->next_local == cache)
return iter;
return cache;
}
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+ int group_id)
{
struct cache *cache = NULL;
struct cache *iter;
list_for_each_entry(iter, &cache_list, list) {
- if (iter->ofnode != node)
+ if (iter->ofnode != node ||
+ iter->group_id != group_id)
continue;
cache = cache_find_first_sibling(iter);
break;
@@ -350,23 +358,24 @@ static int cache_is_unified_d(const struct device_node *np)
CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
}
-static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+ int level)
{
- pr_debug("creating L%d ucache for %pOF\n", level, node);
+ pr_debug("creating L%d ucache for %pOFP\n", level, node);
- return new_cache(cache_is_unified_d(node), level, node);
+ return new_cache(cache_is_unified_d(node), level, node, group_id);
}
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
int level)
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %pOF\n", level,
+ pr_debug("creating L%d dcache and icache for %pOFP\n", level,
node);
- dcache = new_cache(CACHE_TYPE_DATA, level, node);
- icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+ dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
if (!dcache || !icache)
goto err;
@@ -380,31 +389,32 @@ err:
return NULL;
}
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
{
struct cache *cache;
if (cache_node_is_unified(node))
- cache = cache_do_one_devnode_unified(node, level);
+ cache = cache_do_one_devnode_unified(node, group_id, level);
else
- cache = cache_do_one_devnode_split(node, level);
+ cache = cache_do_one_devnode_split(node, group_id, level);
return cache;
}
static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int group_id,
int level)
{
struct cache *cache;
- cache = cache_lookup_by_node(node);
+ cache = cache_lookup_by_node_group(node, group_id);
WARN_ONCE(cache && cache->level != level,
"cache level mismatch on lookup (got %d, expected %d)\n",
cache->level, level);
if (!cache)
- cache = cache_do_one_devnode(node, level);
+ cache = cache_do_one_devnode(node, group_id, level);
return cache;
}
@@ -418,15 +428,53 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger)
}
smaller->next_local = bigger;
+
+ /*
+ * The cache->next_local list sorts by level ascending:
+ * L1d -> L1i -> L2 -> L3 ...
+ */
+ WARN_ONCE((smaller->level == 1 && bigger->level > 2) ||
+ (smaller->level > 1 && bigger->level != smaller->level + 1),
+ "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n",
+ smaller->level, smaller->ofnode, bigger->level, bigger->ofnode);
}
static void do_subsidiary_caches_debugcheck(struct cache *cache)
{
- WARN_ON_ONCE(cache->level != 1);
- WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu"));
+ WARN_ONCE(cache->level != 1,
+ "instantiating cache chain from L%d %s cache for "
+ "%pOFP instead of an L1\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
+ WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"),
+ "instantiating cache chain from node %pOFP of type '%s' "
+ "instead of a cpu node\n", cache->ofnode,
+ of_node_get_device_type(cache->ofnode));
}
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+ if (has_big_cores && level == 1)
+ return cpumask_first(per_cpu(thread_group_l1_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l2 && level == 2)
+ return cpumask_first(per_cpu(thread_group_l2_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l3 && level == 3)
+ return cpumask_first(per_cpu(thread_group_l3_cache_map,
+ cpu_id));
+ return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
{
struct device_node *subcache_node;
int level = cache->level;
@@ -435,9 +483,11 @@ static void do_subsidiary_caches(struct cache *cache)
while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
struct cache *subcache;
+ int group_id;
level++;
- subcache = cache_lookup_or_instantiate(subcache_node, level);
+ group_id = get_group_id(cpu_id, level);
+ subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
of_node_put(subcache_node);
if (!subcache)
break;
@@ -451,6 +501,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cpu_cache = NULL;
+ int group_id;
pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
@@ -459,11 +510,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
if (!cpu_node)
goto out;
- cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ group_id = get_group_id(cpu_id, 1);
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
if (!cpu_cache)
goto out;
- do_subsidiary_caches(cpu_cache);
+ do_subsidiary_caches(cpu_cache, cpu_id);
cache_cpu_set(cpu_cache, cpu_id);
out:
@@ -624,66 +677,49 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
-static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
-{
- struct kobject *index_dir_kobj = &index->kobj;
- struct kobject *cache_dir_kobj = index_dir_kobj->parent;
- struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
- struct device *dev = kobj_to_dev(cpu_dev_kobj);
-
- return dev->id;
-}
-
-/*
- * On big-core systems, each core has two groups of CPUs each of which
- * has its own L1-cache. The thread-siblings which share l1-cache with
- * @cpu can be obtained via cpu_smallcore_mask().
- */
-static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
-{
- if (cache->level == 1)
- return cpu_smallcore_mask(cpu);
-
- return &cache->shared_cpu_map;
-}
-
-static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
{
struct cache_index_dir *index;
struct cache *cache;
const struct cpumask *mask;
- int ret, cpu;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
- if (has_big_cores) {
- cpu = index_dir_to_cpu(index);
- mask = get_big_core_shared_cpu_map(cpu, cache);
- } else {
- mask = &cache->shared_cpu_map;
- }
+ mask = &cache->shared_cpu_map;
- ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
- cpumask_pr_args(mask));
- buf[ret++] = '\n';
- buf[ret] = '\0';
- return ret;
+ return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, false);
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, true);
}
static struct kobj_attribute cache_shared_cpu_map_attr =
__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+static struct kobj_attribute cache_shared_cpu_list_attr =
+ __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
/* Attributes which should always be created -- the kobject/sysfs core
- * does this automatically via kobj_type->default_attrs. This is the
+ * does this automatically via kobj_type->default_groups. This is the
* minimum data required to uniquely identify a cache.
*/
static struct attribute *cache_index_default_attrs[] = {
&cache_type_attr.attr,
&cache_level_attr.attr,
&cache_shared_cpu_map_attr.attr,
+ &cache_shared_cpu_list_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(cache_index_default);
/* Attributes which should be created if the cache device node has the
* right properties -- see cacheinfo_create_index_opt_attrs
@@ -702,7 +738,7 @@ static const struct sysfs_ops cache_index_ops = {
static struct kobj_type cache_index_type = {
.release = cache_index_release,
.sysfs_ops = &cache_index_ops,
- .default_attrs = cache_index_default_attrs,
+ .default_groups = cache_index_default_groups,
};
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
@@ -733,13 +769,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%pOF(%s) (rc = %zd)\n",
+ "%pOFP(%s) (rc = %zd)\n",
attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %pOF(%s)\n",
+ pr_debug("could not create %s attribute for %pOFP(%s)\n",
attr->attr.name, cache->ofnode, cache_type);
}
@@ -810,13 +846,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cache;
+ int group_id;
cpu_node = of_get_cpu_node(cpu_id, NULL);
WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
if (!cpu_node)
return NULL;
- cache = cache_lookup_by_node(cpu_node);
+ group_id = get_group_id(cpu_id, 1);
+ cache = cache_lookup_by_node_group(cpu_node, group_id);
of_node_put(cpu_node);
return cache;
@@ -855,7 +893,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %pOF(%s)\n",
+ "CPU %i not accounted in %pOFP(%s)\n",
cpu, cache->ofnode,
cache_type_string(cache));
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 55c6ccda0a85..57b38c592b9f 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#undef __powerpc64__
+#include <linux/audit_arch.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
unsigned ppc32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
@@ -31,14 +34,16 @@ int ppc32_classify_syscall(unsigned syscall)
{
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 1;
+ return AUDITSC_COMPAT;
}
}
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f6517f67265a..f29ce3dd6140 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -4,6 +4,8 @@
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
*/
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -81,7 +83,7 @@ _GLOBAL(__setup_cpu_745x)
blr
/* Enable caches for 603's, 604, 750 & 7400 */
-setup_common_caches:
+SYM_FUNC_START_LOCAL(setup_common_caches)
mfspr r11,SPRN_HID0
andi. r0,r11,HID0_DCE
ori r11,r11,HID0_ICE|HID0_DCE
@@ -95,11 +97,12 @@ setup_common_caches:
sync
isync
blr
+SYM_FUNC_END(setup_common_caches)
/* 604, 604e, 604ev, ...
* Enable superscalar execution & branch history table
*/
-setup_604_hid0:
+SYM_FUNC_START_LOCAL(setup_604_hid0)
mfspr r11,SPRN_HID0
ori r11,r11,HID0_SIED|HID0_BHTE
ori r8,r11,HID0_BTCD
@@ -110,6 +113,7 @@ setup_604_hid0:
sync
isync
blr
+SYM_FUNC_END(setup_604_hid0)
/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
* erratas we work around here.
@@ -125,13 +129,14 @@ setup_604_hid0:
* needed once we have applied workaround #5 (though it's
* not set by Apple's firmware at least).
*/
-setup_7400_workarounds:
+SYM_FUNC_START_LOCAL(setup_7400_workarounds)
mfpvr r3
rlwinm r3,r3,0,20,31
cmpwi 0,r3,0x0207
ble 1f
blr
-setup_7410_workarounds:
+SYM_FUNC_END(setup_7400_workarounds)
+SYM_FUNC_START_LOCAL(setup_7410_workarounds)
mfpvr r3
rlwinm r3,r3,0,20,31
cmpwi 0,r3,0x0100
@@ -151,6 +156,7 @@ setup_7410_workarounds:
sync
isync
blr
+SYM_FUNC_END(setup_7410_workarounds)
/* 740/750/7400/7410
* Enable Store Gathering (SGE), Address Broadcast (ABE),
@@ -158,7 +164,7 @@ setup_7410_workarounds:
* Dynamic Power Management (DPM), Speculative (SPD)
* Clear Instruction cache throttling (ICTC)
*/
-setup_750_7400_hid0:
+SYM_FUNC_START_LOCAL(setup_750_7400_hid0)
mfspr r11,SPRN_HID0
ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
oris r11,r11,HID0_DPM@h
@@ -177,12 +183,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
sync
isync
blr
+SYM_FUNC_END(setup_750_7400_hid0)
/* 750cx specific
* Looks like we have to disable NAP feature for some PLL settings...
* (waiting for confirmation)
*/
-setup_750cx:
+SYM_FUNC_START_LOCAL(setup_750cx)
mfspr r10, SPRN_HID1
rlwinm r10,r10,4,28,31
cmpwi cr0,r10,7
@@ -196,11 +203,13 @@ setup_750cx:
andc r6,r6,r7
stw r6,CPU_SPEC_FEATURES(r4)
blr
+SYM_FUNC_END(setup_750cx)
/* 750fx specific
*/
-setup_750fx:
+SYM_FUNC_START_LOCAL(setup_750fx)
blr
+SYM_FUNC_END(setup_750fx)
/* MPC 745x
* Enable Store Gathering (SGE), Branch Folding (FOLD)
@@ -212,7 +221,7 @@ setup_750fx:
* Clear Instruction cache throttling (ICTC)
* Enable L2 HW prefetch
*/
-setup_745x_specifics:
+SYM_FUNC_START_LOCAL(setup_745x_specifics)
/* We check for the presence of an L3 cache setup by
* the firmware. If any, we disable NAP capability as
* it's known to be bogus on rev 2.1 and earlier
@@ -270,6 +279,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
sync
isync
blr
+SYM_FUNC_END(setup_745x_specifics)
/*
* Initialize the FPU registers. This is needed to work around an errata
@@ -288,6 +298,7 @@ _GLOBAL(__init_fpu_registers)
mtmsr r10
isync
blr
+_ASM_NOKPROBE_SYMBOL(__init_fpu_registers)
/* Definitions for the table use to save CPU states */
@@ -483,4 +494,5 @@ _GLOBAL(__restore_cpu_setup)
1:
mtcr r7
blr
+_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_e500.S
index 1d308780e0d3..077cfccc3461 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_e500.S
@@ -8,11 +8,13 @@
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
*/
+#include <linux/linkage.h>
+
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/ppc_asm.h>
-#include <asm/nohash/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
#include <asm/asm-offsets.h>
#include <asm/mpc85xx.h>
@@ -108,16 +110,7 @@ _GLOBAL(__setup_cpu_e6500)
#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_E200
-_GLOBAL(__setup_cpu_e200)
- /* enable dedicated debug exception handling resources (Debug APU) */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_DAPUEN@l
- mtspr SPRN_HID0,r3
- b __setup_e200_ivors
-#endif /* CONFIG_E200 */
-
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
#ifndef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e500v1)
_GLOBAL(__setup_cpu_e500v2)
@@ -165,7 +158,7 @@ _GLOBAL(__setup_cpu_e5500)
mtlr r5
blr
#endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC_E500 */
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC_BOOK3E_64
@@ -283,7 +276,7 @@ _GLOBAL(flush_dcache_L1)
blr
-has_L2_cache:
+SYM_FUNC_START_LOCAL(has_L2_cache)
/* skip L2 cache on P2040/P2040E as they have no L2 cache */
mfspr r3, SPRN_SVR
/* shift right by 8 bits and clear E bit of SVR */
@@ -299,9 +292,10 @@ has_L2_cache:
1:
li r3, 0
blr
+SYM_FUNC_END(has_L2_cache)
/* flush backside L2 cache */
-flush_backside_L2_cache:
+SYM_FUNC_START_LOCAL(flush_backside_L2_cache)
mflr r10
bl has_L2_cache
mtlr r10
@@ -322,6 +316,7 @@ flush_backside_L2_cache:
bne 1b
2:
blr
+SYM_FUNC_END(flush_backside_L2_cache)
_GLOBAL(cpu_down_flush_e500v2)
mflr r0
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
deleted file mode 100644
index a460298c7ddb..000000000000
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains low level CPU setup functions.
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- */
-
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cache.h>
-#include <asm/book3s/64/mmu-hash.h>
-
-/* Entry: r3 = crap, r4 = ptr to cputable entry
- *
- * Note that we can be called twice for pseudo-PVRs
- */
-_GLOBAL(__setup_cpu_power7)
- mflr r11
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
- bl __init_LPCR_ISA206
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power7)
- mflr r11
- mfmsr r3
- rldicl. r0,r3,4,63
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
- bl __init_LPCR_ISA206
- mtlr r11
- blr
-
-_GLOBAL(__setup_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_PMU_ISA207
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA206
- bl __init_HFSCR
- bl __init_PMU_HV
- bl __init_PMU_HV_ISA207
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_PMU_ISA207
- mfmsr r3
- rldicl. r0,r3,4,63
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA206
- bl __init_HFSCR
- bl __init_PMU_HV
- bl __init_PMU_HV_ISA207
- mtlr r11
- blr
-
-_GLOBAL(__setup_cpu_power9)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_PSSCR,r0
- mtspr SPRN_LPID,r0
- mtspr SPRN_PID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
- or r3, r3, r4
- LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
- andc r3, r3, r4
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA300
- bl __init_HFSCR
- bl __init_PMU_HV
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power9)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- mfmsr r3
- rldicl. r0,r3,4,63
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_PSSCR,r0
- mtspr SPRN_LPID,r0
- mtspr SPRN_PID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
- or r3, r3, r4
- LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
- andc r3, r3, r4
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA300
- bl __init_HFSCR
- bl __init_PMU_HV
- mtlr r11
- blr
-
-__init_hvmode_206:
- /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
- mfmsr r3
- rldicl. r0,r3,4,63
- bnelr
- ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
- andc r5,r5,r6
- std r5,CPU_SPEC_FEATURES(r4)
- blr
-
-__init_LPCR_ISA206:
- /* Setup a sane LPCR:
- * Called with initial LPCR in R3 and desired LPES 2-bit value in R4
- *
- * LPES = 0b01 (HSRR0/1 used for 0x500)
- * PECE = 0b111
- * DPFD = 4
- * HDICE = 0
- * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
- * VRMASD = 0b10000 (L=1, LP=00)
- *
- * Other bits untouched for now
- */
- li r5,0x10
- rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
-
- /* POWER9 has no VRMASD */
-__init_LPCR_ISA300:
- rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
- ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
- li r5,4
- rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
- clrrdi r3,r3,1 /* clear HDICE */
- li r5,4
- rldimi r3,r5, LPCR_VC_SH, 0
- mtspr SPRN_LPCR,r3
- isync
- blr
-
-__init_FSCR:
- mfspr r3,SPRN_FSCR
- ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
- mtspr SPRN_FSCR,r3
- blr
-
-__init_HFSCR:
- mfspr r3,SPRN_HFSCR
- ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
- mtspr SPRN_HFSCR,r3
- blr
-
-__init_PMU_HV:
- li r5,0
- mtspr SPRN_MMCRC,r5
- blr
-
-__init_PMU_HV_ISA207:
- li r5,0
- mtspr SPRN_MMCRH,r5
- blr
-
-__init_PMU:
- li r5,0
- mtspr SPRN_MMCRA,r5
- mtspr SPRN_MMCR0,r5
- mtspr SPRN_MMCR1,r5
- mtspr SPRN_MMCR2,r5
- blr
-
-__init_PMU_ISA207:
- li r5,0
- mtspr SPRN_MMCRS,r5
- blr
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
new file mode 100644
index 000000000000..98bd4e6c1770
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020, Jordan Niethe, IBM Corporation.
+ *
+ * This file contains low level CPU setup functions.
+ * Originally written in assembly by Benjamin Herrenschmidt & various other
+ * authors.
+ */
+
+#include <asm/reg.h>
+#include <asm/synch.h>
+#include <linux/bitops.h>
+#include <asm/cputable.h>
+#include <asm/cpu_setup.h>
+
+/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */
+static bool init_hvmode_206(struct cpu_spec *t)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (msr & MSR_HV)
+ return true;
+
+ t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST);
+ return false;
+}
+
+static void init_LPCR_ISA300(u64 lpcr, u64 lpes)
+{
+ /* POWER9 has no VRMASD */
+ lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES;
+ lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2;
+ lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD;
+ lpcr &= ~LPCR_HDICE; /* clear HDICE */
+ lpcr |= (4ull << LPCR_VC_SH);
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+}
+
+/*
+ * Setup a sane LPCR:
+ * Called with initial LPCR and desired LPES 2-bit value
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ * HDICE = 0
+ * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ * VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * Other bits untouched for now
+ */
+static void init_LPCR_ISA206(u64 lpcr, u64 lpes)
+{
+ lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD;
+ init_LPCR_ISA300(lpcr, lpes);
+}
+
+static void init_FSCR(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_TAR|FSCR_EBB;
+ mtspr(SPRN_FSCR, fscr);
+}
+
+static void init_FSCR_power9(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_SCV;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR();
+}
+
+static void init_FSCR_power10(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_PREFIX;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR_power9();
+}
+
+static void init_HFSCR(void)
+{
+ u64 hfscr;
+
+ hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\
+ HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP;
+ mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_PMU_HV(void)
+{
+ mtspr(SPRN_MMCRC, 0);
+}
+
+static void init_PMU_HV_ISA207(void)
+{
+ mtspr(SPRN_MMCRH, 0);
+}
+
+static void init_PMU(void)
+{
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+}
+
+static void init_PMU_ISA207(void)
+{
+ mtspr(SPRN_MMCRS, 0);
+}
+
+static void init_PMU_ISA31(void)
+{
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+}
+
+static void init_DEXCR(void)
+{
+ mtspr(SPRN_DEXCR, DEXCR_INIT);
+ mtspr(SPRN_HASHKEYR, 0);
+}
+
+/*
+ * Note that we can be called twice of pseudo-PVRs.
+ * The parameter offset is not used.
+ */
+
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
+{
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __restore_cpu_power7(void)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __restore_cpu_power8(void)
+{
+ u64 msr;
+
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power9();
+ init_PMU();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power9(void)
+{
+ u64 msr;
+
+ init_FSCR_power9();
+ init_PMU();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+ init_DEXCR();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power10(void)
+{
+ u64 msr;
+
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+ init_DEXCR();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h
new file mode 100644
index 000000000000..85ded3f77204
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifdef CONFIG_40x
+#include "cpu_specs_40x.h"
+#endif
+
+#ifdef CONFIG_PPC_47x
+#include "cpu_specs_47x.h"
+#elif defined(CONFIG_44x)
+#include "cpu_specs_44x.h"
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include "cpu_specs_8xx.h"
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#include "cpu_specs_e500mc.h"
+#elif defined(CONFIG_PPC_85xx)
+#include "cpu_specs_85xx.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include "cpu_specs_book3s_32.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include "cpu_specs_book3s_64.h"
+#endif
diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h
new file mode 100644
index 000000000000..a1362a75b8c8
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_40x.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* STB 04xxx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41810000,
+ .cpu_name = "STB04xxx",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP405L */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41610000,
+ .cpu_name = "NP405L",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP4GS3 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x40B10000,
+ .cpu_name = "NP4GS3",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* NP405H */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41410000,
+ .cpu_name = "NP405H",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405GPr */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x50910000,
+ .cpu_name = "405GPr",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* STBx25xx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x51510000,
+ .cpu_name = "STBx25xx",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405LP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41F10000,
+ .cpu_name = "405LP",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x51210000,
+ .cpu_name = "405EP",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. A/B with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910007,
+ .cpu_name = "405EX Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000d,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000f,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910003,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910005,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. A/B without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910001,
+ .cpu_name = "405EXr Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910009,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000b,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910000,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910002,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ {
+ /* 405EZ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41510000,
+ .cpu_name = "405EZ",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* APM8018X */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff11432,
+ .cpu_name = "APM8018X",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 40x PPC)",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+ PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h
new file mode 100644
index 000000000000..69c4cdc0cdee
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_44x.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000850,
+ .cpu_name = "440GR Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000858,
+ .cpu_name = "440EP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008d3,
+ .cpu_name = "440GR Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ff7,
+ .pvr_value = 0x400008d4,
+ .cpu_name = "440EP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008db,
+ .cpu_name = "440EP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440GRX */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D0,
+ .cpu_name = "440GRX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440grx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D8,
+ .cpu_name = "440EPX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440epx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GP Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000440,
+ .cpu_name = "440GP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GP Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000481,
+ .cpu_name = "440GP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GX Rev. A */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000850,
+ .cpu_name = "440GX Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000851,
+ .cpu_name = "440GX Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000892,
+ .cpu_name = "440GX Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. F */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000894,
+ .cpu_name = "440GX Rev. F",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SP Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53200891,
+ .cpu_name = "440SP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. B */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400891,
+ .cpu_name = "440SPe Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020002,
+ .cpu_name = "460EX",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020004,
+ .cpu_name = "460EX Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020000,
+ .cpu_name = "460GT",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020005,
+ .cpu_name = "460GT Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460SX */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x13541800,
+ .cpu_name = "460SX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460sx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 464 in APM821xx */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x12C41C80,
+ .cpu_name = "APM821XX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_apm821xx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 44x PPC)",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h
new file mode 100644
index 000000000000..3143cd504a51
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_47x.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* 476 DD2 core */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x11a52080,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476fpe */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff50000,
+ .cpu_name = "476fpe",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 iss */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00050000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x11a50000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 47x PPC)",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_47x,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h
new file mode 100644
index 000000000000..aaae202c1a89
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_85xx.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* e500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80200000,
+ .cpu_name = "e500",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500v1,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8540",
+ },
+ { /* e500v2 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80210000,
+ .cpu_name = "e500v2",
+ .cpu_features = CPU_FTRS_E500_2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+ PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500v2,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8548",
+ .cpu_down_flush = cpu_down_flush_e500v2,
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic E500 PPC)",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_e500,
+ .platform = "powerpc",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h
new file mode 100644
index 000000000000..93ddbc202ba3
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_8xx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* 8xx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = PVR_8xx,
+ .cpu_name = "8xx",
+ /*
+ * CPU_FTR_MAYBE_CAN_DOZE is possible,
+ * if the 8xx code is there....
+ */
+ .cpu_features = CPU_FTRS_8XX,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_8xx,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .machine_check = machine_check_8xx,
+ .platform = "ppc823",
+ },
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h
new file mode 100644
index 000000000000..3714634d194a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+ PPC_FEATURE_HAS_MMU)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC_BOOK3S_603
+ { /* 603 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00030000,
+ .cpu_name = "603",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603e */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00060000,
+ .cpu_name = "603e",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00070000,
+ .cpu_name = "603ev",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 82xx (8240, 8245, 8260 are all 603e cores) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00810000,
+ .cpu_name = "82xx",
+ .cpu_features = CPU_FTRS_82XX,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* All G2_LE (603e core, plus some) have the same pvr */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00820000,
+ .cpu_name = "G2_LE",
+ .cpu_features = CPU_FTRS_G2_LE,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+#ifdef CONFIG_PPC_83xx
+ { /* e300c1 (a 603e core, plus some) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00830000,
+ .cpu_name = "e300c1",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .platform = "ppc603",
+ },
+ { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00840000,
+ .cpu_name = "e300c2",
+ .cpu_features = CPU_FTRS_E300C2,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .platform = "ppc603",
+ },
+ { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00850000,
+ .cpu_name = "e300c3",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .num_pmcs = 4,
+ .platform = "ppc603",
+ },
+ { /* e300c4 (e300c1, plus one IU) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00860000,
+ .cpu_name = "e300c4",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .num_pmcs = 4,
+ .platform = "ppc603",
+ },
+#endif
+#endif /* CONFIG_PPC_BOOK3S_603 */
+#ifdef CONFIG_PPC_BOOK3S_604
+ { /* 604 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00040000,
+ .cpu_name = "604",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 2,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604e */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604e",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604r */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604r",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000a0000,
+ .cpu_name = "604ev",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 740/750 (0x4202, don't support TAU ?) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00084202,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740_NOTAU,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (80100 and 8010x?) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00080100,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (82201 and 82202) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082200,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe (82214) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082210,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe "Gekko" (83214) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00083214,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CL (and "Broadway") */
+ .pvr_mask = 0xfffff0e0,
+ .pvr_value = 0x00087000,
+ .cpu_name = "750CL",
+ .cpu_features = CPU_FTRS_750CL,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 745/755 */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00083000,
+ .cpu_name = "745/755",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x70000100,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX1,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX rev 2.0 must disable HID0[DPM] */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x70000200,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX2,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX (All revs except 2.0) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70000000,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70020000,
+ .cpu_name = "750GX",
+ .cpu_features = CPU_FTRS_750GX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 740/750 (L2CR bit need fixup for 740) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00080000,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 7400 rev 1.1 ? (no TAU) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x000c1101,
+ .cpu_name = "7400 (1.1)",
+ .cpu_features = CPU_FTRS_7400_NOTAU,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7400 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000c0000,
+ .cpu_name = "7400",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7410 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x800c0000,
+ .cpu_name = "7410",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7410,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7450 2.0 - no doze/nap */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000200,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_20,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000201,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_21,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.3 and newer */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80000000,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_23,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x80010100,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_1,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 2.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80010200,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_20,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80010000,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020100,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020101,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.2 and later */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80020000,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447A */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80030000,
+ .cpu_name = "7447A",
+ .cpu_features = CPU_FTRS_7447A,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7448 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80040000,
+ .cpu_name = "7448",
+ .cpu_features = CPU_FTRS_7448,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* default match, we assume split I/D cache & TB (non-601)... */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic PPC)",
+ .cpu_features = CPU_FTRS_CLASSIC32,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+#endif /* CONFIG_PPC_BOOK3S_604 */
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h
new file mode 100644
index 000000000000..3ff9757df4c0
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+/* NOTE:
+ * Unlike ppc32, ppc64 will only call cpu_setup() for the boot CPU, it's
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore aren't yet
+ * part of the cputable though. That has to be fixed for both ppc32
+ * and ppc64
+ */
+#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
+#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_HTM_COMP | \
+ PPC_FEATURE2_HTM_NOSC_COMP | \
+ PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
+#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9 COMMON_USER_POWER8
+#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128 | \
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV)
+#define COMMON_USER_POWER10 COMMON_USER_POWER9
+#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \
+ PPC_FEATURE2_MMA | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128 | \
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV | \
+ PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* PPC970 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00390000,
+ .cpu_name = "PPC970",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970FX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003c0000,
+ .cpu_name = "PPC970FX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00440100,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00440000,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970MP,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00450000,
+ .cpu_name = "PPC970GX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* Power5 GR */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003a0000,
+ .cpu_name = "POWER5 (gr)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5",
+ },
+ { /* Power5++ */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x003b0300,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .platform = "power5+",
+ },
+ { /* Power5 GS */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003b0000,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5+",
+ },
+ { /* POWER6 in P5+ mode; 2.04-compliant processor */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000001,
+ .cpu_name = "POWER5+",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .platform = "power5+",
+ },
+ { /* Power6 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003e0000,
+ .cpu_name = "POWER6 (raw)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power6x",
+ },
+ { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000002,
+ .cpu_name = "POWER6 (architected)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .platform = "power6",
+ },
+ { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000003,
+ .cpu_name = "POWER7 (architected)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000004,
+ .cpu_name = "POWER8 (architected)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* 2.07-compliant processor, HeXin C2000 processor */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00660000,
+ .cpu_name = "HX-C2000",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* 3.00-compliant processor, i.e. Power9 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000005,
+ .cpu_name = "POWER9 (architected)",
+ .cpu_features = CPU_FTRS_POWER9,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .platform = "power9",
+ },
+ { /* 3.1-compliant processor, i.e. Power10 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000006,
+ .cpu_name = "POWER10 (architected)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .platform = "power10",
+ },
+ { /* Power7 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003f0000,
+ .cpu_name = "POWER7 (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* Power7+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004A0000,
+ .cpu_name = "POWER7+ (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7+",
+ },
+ { /* Power8E */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004b0000,
+ .cpu_name = "POWER8E (raw)",
+ .cpu_features = CPU_FTRS_POWER8E,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8NVL */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004c0000,
+ .cpu_name = "POWER8NVL (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004d0000,
+ .cpu_name = "POWER8 (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_0,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD 2.1 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0201,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.2 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0202,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_2,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.3 or later */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004e0000,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_3,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power10 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00800000,
+ .cpu_name = "POWER10 (raw)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .machine_check_early = __machine_check_early_realmode_p10,
+ .platform = "power10",
+ },
+ { /* Cell Broadband Engine */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00700000,
+ .cpu_name = "Cell Broadband Engine",
+ .cpu_features = CPU_FTRS_CELL,
+ .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT,
+ .mmu_features = MMU_FTRS_CELL,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "ppc-cell-be",
+ },
+ { /* PA Semi PA6T */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00900000,
+ .cpu_name = "PA6T",
+ .cpu_features = CPU_FTRS_PA6T,
+ .cpu_user_features = COMMON_USER_PA6T,
+ .mmu_features = MMU_FTRS_PA6T,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_PA6T,
+ .cpu_setup = __setup_cpu_pa6t,
+ .cpu_restore = __restore_cpu_pa6t,
+ .platform = "pa6t",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "POWER5 (compatible)",
+ .cpu_features = CPU_FTRS_COMPATIBLE,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_POWER,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
new file mode 100644
index 000000000000..ceb06b109f83
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+#ifdef CONFIG_PPC64
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+#else
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+#endif
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC32
+ { /* e500mc */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80230000,
+ .cpu_name = "e500mc",
+ .cpu_features = CPU_FTRS_E500MC,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500mc,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce500mc",
+ .cpu_down_flush = cpu_down_flush_e500mc,
+ },
+#endif /* CONFIG_PPC32 */
+ { /* e5500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80240000,
+ .cpu_name = "e5500",
+ .cpu_features = CPU_FTRS_E5500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e5500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e5500,
+#endif
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce5500",
+ .cpu_down_flush = cpu_down_flush_e5500,
+ },
+ { /* e6500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80400000,
+ .cpu_name = "e6500",
+ .cpu_features = CPU_FTRS_E6500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .cpu_setup = __setup_cpu_e6500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e6500,
+#endif
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce6500",
+ .cpu_down_flush = cpu_down_flush_e6500,
+ },
+};
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e745abc5457a..6f6801da9dc1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -12,2133 +12,23 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/jump_label.h>
+#include <linux/of.h>
-#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
-#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mce.h>
#include <asm/mmu.h>
#include <asm/setup.h>
+#include <asm/cpu_setup.h>
-static struct cpu_spec the_cpu_spec __read_mostly;
+static struct cpu_spec the_cpu_spec __ro_after_init;
-struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
+struct cpu_spec *cur_cpu_spec __ro_after_init = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
/* The platform string corresponding to the real PVR */
const char *powerpc_base_platform;
-/* NOTE:
- * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
- * the responsibility of the appropriate CPU save/restore functions to
- * eventually copy these settings over. Those save/restore aren't yet
- * part of the cputable though. That has to be fixed for both ppc32
- * and ppc64
- */
-#ifdef CONFIG_PPC32
-extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_pa6t(void);
-extern void __restore_cpu_ppc970(void);
-extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power7(void);
-extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power8(void);
-extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power9(void);
-extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
-#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_E500)
-extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_e5500(void);
-extern void __restore_cpu_e6500(void);
-#endif /* CONFIG_E500 */
-
-/* This table only contains "desktop" CPUs, it need to be filled with embedded
- * ones as well...
- */
-#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
- PPC_FEATURE_HAS_MMU)
-#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
-#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
-#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
-#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
- PPC_FEATURE2_HTM_COMP | \
- PPC_FEATURE2_HTM_NOSC_COMP | \
- PPC_FEATURE2_DSCR | \
- PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
- PPC_FEATURE2_VEC_CRYPTO)
-#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_HAS_ALTIVEC_COMP)
-#define COMMON_USER_POWER9 COMMON_USER_POWER8
-#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
- PPC_FEATURE2_ARCH_3_00 | \
- PPC_FEATURE2_HAS_IEEE128 | \
- PPC_FEATURE2_DARN )
-
-#ifdef CONFIG_PPC_BOOK3E_64
-#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
-#else
-#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
- PPC_FEATURE_BOOKE)
-#endif
-
-static struct cpu_spec __initdata cpu_specs[] = {
-#ifdef CONFIG_PPC_BOOK3S_64
- { /* PPC970 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00390000,
- .cpu_name = "PPC970",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970FX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003c0000,
- .cpu_name = "PPC970FX",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00440100,
- .cpu_name = "PPC970MP",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970MP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00440000,
- .cpu_name = "PPC970MP",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970MP,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970GX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00450000,
- .cpu_name = "PPC970GX",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* Power5 GR */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003a0000,
- .cpu_name = "POWER5 (gr)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power5",
- .oprofile_type = PPC_OPROFILE_POWER4,
- /* SIHV / SIPR bits are implemented on POWER4+ (GQ)
- * and above but only works on POWER5 and above
- */
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5",
- },
- { /* Power5++ */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x003b0300,
- .cpu_name = "POWER5+ (gs)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .oprofile_cpu_type = "ppc64/power5++",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5+",
- },
- { /* Power5 GS */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003b0000,
- .cpu_name = "POWER5+ (gs)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power5+",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5+",
- },
- { /* POWER6 in P5+ mode; 2.04-compliant processor */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000001,
- .cpu_name = "POWER5+",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power5+",
- },
- { /* Power6 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003e0000,
- .cpu_name = "POWER6 (raw)",
- .cpu_features = CPU_FTRS_POWER6,
- .cpu_user_features = COMMON_USER_POWER6 |
- PPC_FEATURE_POWER6_EXT,
- .mmu_features = MMU_FTRS_POWER6,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power6",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
- .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
- .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
- POWER6_MMCRA_OTHER,
- .platform = "power6x",
- },
- { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000002,
- .cpu_name = "POWER6 (architected)",
- .cpu_features = CPU_FTRS_POWER6,
- .cpu_user_features = COMMON_USER_POWER6,
- .mmu_features = MMU_FTRS_POWER6,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power6",
- },
- { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000003,
- .cpu_name = "POWER7 (architected)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7",
- },
- { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000004,
- .cpu_name = "POWER8 (architected)",
- .cpu_features = CPU_FTRS_POWER8,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_INVALID,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* 3.00-compliant processor, i.e. Power9 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000005,
- .cpu_name = "POWER9 (architected)",
- .cpu_features = CPU_FTRS_POWER9,
- .cpu_user_features = COMMON_USER_POWER9,
- .cpu_user_features2 = COMMON_USER2_POWER9,
- .mmu_features = MMU_FTRS_POWER9,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_INVALID,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .cpu_setup = __setup_cpu_power9,
- .cpu_restore = __restore_cpu_power9,
- .platform = "power9",
- },
- { /* Power7 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003f0000,
- .cpu_name = "POWER7 (raw)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7",
- },
- { /* Power7+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004A0000,
- .cpu_name = "POWER7+ (raw)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7+",
- },
- { /* Power8E */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004b0000,
- .cpu_name = "POWER8E (raw)",
- .cpu_features = CPU_FTRS_POWER8E,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power8NVL */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004c0000,
- .cpu_name = "POWER8NVL (raw)",
- .cpu_features = CPU_FTRS_POWER8,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power8 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004d0000,
- .cpu_name = "POWER8 (raw)",
- .cpu_features = CPU_FTRS_POWER8,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power9 DD2.0 */
- .pvr_mask = 0xffffefff,
- .pvr_value = 0x004e0200,
- .cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_0,
- .cpu_user_features = COMMON_USER_POWER9,
- .cpu_user_features2 = COMMON_USER2_POWER9,
- .mmu_features = MMU_FTRS_POWER9,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power9,
- .cpu_restore = __restore_cpu_power9,
- .machine_check_early = __machine_check_early_realmode_p9,
- .platform = "power9",
- },
- { /* Power9 DD 2.1 */
- .pvr_mask = 0xffffefff,
- .pvr_value = 0x004e0201,
- .cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_1,
- .cpu_user_features = COMMON_USER_POWER9,
- .cpu_user_features2 = COMMON_USER2_POWER9,
- .mmu_features = MMU_FTRS_POWER9,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power9,
- .cpu_restore = __restore_cpu_power9,
- .machine_check_early = __machine_check_early_realmode_p9,
- .platform = "power9",
- },
- { /* Power9 DD2.2 or later */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004e0000,
- .cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_2,
- .cpu_user_features = COMMON_USER_POWER9,
- .cpu_user_features2 = COMMON_USER2_POWER9,
- .mmu_features = MMU_FTRS_POWER9,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power9,
- .cpu_restore = __restore_cpu_power9,
- .machine_check_early = __machine_check_early_realmode_p9,
- .platform = "power9",
- },
- { /* Cell Broadband Engine */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00700000,
- .cpu_name = "Cell Broadband Engine",
- .cpu_features = CPU_FTRS_CELL,
- .cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
- PPC_FEATURE_SMT,
- .mmu_features = MMU_FTRS_CELL,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/cell-be",
- .oprofile_type = PPC_OPROFILE_CELL,
- .platform = "ppc-cell-be",
- },
- { /* PA Semi PA6T */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00900000,
- .cpu_name = "PA6T",
- .cpu_features = CPU_FTRS_PA6T,
- .cpu_user_features = COMMON_USER_PA6T,
- .mmu_features = MMU_FTRS_PA6T,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_PA6T,
- .cpu_setup = __setup_cpu_pa6t,
- .cpu_restore = __restore_cpu_pa6t,
- .oprofile_cpu_type = "ppc64/pa6t",
- .oprofile_type = PPC_OPROFILE_PA6T,
- .platform = "pa6t",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "POWER5 (compatible)",
- .cpu_features = CPU_FTRS_COMPATIBLE,
- .cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_POWER,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .platform = "power5",
- }
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_601
- { /* 601 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00010000,
- .cpu_name = "601",
- .cpu_features = CPU_FTRS_PPC601,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
- PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc601",
- },
-#endif /* CONFIG_PPC_BOOK3S_601 */
-#ifdef CONFIG_PPC_BOOK3S_6xx
- { /* 603 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00030000,
- .cpu_name = "603",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603e */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00060000,
- .cpu_name = "603e",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603ev */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00070000,
- .cpu_name = "603ev",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 604 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00040000,
- .cpu_name = "604",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 2,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604e */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x00090000,
- .cpu_name = "604e",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604r */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00090000,
- .cpu_name = "604r",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604ev */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x000a0000,
- .cpu_name = "604ev",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 740/750 (0x4202, don't support TAU ?) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00084202,
- .cpu_name = "740/750",
- .cpu_features = CPU_FTRS_740_NOTAU,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CX (80100 and 8010x?) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00080100,
- .cpu_name = "750CX",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CX (82201 and 82202) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00082200,
- .cpu_name = "750CX",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CXe (82214) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00082210,
- .cpu_name = "750CXe",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CXe "Gekko" (83214) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00083214,
- .cpu_name = "750CXe",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CL (and "Broadway") */
- .pvr_mask = 0xfffff0e0,
- .pvr_value = 0x00087000,
- .cpu_name = "750CL",
- .cpu_features = CPU_FTRS_750CL,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 745/755 */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x00083000,
- .cpu_name = "745/755",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750FX rev 1.x */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x70000100,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX1,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750FX rev 2.0 must disable HID0[DPM] */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x70000200,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX2,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750FX (All revs except 2.0) */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x70000000,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750fx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750GX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x70020000,
- .cpu_name = "750GX",
- .cpu_features = CPU_FTRS_750GX,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750fx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 740/750 (L2CR bit need fixup for 740) */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00080000,
- .cpu_name = "740/750",
- .cpu_features = CPU_FTRS_740,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 7400 rev 1.1 ? (no TAU) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x000c1101,
- .cpu_name = "7400 (1.1)",
- .cpu_features = CPU_FTRS_7400_NOTAU,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7400,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7400 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x000c0000,
- .cpu_name = "7400",
- .cpu_features = CPU_FTRS_7400,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7400,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7410 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x800c0000,
- .cpu_name = "7410",
- .cpu_features = CPU_FTRS_7400,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7410,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7450 2.0 - no doze/nap */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80000200,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_20,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7450 2.1 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80000201,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_21,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7450 2.3 and newer */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80000000,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_23,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 rev 1.x */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x80010100,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455_1,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 rev 2.0 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80010200,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455_20,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 others */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80010000,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.0 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80020100,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447_10,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.1 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80020101,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447_10,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.2 and later */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80020000,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447A */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80030000,
- .cpu_name = "7447A",
- .cpu_features = CPU_FTRS_7447A,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7448 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80040000,
- .cpu_name = "7448",
- .cpu_features = CPU_FTRS_7448,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 82xx (8240, 8245, 8260 are all 603e cores) */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00810000,
- .cpu_name = "82xx",
- .cpu_features = CPU_FTRS_82XX,
- .cpu_user_features = COMMON_USER,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* All G2_LE (603e core, plus some) have the same pvr */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00820000,
- .cpu_name = "G2_LE",
- .cpu_features = CPU_FTRS_G2_LE,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
-#ifdef CONFIG_PPC_83xx
- { /* e300c1 (a 603e core, plus some) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00830000,
- .cpu_name = "e300c1",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_83xx,
- .platform = "ppc603",
- },
- { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00840000,
- .cpu_name = "e300c2",
- .cpu_features = CPU_FTRS_E300C2,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_83xx,
- .platform = "ppc603",
- },
- { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00850000,
- .cpu_name = "e300c3",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_83xx,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .platform = "ppc603",
- },
- { /* e300c4 (e300c1, plus one IU) */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00860000,
- .cpu_name = "e300c4",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_83xx,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .platform = "ppc603",
- },
-#endif
- { /* default match, we assume split I/D cache & TB (non-601)... */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic PPC)",
- .cpu_features = CPU_FTRS_CLASSIC32,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
-#endif /* CONFIG_PPC_BOOK3S_6xx */
-#ifdef CONFIG_PPC_8xx
- { /* 8xx */
- .pvr_mask = 0xffff0000,
- .pvr_value = PVR_8xx,
- .cpu_name = "8xx",
- /* CPU_FTR_MAYBE_CAN_DOZE is possible,
- * if the 8xx code is there.... */
- .cpu_features = CPU_FTRS_8XX,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_8xx,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_8xx,
- .platform = "ppc823",
- },
-#endif /* CONFIG_PPC_8xx */
-#ifdef CONFIG_40x
- { /* 403GC */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00200200,
- .cpu_name = "403GC",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403GCX */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00201400,
- .cpu_name = "403GCX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403G ?? */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00200000,
- .cpu_name = "403G ??",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 405GP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40110000,
- .cpu_name = "405GP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STB 03xxx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40130000,
- .cpu_name = "STB03xxx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STB 04xxx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41810000,
- .cpu_name = "STB04xxx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP405L */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41610000,
- .cpu_name = "NP405L",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP4GS3 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40B10000,
- .cpu_name = "NP4GS3",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP405H */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41410000,
- .cpu_name = "NP405H",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405GPr */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x50910000,
- .cpu_name = "405GPr",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STBx25xx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x51510000,
- .cpu_name = "STBx25xx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405LP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41F10000,
- .cpu_name = "405LP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* Xilinx Virtex-II Pro */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20010000,
- .cpu_name = "Virtex-II Pro",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* Xilinx Virtex-4 FX */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20011000,
- .cpu_name = "Virtex-4 FX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x51210000,
- .cpu_name = "405EP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. A/B with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910007,
- .cpu_name = "405EX Rev. A/B",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. C without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000d,
- .cpu_name = "405EX Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. C with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000f,
- .cpu_name = "405EX Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. D without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910003,
- .cpu_name = "405EX Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. D with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910005,
- .cpu_name = "405EX Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. A/B without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910001,
- .cpu_name = "405EXr Rev. A/B",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. C without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910009,
- .cpu_name = "405EXr Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. C with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000b,
- .cpu_name = "405EXr Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. D without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910000,
- .cpu_name = "405EXr Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. D with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910002,
- .cpu_name = "405EXr Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- {
- /* 405EZ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41510000,
- .cpu_name = "405EZ",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* APM8018X */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x7ff11432,
- .cpu_name = "APM8018X",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic 40x PPC)",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- }
-
-#endif /* CONFIG_40x */
-#ifdef CONFIG_44x
- {
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000850,
- .cpu_name = "440GR Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000858,
- .cpu_name = "440EP Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- {
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x400008d3,
- .cpu_name = "440GR Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000ff7,
- .pvr_value = 0x400008d4,
- .cpu_name = "440EP Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x400008db,
- .cpu_name = "440EP Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* 440GRX */
- .pvr_mask = 0xf0000ffb,
- .pvr_value = 0x200008D0,
- .cpu_name = "440GRX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440grx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000ffb,
- .pvr_value = 0x200008D8,
- .cpu_name = "440EPX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440epx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GP Rev. B */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000440,
- .cpu_name = "440GP Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440gp",
- },
- { /* 440GP Rev. C */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000481,
- .cpu_name = "440GP Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440gp",
- },
- { /* 440GX Rev. A */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000850,
- .cpu_name = "440GX Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. B */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000851,
- .cpu_name = "440GX Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. C */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000892,
- .cpu_name = "440GX Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. F */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000894,
- .cpu_name = "440GX Rev. F",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440SP Rev. A */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53200891,
- .cpu_name = "440SP Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* 440SPe Rev. A */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53400890,
- .cpu_name = "440SPe Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440spe,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440SPe Rev. B */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53400891,
- .cpu_name = "440SPe Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440spe,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440 in Xilinx Virtex-5 FXT */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x7ff21910,
- .cpu_name = "440 in Virtex-5 FXT",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440x5,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460EX */
- .pvr_mask = 0xffff0006,
- .pvr_value = 0x13020002,
- .cpu_name = "460EX",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460ex,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460EX Rev B */
- .pvr_mask = 0xffff0007,
- .pvr_value = 0x13020004,
- .cpu_name = "460EX Rev. B",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460ex,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460GT */
- .pvr_mask = 0xffff0006,
- .pvr_value = 0x13020000,
- .cpu_name = "460GT",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460gt,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460GT Rev B */
- .pvr_mask = 0xffff0007,
- .pvr_value = 0x13020005,
- .cpu_name = "460GT Rev. B",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460gt,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460SX */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x13541800,
- .cpu_name = "460SX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460sx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 464 in APM821xx */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x12C41C80,
- .cpu_name = "APM821XX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_apm821xx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
-#ifdef CONFIG_PPC_47x
- { /* 476 DD2 core */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x11a52080,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476fpe */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x7ff50000,
- .cpu_name = "476fpe",
- .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476 iss */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00050000,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476 others */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x11a50000,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
-#endif /* CONFIG_PPC_47x */
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic 44x PPC)",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- }
-#endif /* CONFIG_44x */
-#ifdef CONFIG_E200
- { /* e200z5 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81000000,
- .cpu_name = "e200z5",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* e200z6 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81100000,
- .cpu_name = "e200z6",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic E200 PPC)",
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_e200,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- }
-#endif /* CONFIG_E200 */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_E500
-#ifdef CONFIG_PPC32
-#ifndef CONFIG_PPC_E500MC
- { /* e500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80200000,
- .cpu_name = "e500",
- .cpu_features = CPU_FTRS_E500,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500v1,
- .machine_check = machine_check_e500,
- .platform = "ppc8540",
- },
- { /* e500v2 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80210000,
- .cpu_name = "e500v2",
- .cpu_features = CPU_FTRS_E500_2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP |
- PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500v2,
- .machine_check = machine_check_e500,
- .platform = "ppc8548",
- .cpu_down_flush = cpu_down_flush_e500v2,
- },
-#else
- { /* e500mc */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80230000,
- .cpu_name = "e500mc",
- .cpu_features = CPU_FTRS_E500MC,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500mc,
- .machine_check = machine_check_e500mc,
- .platform = "ppce500mc",
- .cpu_down_flush = cpu_down_flush_e500mc,
- },
-#endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC_E500MC
- { /* e5500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80240000,
- .cpu_name = "e5500",
- .cpu_features = CPU_FTRS_E5500,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e5500,
-#ifndef CONFIG_PPC32
- .cpu_restore = __restore_cpu_e5500,
-#endif
- .machine_check = machine_check_e500mc,
- .platform = "ppce5500",
- .cpu_down_flush = cpu_down_flush_e5500,
- },
- { /* e6500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80400000,
- .cpu_name = "e6500",
- .cpu_features = CPU_FTRS_E6500,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 6,
- .oprofile_cpu_type = "ppc/e6500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e6500,
-#ifndef CONFIG_PPC32
- .cpu_restore = __restore_cpu_e6500,
-#endif
- .machine_check = machine_check_e500mc,
- .platform = "ppce6500",
- .cpu_down_flush = cpu_down_flush_e6500,
- },
-#endif /* CONFIG_PPC_E500MC */
-#ifdef CONFIG_PPC32
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic E500 PPC)",
- .cpu_features = CPU_FTRS_E500,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_e500,
- .platform = "powerpc",
- }
-#endif /* CONFIG_PPC32 */
-#endif /* CONFIG_E500 */
-};
+#include "cpu_specs.h"
void __init set_cur_cpu_spec(struct cpu_spec *s)
{
@@ -2177,30 +67,18 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
if (old.num_pmcs && !s->num_pmcs) {
t->num_pmcs = old.num_pmcs;
t->pmc_type = old.pmc_type;
- t->oprofile_type = old.oprofile_type;
- t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv;
- t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr;
- t->oprofile_mmcra_clear = old.oprofile_mmcra_clear;
/*
- * If we have passed through this logic once before and
- * have pulled the default case because the real PVR was
- * not found inside cpu_specs[], then we are possibly
- * running in compatibility mode. In that case, let the
- * oprofiler know which set of compatibility counters to
- * pull from by making sure the oprofile_cpu_type string
- * is set to that of compatibility mode. If the
- * oprofile_cpu_type already has a value, then we are
- * possibly overriding a real PVR with a logical one,
- * and, in that case, keep the current value for
- * oprofile_cpu_type.
+ * Let's ensure that the
+ * fix for the PMAO bug is enabled on compatibility mode.
*/
- if (old.oprofile_cpu_type != NULL) {
- t->oprofile_cpu_type = old.oprofile_cpu_type;
- t->oprofile_type = old.oprofile_type;
- }
+ t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
+ /* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+ if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+ t->mmu_features |= MMU_FTR_KUAP;
+
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
/*
@@ -2230,6 +108,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
struct cpu_spec *s = cpu_specs;
int i;
+ BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs));
+
s = PTRRELOC(s);
for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
@@ -2286,7 +166,7 @@ void __init cpu_feature_keys_init(void)
struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = {
[0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
};
-EXPORT_SYMBOL_GPL(mmu_feature_keys);
+EXPORT_SYMBOL(mmu_feature_keys);
void __init mmu_feature_keys_init(void)
{
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 05745ddbd229..2086fa6cdc25 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -12,12 +12,14 @@
#include <linux/crash_dump.h>
#include <linux/io.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <asm/code-patching.h>
#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/firmware.h>
-#include <linux/uaccess.h>
+#include <linux/uio.h>
#include <asm/rtas.h>
+#include <asm/inst.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -34,7 +36,7 @@ void __init reserve_kdump_trampoline(void)
static void __init create_trampoline(unsigned long addr)
{
- unsigned int *p = (unsigned int *)addr;
+ u32 *p = (u32 *)addr;
/* The maximum range of a single instruction branch, is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
@@ -44,8 +46,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- patch_instruction(p, PPC_INST_NOP);
- patch_branch(++p, addr + PHYSICAL_START, 0);
+ patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+ patch_branch(p + 1, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
@@ -67,33 +69,8 @@ void __init setup_kdump_trampoline(void)
}
#endif /* CONFIG_NONSTATIC_KERNEL */
-static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
- unsigned long offset, int userbuf)
-{
- if (userbuf) {
- if (copy_to_user((char __user *)buf, (vaddr + offset), csize))
- return -EFAULT;
- } else
- memcpy(buf, (vaddr + offset), csize);
-
- return csize;
-}
-
-/**
- * copy_oldmem_page - copy one page from "oldmem"
- * @pfn: page frame number to be copied
- * @buf: target memory address for the copy; this can be in kernel address
- * space or user address space (see @userbuf)
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page (based on pfn) to begin the copy
- * @userbuf: if set, @buf is in user address space, use copy_to_user(),
- * otherwise @buf is in kernel address space, use memcpy().
- *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
{
void *vaddr;
phys_addr_t paddr;
@@ -106,16 +83,27 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (memblock_is_region_memory(paddr, csize)) {
vaddr = __va(paddr);
- csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ csize = copy_to_iter(vaddr + offset, csize, iter);
} else {
vaddr = ioremap_cache(paddr, PAGE_SIZE);
- csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ csize = copy_to_iter(vaddr + offset, csize, iter);
iounmap(vaddr);
}
return csize;
}
+/*
+ * Return true only when kexec based kernel dump capturing method is used.
+ * This ensures all restritions applied for kdump case are not automatically
+ * applied for fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+ return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
#ifdef CONFIG_PPC_RTAS
/*
* The crashkernel region will almost always overlap the RTAS region, so
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index cc14aa6c4a1b..909a05cd2809 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -9,14 +9,14 @@
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
-#include <asm/debugfs.h>
#include <asm/machdep.h>
#include <asm/hvcall.h>
+#include <asm/firmware.h>
bool dawr_force_enable;
EXPORT_SYMBOL_GPL(dawr_force_enable);
-int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(int nr, struct arch_hw_breakpoint *brk)
{
unsigned long dawr, dawrx, mrd;
@@ -28,7 +28,7 @@ int set_dawr(struct arch_hw_breakpoint *brk)
dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
/*
* DAWR length is stored in field MDR bits 48:53. Matches range in
- * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
+ * doublewords (64 bits) biased by -1 eg. 0b000000=1DW and
* 0b111111=64DW.
* brk->hw_len is in bytes.
* This aligns up to double word size, shifts and does the bias.
@@ -37,17 +37,26 @@ int set_dawr(struct arch_hw_breakpoint *brk)
dawrx |= (mrd & 0x3f) << (63 - 53);
if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
-
- mtspr(SPRN_DAWR, dawr);
- mtspr(SPRN_DAWRX, dawrx);
+ return ppc_md.set_dawr(nr, dawr, dawrx);
+
+ if (nr == 0) {
+ mtspr(SPRN_DAWR0, dawr);
+ mtspr(SPRN_DAWRX0, dawrx);
+ } else {
+ mtspr(SPRN_DAWR1, dawr);
+ mtspr(SPRN_DAWRX1, dawrx);
+ }
return 0;
}
-static void set_dawr_cb(void *info)
+static void disable_dawrs_cb(void *info)
{
- set_dawr(info);
+ struct arch_hw_breakpoint null_brk = {0};
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_dawr(i, &null_brk);
}
static ssize_t dawr_write_file_bool(struct file *file,
@@ -60,7 +69,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
/* Send error to user if they hypervisor won't allow us to write DAWR */
if (!dawr_force_enable &&
firmware_has_feature(FW_FEATURE_LPAR) &&
- set_dawr(&null_brk) != H_SUCCESS)
+ set_dawr(0, &null_brk) != H_SUCCESS)
return -ENODEV;
rc = debugfs_write_file_bool(file, user_buf, count, ppos);
@@ -69,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
/* If we are clearing, make sure all CPUs have the DAWR cleared */
if (!dawr_force_enable)
- smp_call_function(set_dawr_cb, &null_brk, 0);
+ smp_call_function(disable_dawrs_cb, NULL, 0);
return rc;
}
@@ -92,7 +101,7 @@ static int __init dawr_force_setup(void)
if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
/* Turn DAWR off by default, but allow admin to turn it on */
debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
- powerpc_debugfs_root,
+ arch_debugfs_dir,
&dawr_force_enable,
&dawr_enable_fops);
}
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f17ff1200eaa..5712dd846263 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -12,77 +12,23 @@
#include <linux/hardirq.h>
#include <asm/dbell.h>
+#include <asm/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
#include <asm/trace.h>
#ifdef CONFIG_SMP
-/*
- * Doorbells must only be used if CPU_FTR_DBELL is available.
- * msgsnd is used in HV, and msgsndp is used in !HV.
- *
- * These should be used by platform code that is aware of restrictions.
- * Other arch code should use ->cause_ipi.
- *
- * doorbell_global_ipi() sends a dbell to any target CPU.
- * Must be used only by architectures that address msgsnd target
- * by PIR/get_hard_smp_processor_id.
- */
-void doorbell_global_ipi(int cpu)
-{
- u32 tag = get_hard_smp_processor_id(cpu);
-
- kvmppc_set_host_ipi(cpu);
- /* Order previous accesses vs. msgsnd, which is treated as a store */
- ppc_msgsnd_sync();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
- * Must be used only by architectures that address msgsnd target
- * by TIR/cpu_thread_in_core.
- */
-void doorbell_core_ipi(int cpu)
-{
- u32 tag = cpu_thread_in_core(cpu);
-
- kvmppc_set_host_ipi(cpu);
- /* Order previous accesses vs. msgsnd, which is treated as a store */
- ppc_msgsnd_sync();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * Attempt to cause a core doorbell if destination is on the same core.
- * Returns 1 on success, 0 on failure.
- */
-int doorbell_try_core_ipi(int cpu)
-{
- int this_cpu = get_cpu();
- int ret = 0;
-
- if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
- doorbell_core_ipi(cpu);
- ret = 1;
- }
-
- put_cpu();
-
- return ret;
-}
-
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
trace_doorbell_entry(regs);
ppc_msgsync();
- may_hard_irq_enable();
+ if (should_hard_irq_enable(regs))
+ do_hard_irq_enable();
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
@@ -90,13 +36,12 @@ void doorbell_exception(struct pt_regs *regs)
smp_ipi_demux_relaxed(); /* already performed the barrier */
trace_doorbell_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
#else /* CONFIG_SMP */
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
printk(KERN_WARNING "Received doorbell on non-smp system\n");
}
#endif /* CONFIG_SMP */
-
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e486d1d78de2..8920862ffd79 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -10,26 +10,66 @@
#include <linux/pci.h>
#include <asm/iommu.h>
-/*
- * Generic iommu implementation
- */
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+#define can_map_direct(dev, addr) \
+ ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
-/*
- * The coherent mask may be smaller than the real mask, check if we can
- * really use a direct window.
- */
-static inline bool dma_iommu_alloc_bypass(struct device *dev)
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
+{
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ return can_map_direct(dev, addr);
+}
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
{
- return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
- dma_direct_supported(dev, dev->coherent_dma_mask);
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ return is_direct_handle(dev, dma_handle);
}
-static inline bool dma_iommu_map_bypass(struct device *dev,
- unsigned long attrs)
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+ int nents)
{
- return dev->archdata.iommu_bypass &&
- (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
+ struct scatterlist *s;
+ int i;
+
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
+ return false;
+ }
+
+ return true;
+}
+
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+ int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!is_direct_handle(dev, s->dma_address + s->length))
+ return false;
+ }
+
+ return true;
}
+#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
+
+/*
+ * Generic iommu implementation
+ */
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
@@ -39,8 +79,6 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -50,11 +88,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_free(dev, size, vaddr, dma_handle, attrs);
- else
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
- dma_handle);
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -67,9 +101,6 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_page(dev, page, offset, size, direction,
- attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, dma_get_mask(dev), direction, attrs);
}
@@ -79,11 +110,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
- direction, attrs);
- else
- dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+ attrs);
}
@@ -91,8 +119,6 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
dma_get_mask(dev), direction, attrs);
}
@@ -101,11 +127,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
- else
- dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
}
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
@@ -113,21 +136,34 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- return phb->controller_ops.iommu_bypass_supported &&
- phb->controller_ops.iommu_bypass_supported(pdev, mask);
+ if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported)
+ return false;
+ return phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
- struct iommu_table *tbl = get_iommu_table_base(dev);
+ struct iommu_table *tbl;
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
- dev->archdata.iommu_bypass = true;
- dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ /*
+ * dma_iommu_bypass_supported() sets dma_max when there is
+ * 1:1 mapping but it is somehow limited.
+ * ibm,pmemory is one example.
+ */
+ dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+ if (!dev->dma_ops_bypass)
+ dev_warn(dev,
+ "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+ dev->bus_dma_limit);
+ else
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
+ tbl = get_iommu_table_base(dev);
+
if (!tbl) {
dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
@@ -141,7 +177,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
}
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
- dev->archdata.iommu_bypass = false;
+ dev->dma_ops_bypass = false;
return 1;
}
@@ -150,50 +186,25 @@ u64 dma_iommu_get_required_mask(struct device *dev)
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
- if (!tbl)
- return 0;
-
if (dev_is_pci(dev)) {
u64 bypass_mask = dma_direct_get_required_mask(dev);
- if (dma_iommu_bypass_supported(dev, bypass_mask))
+ if (dma_iommu_dma_supported(dev, bypass_mask)) {
+ dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask);
return bypass_mask;
+ }
}
- mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ if (!tbl)
+ return 0;
+
+ mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+ tbl->it_page_shift - 1);
mask += mask - 1;
return mask;
}
-static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
- size_t sz, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_device(dev, addr, sz, dir);
-}
-
-extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-
-extern void dma_iommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
-}
-
const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -203,10 +214,8 @@ const struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
- .sync_single_for_cpu = dma_iommu_sync_for_cpu,
- .sync_single_for_device = dma_iommu_sync_for_device,
- .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
- .sync_sg_for_device = dma_iommu_sync_sg_for_device,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
index ffbbbc432612..5b07ca7b73aa 100644
--- a/arch/powerpc/kernel/dma-mask.c
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index fc7816126a40..ba256c37bcc0 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -10,6 +10,7 @@
#include <asm/swiotlb.h>
unsigned int ppc_swiotlb_enable;
+unsigned int ppc_swiotlb_flags;
void __init swiotlb_detect_4g(void)
{
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 180b3a5d1001..c3fb9fdf5bd7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -10,6 +10,7 @@
#include <linux/jump_label.h>
#include <linux/libfdt.h>
#include <linux/memblock.h>
+#include <linux/of_fdt.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/string.h>
@@ -17,15 +18,14 @@
#include <asm/cputable.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/mce.h>
#include <asm/mmu.h>
-#include <asm/oprofile_impl.h>
-#include <asm/prom.h>
#include <asm/setup.h>
/* Device-tree visible constants follow */
-#define ISA_V2_07B 2070
#define ISA_V3_0B 3000
+#define ISA_V3_1 3100
#define USABLE_PR (1U << 0)
#define USABLE_OS (1U << 1)
@@ -64,44 +64,25 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
-
static int hv_mode;
static struct {
u64 lpcr;
- u64 lpcr_clear;
u64 hfscr;
u64 fscr;
+ u64 pcr;
} system_registers;
static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
- u64 lpcr;
-
- /*
- * LPCR is restored by the power on engine already. It can be changed
- * after early init e.g., by radix enable, and we have no unified API
- * for saving and restoring such SPRs.
- *
- * This ->restore hook should really be removed from idle and register
- * restore moved directly into the idle restore code, because this code
- * doesn't know how idle is implemented or what it needs restored here.
- *
- * The best we can do to accommodate secondary boot and idle restore
- * for now is "or" LPCR with existing.
- */
- lpcr = mfspr(SPRN_LPCR);
- lpcr |= system_registers.lpcr;
- lpcr &= ~system_registers.lpcr_clear;
- mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, PCR_MASK);
+ mtspr(SPRN_PCR, system_registers.pcr);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -121,8 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
.dcache_bsize = 32, /* cache info init. */
.num_pmcs = 0,
.pmc_type = PPC_PMC_DEFAULT,
- .oprofile_cpu_type = NULL,
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = NULL,
.cpu_restore = __restore_cpu_cpufeatures,
.machine_check_early = NULL,
@@ -139,7 +118,6 @@ static void __init cpufeatures_setup_cpu(void)
/* Initialize the base environment -- clear FSCR/HFSCR. */
hv_mode = !!(mfmsr() & MSR_HV);
if (hv_mode) {
- /* CPU_FTR_HVMODE is used early in PACA setup */
cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
mtspr(SPRN_HFSCR, 0);
}
@@ -238,6 +216,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
@@ -275,13 +254,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
return 1;
}
-static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f)
-{
- cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN;
-
- return 1;
-}
-
static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
{
u64 lpcr;
@@ -300,6 +272,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_ISL;
@@ -319,7 +294,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
- system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
@@ -333,20 +310,29 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
{
-#ifdef CONFIG_PPC_RADIX_MMU
+ if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+ return 0;
+
+ cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
- cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
-#endif
- return 0;
}
static int __init feat_enable_dscr(struct dt_cpu_feature *f)
{
u64 lpcr;
+ /*
+ * Linux relies on FSCR[DSCR] being clear, so that we can take the
+ * facility unavailable interrupt and track the task's usage of DSCR.
+ * See facility_unavailable_exception().
+ * Clear the bit here so that feat_enable() doesn't set it.
+ */
+ f->fscr_bit_nr = -1;
+
feat_enable(f);
lpcr = mfspr(SPRN_LPCR);
@@ -357,7 +343,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
return 1;
}
-static void hfscr_pmu_enable(void)
+static void __init hfscr_pmu_enable(void)
{
u64 hfscr = mfspr(SPRN_HFSCR);
hfscr |= PPC_BIT(60);
@@ -372,7 +358,7 @@ static void init_pmu_power8(void)
}
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
@@ -400,7 +386,6 @@ static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
cur_cpu_spec->num_pmcs = 6;
cur_cpu_spec->pmc_type = PPC_PMC_IBM;
- cur_cpu_spec->oprofile_cpu_type = "ppc64/power8";
return 1;
}
@@ -411,7 +396,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -436,7 +421,39 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
cur_cpu_spec->num_pmcs = 6;
cur_cpu_spec->pmc_type = PPC_PMC_IBM;
- cur_cpu_spec->oprofile_cpu_type = "ppc64/power9";
+
+ return 1;
+}
+
+static void init_pmu_power10(void)
+{
+ init_pmu_power9();
+
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+}
+
+static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power10();
+ init_pmu_registers = init_pmu_power10;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+
+ return 1;
+}
+
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power10";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
return 1;
}
@@ -553,6 +570,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
return 1;
}
+static int __init feat_enable_mma(struct dt_cpu_feature *f)
+{
+ u64 pcr;
+
+ feat_enable(f);
+ pcr = mfspr(SPRN_PCR);
+ pcr &= ~PCR_MMA_DIS;
+ mtspr(SPRN_PCR, pcr);
+
+ return 1;
+}
+
struct dt_cpu_feature_match {
const char *name;
int (*enable)(struct dt_cpu_feature *f);
@@ -566,6 +595,7 @@ static struct dt_cpu_feature_match __initdata
{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
{"smt", feat_enable_smt, 0},
{"interrupt-facilities", feat_enable, 0},
+ {"system-call-vectored", feat_enable, 0},
{"timer-facilities", feat_enable, 0},
{"timer-facilities-v3", feat_enable, 0},
{"debug-facilities", feat_enable, 0},
@@ -588,7 +618,7 @@ static struct dt_cpu_feature_match __initdata
{"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
{"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
{"idle-nap", feat_enable_idle_nap, 0},
- {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
+ /* alignment-interrupt-dsisr ignored */
{"idle-stop", feat_enable_idle_stop, 0},
{"machine-check-power8", feat_enable_mce_power8, 0},
{"performance-monitor-power8", feat_enable_pmu_power8, 0},
@@ -617,7 +647,9 @@ static struct dt_cpu_feature_match __initdata
{"group-start-register", feat_enable, 0},
{"pc-relative-addressing", feat_enable, 0},
{"machine-check-power9", feat_enable_mce_power9, 0},
+ {"machine-check-power10", feat_enable_mce_power10, 0},
{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+ {"performance-monitor-power10", feat_enable_pmu_power10, 0},
{"event-based-branch-v3", feat_enable, 0},
{"random-number-generator", feat_enable, 0},
{"system-call-vectored", feat_disable, 0},
@@ -626,6 +658,9 @@ static struct dt_cpu_feature_match __initdata
{"vector-binary128", feat_enable, 0},
{"vector-binary16", feat_enable, 0},
{"wait-v3", feat_enable, 0},
+ {"prefix-instructions", feat_enable, 0},
+ {"matrix-multiply-assist", feat_enable_mma, 0},
+ {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
};
static bool __initdata using_dt_cpu_ftrs;
@@ -651,10 +686,15 @@ static void __init cpufeatures_setup_start(u32 isa)
{
pr_info("setup for ISA %d\n", isa);
- if (isa >= 3000) {
+ if (isa >= ISA_V3_0B) {
cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
}
+
+ if (isa >= ISA_V3_1) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
+ }
}
static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
@@ -727,30 +767,33 @@ static __init void cpufeatures_cpu_quirks(void)
/*
* Not all quirks can be derived from the cpufeatures device tree.
*/
- if ((version & 0xffffefff) == 0x004e0200)
- ; /* DD2.0 has no feature flag */
- else if ((version & 0xffffefff) == 0x004e0201)
+ if ((version & 0xffffefff) == 0x004e0200) {
+ /* DD2.0 has no feature flag */
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+ } else if ((version & 0xffffefff) == 0x004e0201) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+ } else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+ } else if ((version & 0xffffefff) == 0x004e0203) {
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- } else if ((version & 0xffff0000) == 0x004e0000)
+ } else if ((version & 0xffff0000) == 0x004e0000) {
/* DD2.1 and up have DD2_1 */
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ }
if ((version & 0xffff0000) == 0x004e0000) {
- cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
update_tlbie_feature_flag(version);
- /*
- * PKEY was not in the initial base or feature node
- * specification, but it should become optional in the next
- * cpu feature version sequence.
- */
- cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
}
static void __init cpufeatures_setup_finished(void)
@@ -768,6 +811,7 @@ static void __init cpufeatures_setup_finished(void)
system_registers.lpcr = mfspr(SPRN_LPCR);
system_registers.hfscr = mfspr(SPRN_HFSCR);
system_registers.fscr = mfspr(SPRN_FSCR);
+ system_registers.pcr = mfspr(SPRN_PCR);
pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
@@ -1055,14 +1099,14 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
prop = of_get_flat_dt_prop(node, "display-name", NULL);
if (prop && strlen((char *)prop) != 0) {
- strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+ strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
cur_cpu_spec->cpu_name = dt_cpu_name;
}
cpufeatures_setup_finished();
- memblock_free(__pa(dt_cpu_features),
- sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+ memblock_free(dt_cpu_features,
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
return 0;
}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index ef2ad4945904..03f1135ef64f 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <asm/setup.h>
#include <asm/sections.h>
-#include <asm/asm-prototypes.h>
/*
* We're called here very early in the boot.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index bc8a551013be..ab316e155ea9 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -21,9 +21,9 @@
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
+#include <linux/debugfs.h>
#include <linux/atomic.h>
-#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
@@ -167,39 +167,33 @@ void eeh_show_enabled(void)
*/
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
u32 cfg;
int cap, i;
int n = 0, l = 0;
char buffer[128];
- if (!pdn) {
- pr_warn("EEH: Note: No error log for absent device.\n");
- return 0;
- }
-
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
- pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
- pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
- eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
- eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
/* Gather bridge-specific registers */
if (edev->mode & EEH_DEV_BRIDGE) {
- eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
- eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
pr_warn("EEH: Bridge control: %04x\n", cfg);
}
@@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
/* Dump out the PCI-X command and status regs */
cap = edev->pcix_cap;
if (cap) {
- eeh_ops->read_config(pdn, cap, 4, &cfg);
+ eeh_ops->read_config(edev, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
- eeh_ops->read_config(pdn, cap+4, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
pr_warn("EEH: PCI-X status: %08x\n", cfg);
}
@@ -223,7 +217,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E AER capability register set follows:\n");
for (i=0; i<=13; i++) {
- eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -352,31 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
- pte_t *ptep;
- unsigned long pa;
- int hugepage_shift;
-
- /*
- * We won't find hugepages here(this is iomem). Hence we are not
- * worried about _PAGE_SPLITTING/collapse. Also we will not hit
- * page table free, because of init_mm.
- */
- ptep = find_init_mm_pte(token, &hugepage_shift);
- if (!ptep)
- return token;
-
- pa = pte_pfn(*ptep);
-
- /* On radix we can do hugepage mappings for io, so handle that */
- if (hugepage_shift) {
- pa <<= hugepage_shift;
- pa |= token & ((1ul << hugepage_shift) - 1);
- } else {
- pa <<= PAGE_SHIFT;
- pa |= token & (PAGE_SIZE - 1);
- }
-
- return pa;
+ return ppc_find_vmap_phys(token);
}
/*
@@ -429,6 +399,14 @@ out:
return ret;
}
+static inline const char *eeh_driver_name(struct pci_dev *pdev)
+{
+ if (pdev)
+ return dev_driver_string(&pdev->dev);
+
+ return "<null>";
+}
+
/**
* eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
* @edev: eeh device
@@ -472,11 +450,6 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
return 0;
}
- if (!pe->addr && !pe->config_addr) {
- eeh_stats.no_cfg_addr++;
- return 0;
- }
-
/*
* On PowerNV platform, we might already have fenced PHB
* there and we need take care of that firstly.
@@ -503,7 +476,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
- if (pe->check_count % EEH_MAX_FAILS == 0) {
+ if (pe->check_count == EEH_MAX_FAILS) {
dn = pci_device_to_OF_node(dev);
if (dn)
location = of_get_property(dn, "ibm,loc-code",
@@ -624,6 +597,7 @@ EXPORT_SYMBOL(eeh_check_failure);
/**
* eeh_pci_enable - Enable MMIO or DMA transfers for this slot
* @pe: EEH PE
+ * @function: EEH option
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
@@ -726,7 +700,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
@@ -734,73 +707,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
return;
/* Apply customization from firmware */
- if (pdn && eeh_ops->restore_config)
- eeh_ops->restore_config(pdn);
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(edev);
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
}
-int eeh_restore_vf_config(struct pci_dn *pdn)
-{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- u32 devctl, cmd, cap2, aer_capctl;
- int old_mps;
-
- if (edev->pcie_cap) {
- /* Restore MPS */
- old_mps = (ffs(pdn->mps) - 8) << 5;
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
- devctl |= old_mps;
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
-
- /* Disable Completion Timeout if possible */
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
- 4, &cap2);
- if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
- eeh_ops->read_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, &cap2);
- cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
- eeh_ops->write_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, cap2);
- }
- }
-
- /* Enable SERR and parity checking */
- eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
- cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
- eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
- /* Enable report various errors */
- if (edev->pcie_cap) {
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_CERE;
- devctl |= (PCI_EXP_DEVCTL_NFERE |
- PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_URRE);
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
- }
-
- /* Enable ECRC generation and check */
- if (edev->pcie_cap && edev->aer_cap) {
- eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, &aer_capctl);
- aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
- eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, aer_capctl);
- }
-
- return 0;
-}
-
/**
* pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
@@ -850,14 +764,14 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
default:
eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
- };
+ }
return 0;
}
/**
- * eeh_set_pe_freset - Check the required reset for the indicated device
- * @data: EEH device
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @edev: EEH device
* @flag: return value
*
* Each device might have its preferred reset type: fundamental or
@@ -896,6 +810,7 @@ static void eeh_pe_refreeze_passed(struct eeh_pe *root)
/**
* eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
+ * @include_passed: include passed-through devices?
*
* This function executes a full reset procedure on a PE, including setting
* the appropriate flags, performing a fundamental or hot reset, and then
@@ -977,15 +892,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
*/
void eeh_save_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn;
int i;
- pdn = eeh_dev_to_pdn(edev);
- if (!pdn)
+ if (!edev)
return;
for (i = 0; i < 16; i++)
- eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);
+ eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]);
/*
* For PCI bridges including root port, we need enable bus
@@ -997,56 +910,6 @@ void eeh_save_bars(struct eeh_dev *edev)
edev->config_space[1] |= PCI_COMMAND_MASTER;
}
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
- if (!ops->name) {
- pr_warn("%s: Invalid EEH ops name for %p\n",
- __func__, ops);
- return -EINVAL;
- }
-
- if (eeh_ops && eeh_ops != ops) {
- pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
- __func__, eeh_ops->name, ops->name);
- return -EEXIST;
- }
-
- eeh_ops = ops;
-
- return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
- if (!name || !strlen(name)) {
- pr_warn("%s: Invalid EEH ops name\n",
- __func__);
- return -EINVAL;
- }
-
- if (eeh_ops && !strcmp(eeh_ops->name, name)) {
- eeh_ops = NULL;
- return 0;
- }
-
- return -EEXIST;
-}
-
static int eeh_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
@@ -1058,45 +921,66 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+static int eeh_device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ /*
+ * Note: It's not possible to perform EEH device addition (i.e.
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+ * the device's resources, which have not yet been set up.
+ */
+ case BUS_NOTIFY_DEL_DEVICE:
+ eeh_remove_device(to_pci_dev(dev));
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+ .notifier_call = eeh_device_notifier,
+};
+
/**
- * eeh_init - EEH initialization
+ * eeh_init - System wide EEH initialization
+ * @ops: struct to trace EEH operation callback functions
*
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check. If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
- *
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
+ * It's the platform's job to call this from an arch_initcall().
*/
-static int eeh_init(void)
+int eeh_init(struct eeh_ops *ops)
{
struct pci_controller *hose, *tmp;
int ret = 0;
+ /* the platform should only initialise EEH once */
+ if (WARN_ON(eeh_ops))
+ return -EEXIST;
+ if (WARN_ON(!ops))
+ return -ENOENT;
+ eeh_ops = ops;
+
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
- pr_warn("%s: Failed to register notifier (%d)\n",
+ pr_warn("%s: Failed to register reboot notifier (%d)\n",
__func__, ret);
return ret;
}
- /* call platform initialization function */
- if (!eeh_ops) {
- pr_warn("%s: Platform EEH operation not found\n",
- __func__);
- return -EEXIST;
- } else if ((ret = eeh_ops->init()))
+ ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register bus notifier (%d)\n",
+ __func__, ret);
return ret;
+ }
/* Initialize PHB PEs */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
- eeh_dev_phb_init_dynamic(hose);
+ eeh_phb_pe_create(hose);
eeh_addr_cache_init();
@@ -1104,94 +988,47 @@ static int eeh_init(void)
return eeh_event_init();
}
-core_initcall_sync(eeh_init);
-
-/**
- * eeh_add_device_early - Enable EEH for the indicated device node
- * @pdn: PCI device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct pci_dn *pdn)
-{
- struct pci_controller *phb = pdn ? pdn->phb : NULL;
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
- if (!edev)
- return;
-
- if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
- return;
-
- /* USB Bus children of PCI devices will not have BUID's */
- if (NULL == phb ||
- (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
- return;
-
- eeh_ops->probe(pdn, NULL);
-}
-
/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @pdn: PCI device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct pci_dn *pdn)
-{
- struct pci_dn *n;
-
- if (!pdn)
- return;
-
- list_for_each_entry(n, &pdn->child_list, list)
- eeh_add_device_tree_early(n);
- eeh_add_device_early(pdn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
* devices that were added after system boot (e.g. hotplug, dlpar).
*/
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev)
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ /*
+ * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+ * already called for this device.
+ */
+ if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+ pci_dbg(dev, "Already bound to an eeh_dev!\n");
return;
+ }
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- edev = pdn_to_eeh_dev(pdn);
- eeh_edev_dbg(edev, "Adding device\n");
- if (edev->pdev == dev) {
- eeh_edev_dbg(edev, "Device already referenced!\n");
+ edev = eeh_ops->probe(dev);
+ if (!edev) {
+ pr_debug("EEH: Adding device failed\n");
return;
}
/*
- * The EEH cache might not be removed correctly because of
- * unbalanced kref to the device during unplug time, which
- * relies on pcibios_release_device(). So we have to remove
- * that here explicitly.
+ * FIXME: We rely on pcibios_release_device() to remove the
+ * existing EEH state. The release function is only called if
+ * the pci_dev's refcount drops to zero so if something is
+ * keeping a ref to a device (e.g. a filesystem) we need to
+ * remove the old EEH state.
+ *
+ * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
*/
- if (edev->pdev) {
- eeh_rmv_from_parent_pe(edev);
+ if (edev->pdev && edev->pdev != dev) {
+ eeh_pe_tree_remove(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
- edev->mode &= ~EEH_DEV_SYSFS;
/*
* We definitely should have the PCI device removed
@@ -1199,69 +1036,16 @@ void eeh_add_device_late(struct pci_dev *dev)
* into error handler afterwards.
*/
edev->mode |= EEH_DEV_NO_HANDLER;
-
- edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
}
- if (eeh_has_flag(EEH_PROBE_MODE_DEV))
- eeh_ops->probe(pdn, NULL);
-
+ /* bind the pdev and the edev together */
edev->pdev = dev;
dev->dev.archdata.edev = edev;
-
eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
}
/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- if (eeh_has_flag(EEH_FORCE_DISABLED))
- return;
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_add_device_late(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_device_tree_late(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_sysfs_add_device(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_sysfs_files(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
@@ -1296,17 +1080,11 @@ void eeh_remove_device(struct pci_dev *dev)
edev->pdev = NULL;
/*
- * The flag "in_error" is used to trace EEH devices for VFs
- * in error state or not. It's set in eeh_report_error(). If
- * it's not set, eeh_report_{reset,resume}() won't be called
- * for the VF EEH device.
+ * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+ * remove the sysfs files before clearing dev.archdata.edev
*/
- edev->in_error = false;
- dev->dev.archdata.edev = NULL;
- if (!(edev->pe->state & EEH_PE_KEEP))
- eeh_rmv_from_parent_pe(edev);
- else
- edev->mode |= EEH_DEV_DISCONNECTED;
+ if (edev->mode & EEH_DEV_SYSFS)
+ eeh_sysfs_remove_device(dev);
/*
* We're removing from the PCI subsystem, that means
@@ -1317,8 +1095,19 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode |= EEH_DEV_NO_HANDLER;
eeh_addr_cache_rmv_dev(dev);
- eeh_sysfs_remove_device(dev);
- edev->mode &= ~EEH_DEV_SYSFS;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_pe_tree_remove(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
}
int eeh_unfreeze_pe(struct eeh_pe *pe)
@@ -1540,7 +1329,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
/*
* EEH functionality could possibly be disabled, just
- * return error for the case. And the EEH functinality
+ * return error for the case. And the EEH functionality
* isn't expected to be disabled on one specific PE.
*/
switch (option) {
@@ -1664,6 +1453,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
* eeh_pe_reset - Issue PE reset according to specified type
* @pe: EEH PE
* @option: reset type
+ * @include_passed: include passed-through devices?
*
* The routine is called to reset the specified PE with the
* indicated type, either fundamental reset or hot reset.
@@ -1735,12 +1525,12 @@ EXPORT_SYMBOL_GPL(eeh_pe_configure);
* eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
* @pe: the indicated PE
* @type: error type
- * @function: error function
+ * @func: error function
* @addr: address
* @mask: address mask
*
* The routine is called to inject the specified PCI error, which
- * is determined by @type and @function, to the indicated PE for
+ * is determined by @type and @func, to the indicated PE for
* testing purpose.
*/
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
@@ -1766,6 +1556,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+#ifdef CONFIG_PROC_FS
static int proc_eeh_show(struct seq_file *m, void *v)
{
if (!eeh_enabled()) {
@@ -1792,8 +1583,38 @@ static int proc_eeh_show(struct seq_file *m, void *v)
return 0;
}
+#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_DEBUG_FS
+
+
+static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return ERR_PTR(-EFAULT);
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return ERR_PTR(-EINVAL);
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return ERR_PTR(-ENODEV);
+
+ return pdev;
+}
+
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
if (val)
@@ -1850,7 +1671,7 @@ static ssize_t eeh_force_recover_write(struct file *filp,
return -ENODEV;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
@@ -1886,26 +1707,13 @@ static ssize_t eeh_dev_check_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
struct eeh_dev *edev;
- char buf[20];
int ret;
- memset(buf, 0, sizeof(buf));
- ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
- if (!ret)
- return -EFAULT;
-
- ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
- if (ret != 4) {
- pr_err("%s: expected 4 args, got %d\n", __func__, ret);
- return -EINVAL;
- }
-
- pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
- if (!pdev)
- return -ENODEV;
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
edev = pci_dev_to_eeh_dev(pdev);
if (!edev) {
@@ -1915,8 +1723,8 @@ static ssize_t eeh_dev_check_write(struct file *filp,
}
ret = eeh_dev_check_failure(edev);
- pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
- domain, bus, dev, fn, ret);
+ pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n",
+ pci_name(pdev), ret);
pci_dev_put(pdev);
@@ -1996,7 +1804,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
* PE freeze. Using the in_8() accessor skips the eeh detection hook
* so the freeze hook so the EEH Detection machinery won't be
* triggered here. This is to match the usual behaviour of EEH
- * where the HW will asyncronously freeze a PE and it's up to
+ * where the HW will asynchronously freeze a PE and it's up to
* the kernel to notice and deal with it.
*
* 3. Turn Memory space back on. This is more important for VFs
@@ -2027,25 +1835,12 @@ static ssize_t eeh_dev_break_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
- char buf[20];
int ret;
- memset(buf, 0, sizeof(buf));
- ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
- if (!ret)
- return -EFAULT;
-
- ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
- if (ret != 4) {
- pr_err("%s: expected 4 args, got %d\n", __func__, ret);
- return -EINVAL;
- }
-
- pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
- if (!pdev)
- return -ENODEV;
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
ret = eeh_debugfs_break_device(pdev);
pci_dev_put(pdev);
@@ -2063,6 +1858,53 @@ static const struct file_operations eeh_dev_break_fops = {
.read = eeh_debugfs_dev_usage,
};
+static ssize_t eeh_dev_can_recover(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct pci_driver *drv;
+ struct pci_dev *pdev;
+ size_t ret;
+
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ /*
+ * In order for error recovery to work the driver needs to implement
+ * .error_detected(), so it can quiesce IO to the device, and
+ * .slot_reset() so it can re-initialise the device after a reset.
+ *
+ * Ideally they'd implement .resume() too, but some drivers which
+ * we need to support (notably IPR) don't so I guess we can tolerate
+ * that.
+ *
+ * .mmio_enabled() is mostly there as a work-around for devices which
+ * take forever to re-init after a hot reset. Implementing that is
+ * strictly optional.
+ */
+ drv = pci_dev_driver(pdev);
+ if (drv &&
+ drv->err_handler &&
+ drv->err_handler->error_detected &&
+ drv->err_handler->slot_reset) {
+ ret = count;
+ } else {
+ ret = -EOPNOTSUPP;
+ }
+
+ pci_dev_put(pdev);
+
+ return ret;
+}
+
+static const struct file_operations eeh_dev_can_recover_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_can_recover,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -2071,22 +1913,25 @@ static int __init eeh_init_proc(void)
proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file_unsafe("eeh_enable", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_enable_dbgfs_ops);
debugfs_create_u32("eeh_max_freezes", 0600,
- powerpc_debugfs_root, &eeh_max_freezes);
+ arch_debugfs_dir, &eeh_max_freezes);
debugfs_create_bool("eeh_disable_recovery", 0600,
- powerpc_debugfs_root,
+ arch_debugfs_dir,
&eeh_debugfs_no_recover);
debugfs_create_file_unsafe("eeh_dev_check", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_dev_check_fops);
debugfs_create_file_unsafe("eeh_dev_break", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_force_recover_fops);
+ debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_dev_can_recover_fops);
eeh_cache_debugfs_init();
#endif
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index cf11277ebd02..2f9dbf8ad2ee 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -12,8 +12,8 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
+#include <linux/debugfs.h>
#include <asm/pci-bridge.h>
-#include <asm/debugfs.h>
#include <asm/ppc-pci.h>
@@ -159,18 +159,10 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
int i;
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn) {
- pr_warn("PCI: no pci dn found for dev=%s\n",
- pci_name(dev));
- return;
- }
-
- edev = pdn_to_eeh_dev(pdn);
+ edev = pci_dev_to_eeh_dev(dev);
if (!edev) {
pr_warn("PCI: no EEH dev found for %s\n",
pci_name(dev));
@@ -272,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
{
struct pci_io_addr_range *piar;
struct rb_node *n;
+ unsigned long flags;
- spin_lock(&pci_io_addr_cache_root.piar_lock);
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
@@ -281,15 +274,15 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
- spin_unlock(&pci_io_addr_cache_root.piar_lock);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
-void eeh_cache_debugfs_init(void)
+void __init eeh_cache_debugfs_init(void)
{
debugfs_create_file_unsafe("eeh_address_cache", 0400,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_addr_cache_fops);
}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
deleted file mode 100644
index 7370185c7a05..000000000000
--- a/arch/powerpc/kernel/eeh_dev.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The file intends to implement dynamic creation of EEH device, which will
- * be bound with OF node and PCI device simutaneously. The EEH devices would
- * be foundamental information for EEH core components to work proerly. Besides,
- * We have to support multiple situations where dynamic creation of EEH device
- * is required:
- *
- * 1) Before PCI emunation starts, we need create EEH devices according to the
- * PCI sensitive OF nodes.
- * 2) When PCI emunation is done, we need do the binding between PCI device and
- * the associated EEH device.
- * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
- * will be created while PCI sensitive OF node is detected from DR.
- * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
- * PHB is newly inserted, we also need create EEH devices accordingly.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-/**
- * eeh_dev_init - Create EEH device according to OF node
- * @pdn: PCI device node
- *
- * It will create EEH device according to the given OF node. The function
- * might be called by PCI emunation, DR, PHB hotplug.
- */
-struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
-{
- struct eeh_dev *edev;
-
- /* Allocate EEH device */
- edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev)
- return NULL;
-
- /* Associate EEH device with OF node */
- pdn->edev = edev;
- edev->pdn = pdn;
- edev->bdfn = (pdn->busno << 8) | pdn->devfn;
- edev->controller = pdn->phb;
-
- return edev;
-}
-
-/**
- * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
- * @phb: PHB
- *
- * Scan the PHB OF node and its child association, then create the
- * EEH devices accordingly
- */
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
-{
- /* EEH PE for PHB */
- eeh_phb_pe_create(phb);
-}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3dd1a422fc29..48773d2d9be3 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -16,7 +16,6 @@
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
struct eeh_rmv_data {
@@ -40,7 +39,7 @@ static int eeh_result_priority(enum pci_ers_result result)
case PCI_ERS_RESULT_NEED_RESET:
return 6;
default:
- WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
+ WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result);
return 0;
}
};
@@ -61,7 +60,7 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
case PCI_ERS_RESULT_NO_AER_DRIVER:
return "no AER driver";
default:
- WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
+ WARN_ONCE(1, "Unknown result type: %d\n", result);
return "unknown";
}
};
@@ -104,13 +103,13 @@ static bool eeh_edev_actionable(struct eeh_dev *edev)
*/
static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return NULL;
- if (!try_module_get(pdev->driver->driver.owner))
+ if (!try_module_get(pdev->dev.driver->owner))
return NULL;
- return pdev->driver;
+ return to_pci_driver(pdev->dev.driver);
}
/**
@@ -122,10 +121,10 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
*/
static inline void eeh_pcid_put(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return;
- module_put(pdev->driver->driver.owner);
+ module_put(pdev->dev.driver->owner);
}
/**
@@ -214,7 +213,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
pci_save_state(pdev);
}
-static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
+static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
@@ -425,8 +424,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
- if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
- eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+ if (eeh_ops->notify_resume)
+ eeh_ops->notify_resume(edev);
#endif
return PCI_ERS_RESULT_NONE;
}
@@ -477,7 +476,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
+ pci_iov_add_virtfn(edev->physfn, edev->vf_index);
#endif
return NULL;
}
@@ -521,16 +520,8 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
- pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
edev->pdev = NULL;
-
- /*
- * We have to set the VF PE number to invalid one, which is
- * required to plug the VF successfully.
- */
- pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
@@ -550,7 +541,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
continue;
edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
- eeh_rmv_from_parent_pe(edev);
+ eeh_pe_tree_remove(edev);
}
return NULL;
@@ -759,7 +750,7 @@ static void eeh_pe_cleanup(struct eeh_pe *pe)
* @pdev: pci_dev to check
*
* This function may return a false positive if we can't determine the slot's
- * presence state. This might happen for for PCIe slots if the PE containing
+ * presence state. This might happen for PCIe slots if the PE containing
* the upstream bridge is also frozen, or the bridge is part of the same PE
* as the device.
*
@@ -913,18 +904,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
#endif /* CONFIG_STACKTRACE */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- result = PCI_ERS_RESULT_DISCONNECT;
- }
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
+ goto recover_failed;
+ }
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -936,39 +928,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe,
- eeh_report_error, &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
- }
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if (result == PCI_ERS_RESULT_DISCONNECT)
+ goto recover_failed;
+
+ /*
+ * Error logged on a PHB are always fences which need a full
+ * PHB reset to clear so force that to happen.
+ */
+ if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+ result = PCI_ERS_RESULT_NEED_RESET;
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- result = PCI_ERS_RESULT_DISCONNECT;
- }
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ goto recover_failed;
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
- }
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -978,9 +969,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset with hotplug activity\n");
rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
- pr_warn("%s: Unable to reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
+ pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
}
@@ -988,10 +978,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -999,15 +989,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_report_mmio_enabled, &result);
}
}
-
- /* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -1025,16 +1013,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset without hotplug activity\n");
rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
- pr_warn("%s: Cannot reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
- } else {
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset,
- &result);
+ pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
+
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
if ((result == PCI_ERS_RESULT_RECOVERED) ||
@@ -1062,45 +1049,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
pr_info("EEH: Recovery successful.\n");
- } else {
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ goto out;
+ }
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+recover_failed:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
- }
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
out:
@@ -1190,6 +1179,17 @@ void eeh_handle_special_event(void)
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
+ /* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_report(
+ "error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
phb_pe = eeh_phb_pe_get(hose);
@@ -1198,16 +1198,6 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
-
- /* Notify all devices to be down */
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_pe_report(
- "error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
bus = eeh_pe_bus_get(phb_pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for "
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index a7a8dc182efb..c23a454af08a 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -143,7 +143,7 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
int eeh_send_failure_event(struct eeh_pe *pe)
{
/*
- * If we've manually supressed recovery events via debugfs
+ * If we've manually suppressed recovery events via debugfs
* then just drop it on the floor.
*/
if (eeh_debugfs_no_recover) {
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 177852e39a25..e0ce81279624 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/string.h>
@@ -251,43 +252,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root,
/**
* __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
*
* For one particular PE, it can be identified by PE address
* or tranditional BDF address. BDF address is composed of
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
-struct eeh_pe_get_flag {
- int pe_no;
- int config_addr;
-};
-
static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
{
- struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
+ int *target_pe = flag;
- /* Unexpected PHB PE */
+ /* PHB PEs are special and should be ignored */
if (pe->type & EEH_PE_PHB)
return NULL;
- /*
- * We prefer PE address. For most cases, we should
- * have non-zero PE address
- */
- if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (tmp->pe_no == pe->addr)
- return pe;
- } else {
- if (tmp->pe_no &&
- (tmp->pe_no == pe->addr))
- return pe;
- }
-
- /* Try BDF address */
- if (tmp->config_addr &&
- (tmp->config_addr == pe->config_addr))
+ if (*target_pe == pe->addr)
return pe;
return NULL;
@@ -297,7 +276,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* eeh_pe_get - Search PE based on the given address
* @phb: PCI controller
* @pe_no: PE number
- * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -306,75 +284,30 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
{
struct eeh_pe *root = eeh_phb_pe_get(phb);
- struct eeh_pe_get_flag tmp = { pe_no, config_addr };
- struct eeh_pe *pe;
-
- pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
- return pe;
+ return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
}
/**
- * eeh_pe_get_parent - Retrieve the parent PE
+ * eeh_pe_tree_insert - Add EEH device to parent PE
* @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
*
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
- struct eeh_dev *parent;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
- /*
- * It might have the case for the indirect parent
- * EEH device already having associated PE, but
- * the direct parent EEH device doesn't have yet.
- */
- if (edev->physfn)
- pdn = pci_get_pdn(edev->physfn);
- else
- pdn = pdn ? pdn->parent : NULL;
- while (pdn) {
- /* We're poking out of PCI territory */
- parent = pdn_to_eeh_dev(pdn);
- if (!parent)
- return NULL;
-
- if (parent->pe)
- return parent->pe;
-
- pdn = pdn->parent;
- }
-
- return NULL;
-}
-
-/**
- * eeh_add_to_parent_pe - Add EEH device to parent PE
- * @edev: EEH device
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
*
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
+ * If @new_pe_parent is NULL then the new PE will be inserted under
+ * directly under the PHB.
*/
-int eeh_add_to_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
{
+ struct pci_controller *hose = edev->controller;
struct eeh_pe *pe, *parent;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- int config_addr = (pdn->busno << 8) | (pdn->devfn);
-
- /* Check if the PE number is valid */
- if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
- return -EINVAL;
- }
/*
* Search the PE has been existing or not according
@@ -382,7 +315,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
+ pe = eeh_pe_get(hose, edev->pe_config_addr);
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
@@ -399,8 +332,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
parent = parent->parent;
}
- eeh_edev_dbg(edev,
- "Added to device PE (parent: PE#%x)\n",
+ eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
pe->parent->addr);
} else {
/* Mark the PE as type of PCI bus */
@@ -416,15 +348,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Create a new EEH PE */
if (edev->physfn)
- pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
+ pe = eeh_pe_alloc(hose, EEH_PE_VF);
else
- pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
+ pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
- pe->addr = edev->pe_config_addr;
- pe->config_addr = config_addr;
+
+ pe->addr = edev->pe_config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -432,34 +364,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* to PHB directly. Otherwise, we have to associate the
* PE with its parent.
*/
- parent = eeh_pe_get_parent(edev);
- if (!parent) {
- parent = eeh_phb_pe_get(pdn->phb);
- if (!parent) {
+ if (!new_pe_parent) {
+ new_pe_parent = eeh_phb_pe_get(hose);
+ if (!new_pe_parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, pdn->phb->global_number);
+ __func__, hose->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
}
}
- pe->parent = parent;
+
+ /* link new PE into the tree */
+ pe->parent = new_pe_parent;
+ list_add_tail(&pe->child, &new_pe_parent->child_list);
/*
* Put the newly created PE into the child list and
* link the EEH device accordingly.
*/
- list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
- pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+ new_pe_parent->addr);
return 0;
}
/**
- * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * eeh_pe_tree_remove - Remove one EEH device from the associated PE
* @edev: EEH device
*
* The PE hierarchy tree might be changed when doing PCI hotplug.
@@ -467,7 +400,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* during EEH recovery. So we have to call the function remove the
* corresponding PE accordingly if necessary.
*/
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_remove(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
bool keep, recover;
@@ -698,7 +631,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
*/
static void eeh_bridge_check_link(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int cap;
uint32_t val;
int timeout = 0;
@@ -714,34 +646,33 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check slot status */
cap = edev->pcie_cap;
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
/* Check power status if we have the capability */
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
if (val & PCI_EXP_SLTCAP_PCP) {
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
- eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
msleep(2 * 1000);
}
}
/* Enable link */
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
val &= ~PCI_EXP_LNKCTL_LD;
- eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
/* Check link */
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
- if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
+ if (!edev->pdev->link_active_reporting) {
+ eeh_edev_dbg(edev, "No link reporting capability\n");
msleep(1000);
return;
}
@@ -752,7 +683,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
msleep(20);
timeout += 20;
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
if (val & PCI_EXP_LNKSTA_DLLLA)
break;
}
@@ -769,7 +700,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
static void eeh_restore_bridge_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int i;
/*
@@ -777,20 +707,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev)
* Bus numbers and windows: 0x18 - 0x30
*/
for (i = 4; i < 13; i++)
- eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* Rom: 0x38 */
- eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]);
+ eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
/* Cache line & Latency timer: 0xC 0xD */
- eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
- SAVED_BYTE(PCI_LATENCY_TIMER));
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
/* Max latency, min grant, interrupt ping and line: 0x3C */
- eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/* PCI Command: 0x4 */
- eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] |
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
/* Check the PCIe link is ready */
@@ -799,28 +729,27 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev)
static void eeh_restore_device_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int i;
u32 cmd;
for (i = 4; i < 10; i++)
- eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* 12 == Expansion ROM Address */
- eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]);
+ eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
- eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
- eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/*
* Restore PERR & SERR bits, some devices require it,
* don't touch the other command bits
*/
- eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
if (edev->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
else
@@ -829,7 +758,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
cmd |= PCI_COMMAND_SERR;
else
cmd &= ~PCI_COMMAND_SERR;
- eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd);
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
}
/**
@@ -843,16 +772,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
*/
static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
/* Do special restore for bridges */
if (edev->mode & EEH_DEV_BRIDGE)
eeh_restore_bridge_bars(edev);
else
eeh_restore_device_bars(edev);
- if (eeh_ops->restore_config && pdn)
- eeh_ops->restore_config(pdn);
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(edev);
}
/**
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index ab44d965a53c..706e1eb95efe 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -6,6 +6,7 @@
*
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/stat.h>
#include <asm/ppc-pci.h>
@@ -14,7 +15,7 @@
/**
* EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
* @_name: name of file in sysfs directory
- * @_memb: name of member in struct pci_dn to access
+ * @_memb: name of member in struct eeh_dev to access
* @_format: printf format for display
*
* All of the attributes look very similar, so just
@@ -75,7 +76,7 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
-#ifdef CONFIG_PCI_IOV
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES)
static ssize_t eeh_notify_resume_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -86,7 +87,6 @@ static ssize_t eeh_notify_resume_show(struct device *dev,
if (!edev || !edev->pe)
return -ENODEV;
- pdn = pci_get_pdn(pdev);
return sprintf(buf, "%d\n", pdn->last_allow_rc);
}
@@ -100,7 +100,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev,
if (!edev || !edev->pe || !eeh_ops->notify_resume)
return -ENODEV;
- if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+ if (eeh_ops->notify_resume(edev))
return -EIO;
return count;
@@ -132,7 +132,7 @@ static void eeh_notify_resume_remove(struct pci_dev *pdev)
#else
static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
-#endif /* CONFIG_PCI_IOV */
+#endif /* CONFIG_PCI_IOV && CONFIG PPC_PSERIES*/
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
@@ -160,22 +160,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ WARN_ON(eeh_enabled());
+ return;
+ }
+
+ edev->mode &= ~EEH_DEV_SYSFS;
+
/*
* The parent directory might have been removed. We needn't
* continue for that case.
*/
- if (!pdev->dev.kobj.sd) {
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
+ if (!pdev->dev.kobj.sd)
return;
- }
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
eeh_notify_resume_remove(pdev);
-
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d60908ea37fb..7eda33a24bb4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -18,6 +18,8 @@
#include <linux/err.h>
#include <linux/sys.h>
#include <linux/threads.h>
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -27,253 +29,37 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
-#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
#include <asm/kup.h>
#include <asm/bug.h>
+#include <asm/interrupt.h>
#include "head_32.h"
/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+
+/*
* Align to 4k in order to ensure that all functions modyfing srr0/srr1
* fit into one page in order to not encounter a TLB miss between the
* modification of srr0/srr1 and the associated rfi.
*/
.align 12
-#ifdef CONFIG_BOOKE
- .globl mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
- mfspr r0,SPRN_DSRR0
- stw r0,_DSRR0(r11)
- mfspr r0,SPRN_DSRR1
- stw r0,_DSRR1(r11)
- /* fall through */
-
- .globl debug_transfer_to_handler
-debug_transfer_to_handler:
- mfspr r0,SPRN_CSRR0
- stw r0,_CSRR0(r11)
- mfspr r0,SPRN_CSRR1
- stw r0,_CSRR1(r11)
- /* fall through */
-
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
- mfspr r0,SPRN_MAS0
- stw r0,MAS0(r11)
- mfspr r0,SPRN_MAS1
- stw r0,MAS1(r11)
- mfspr r0,SPRN_MAS2
- stw r0,MAS2(r11)
- mfspr r0,SPRN_MAS3
- stw r0,MAS3(r11)
- mfspr r0,SPRN_MAS6
- stw r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
- mfspr r0,SPRN_MAS7
- stw r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
- mfspr r0,SPRN_MMUCR
- stw r0,MMUCR(r11)
-#endif
- mfspr r0,SPRN_SRR0
- stw r0,_SRR0(r11)
- mfspr r0,SPRN_SRR1
- stw r0,_SRR1(r11)
-
- /* set the stack limit to the current stack */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,SAVED_KSP_LIMIT(r11)
- rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
-
-#ifdef CONFIG_40x
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
- lwz r0,crit_r10@l(0)
- stw r0,GPR10(r11)
- lwz r0,crit_r11@l(0)
- stw r0,GPR11(r11)
- mfspr r0,SPRN_SRR0
- stw r0,crit_srr0@l(0)
- mfspr r0,SPRN_SRR1
- stw r0,crit_srr1@l(0)
-
- /* set the stack limit to the current stack */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,saved_ksp_limit@l(0)
- rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
-
-/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
- */
- .globl transfer_to_handler_full
-transfer_to_handler_full:
- SAVE_NVGPRS(r11)
- /* fall through */
-
- .globl transfer_to_handler
-transfer_to_handler:
- stw r2,GPR2(r11)
- stw r12,_NIP(r11)
- stw r9,_MSR(r11)
- andi. r2,r9,MSR_PR
- mfctr r12
- mfspr r2,SPRN_XER
- stw r12,_CTR(r11)
- stw r2,_XER(r11)
- mfspr r12,SPRN_SPRG_THREAD
- beq 2f /* if from user, fix up THREAD.regs */
- addi r2, r12, -THREAD
- addi r11,r1,STACK_FRAME_OVERHEAD
- stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
-#endif
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_lock r11, r12
-#endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r9,TASK_CPU(r2)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-#endif
-
- b 3f
-
-2: /* if from kernel, check interrupted DOZE/NAP mode and
- * check for stack overflow
- */
- kuap_save_and_lock r11, r12, r9, r2, r0
- addi r2, r12, -THREAD
- lwz r9,KSP_LIMIT(r12)
- cmplw r1,r9 /* if r1 <= ksp_limit */
- ble- stack_ovf /* then the kernel stack overflowed */
-5:
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500)
+ .globl prepare_transfer_to_handler
+prepare_transfer_to_handler:
+ /* if from kernel, check interrupted DOZE/NAP mode */
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
- .globl transfer_to_handler_cont
-transfer_to_handler_cont:
-3:
- mflr r9
- tovirt(r2, r2) /* set r2 to current */
- lwz r11,0(r9) /* virtual address of handler */
- lwz r9,4(r9) /* where to go when done */
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * When tracing IRQ state (lockdep) we enable the MMU before we call
- * the IRQ tracing functions as they might access vmalloc space or
- * perform IOs for console output.
- *
- * To speed up the syscall path where interrupts stay on, let's check
- * first if we are changing the MSR value at all.
- */
- tophys(r12, r1)
- lwz r12,_MSR(r12)
- andi. r12,r12,MSR_EE
- bne 1f
-
- /* MSR isn't changing, just transition directly */
-#endif
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
- * keep interrupts disabled at this point otherwise we might risk
- * taking an interrupt before we tell lockdep they are enabled.
- */
- lis r12,reenable_mmu@h
- ori r12,r12,reenable_mmu@l
- LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r0
- SYNC
- RFI
-
-reenable_mmu:
- /*
- * We save a bunch of GPRs,
- * r3 can be different from GPR3(r1) at this point, r9 and r11
- * contains the old MSR and handler address respectively,
- * r4 & r5 can contain page fault arguments that need to be passed
- * along as well. r12, CCR, CTR, XER etc... are left clobbered as
- * they aren't useful past this point (aren't syscall arguments),
- * the rest is restored from the exception frame.
- */
-
- stwu r1,-32(r1)
- stw r9,8(r1)
- stw r11,12(r1)
- stw r3,16(r1)
- stw r4,20(r1)
- stw r5,24(r1)
-
- /* If we are disabling interrupts (normal case), simply log it with
- * lockdep
- */
-1: bl trace_hardirqs_off
-2: lwz r5,24(r1)
- lwz r4,20(r1)
- lwz r3,16(r1)
- lwz r11,12(r1)
- lwz r9,8(r1)
- addi r1,r1,32
- lwz r0,GPR0(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtctr r11
- mtlr r9
- bctr /* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
+ blr
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
@@ -283,481 +69,162 @@ reenable_mmu:
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
- kuap_restore r11, r2, r3, r4, r5
+ REST_GPR(2, r11)
b fast_exception_return
-#endif
-
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
- /* sometimes we use a statically-allocated stack, which is OK. */
- lis r12,_end@h
- ori r12,r12,_end@l
- cmplw r1,r12
- ble 5b /* r1 <= &_end is OK */
- SAVE_NVGPRS(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r1,init_thread_union@ha
- addi r1,r1,init_thread_union@l
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
- lis r9,StackOverflow@ha
- addi r9,r9,StackOverflow@l
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
- mtspr SPRN_SRR0,r9
- mtspr SPRN_SRR1,r10
- SYNC
- RFI
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-trace_syscall_entry_irq_off:
- /*
- * Syscall shouldn't happen while interrupts are disabled,
- * so let's do a warning here.
- */
-0: trap
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
- bl trace_hardirqs_on
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */
- /* Now enable for real */
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
- mtmsr r10
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+SYM_FUNC_START(__kuep_lock)
+ lwz r9, THREAD+THSR0(r2)
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+SYM_FUNC_END(__kuep_lock)
- REST_GPR(0, r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
- b DoSyscall
-#endif /* CONFIG_TRACE_IRQFLAGS */
+SYM_FUNC_START_LOCAL(__kuep_unlock)
+ lwz r9, THREAD+THSR0(r2)
+ rlwinm r9,r9,0,~SR_NX
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+SYM_FUNC_END(__kuep_unlock)
+
+.macro kuep_lock
+ bl __kuep_lock
+.endm
+.macro kuep_unlock
+ bl __kuep_unlock
+.endm
+#else
+.macro kuep_lock
+.endm
+.macro kuep_unlock
+.endm
+#endif
.globl transfer_to_syscall
transfer_to_syscall:
-#ifdef CONFIG_TRACE_IRQFLAGS
- andi. r12,r9,MSR_EE
- beq- trace_syscall_entry_irq_off
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-/*
- * Handle a system call.
- */
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "entry_32.S",N_SO,0,0,0f
-0:
-
-_GLOBAL(DoSyscall)
- stw r3,ORIG_GPR3(r1)
- li r12,0
- stw r12,RESULT(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Make sure interrupts are enabled */
- mfmsr r11
- andi. r12,r11,MSR_EE
- /* We came in with interrupts disabled, we WARN and mark them enabled
- * for lockdep now */
-0: tweqi r12, 0
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-#endif /* CONFIG_TRACE_IRQFLAGS */
- lwz r11,TI_FLAGS(r2)
- andi. r11,r11,_TIF_SYSCALL_DOTRACE
- bne- syscall_dotrace
-syscall_dotrace_cont:
- cmplwi 0,r0,NR_syscalls
- lis r10,sys_call_table@h
- ori r10,r10,sys_call_table@l
- slwi r0,r0,2
- bge- 66f
+ stw r3, ORIG_GPR3(r1)
+ stw r11, GPR1(r1)
+ stw r11, 0(r1)
+ mflr r12
+ stw r12, _LINK(r1)
+#ifdef CONFIG_BOOKE_OR_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#endif
+ lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ SAVE_GPR(2, r1)
+ addi r12,r12,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r1)
+ li r2, INTERRUPT_SYSCALL
+ stw r12,STACK_INT_FRAME_MARKER(r1)
+ stw r2,_TRAP(r1)
+ SAVE_GPR(0, r1)
+ SAVE_GPRS(3, 8, r1)
+ addi r2,r10,-THREAD
+ SAVE_NVGPRS(r1)
+ kuep_lock
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .66f above has
- * committed.
- */
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+ bl system_call_exception
- lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
- mtlr r10
- addi r9,r1,STACK_FRAME_OVERHEAD
- PPC440EP_ERR42
- blrl /* Call handler */
- .globl ret_from_syscall
ret_from_syscall:
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- stw r3,GPR3(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- lwz r3,GPR3(r1)
-#endif
- mr r6,r3
- /* disable interrupts so current_thread_info()->flags can't change */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
- /* Note: We don't bother telling lockdep about it */
- SYNC
- MTMSRD(r10)
- lwz r9,TI_FLAGS(r2)
- li r8,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
- cmplw 0,r3,r8
- blt+ syscall_exit_cont
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-syscall_exit_cont:
- lwz r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't normally happen.
- */
- andi. r10,r8,MSR_EE
- bne+ 1f
- stw r3,GPR3(r1)
- bl trace_hardirqs_off
- lwz r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* If the process has its own DBCR0 value, load it up. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,0
+ bl syscall_exit_prepare
+#ifdef CONFIG_PPC_47x
lis r4,icache_44x_need_flush@ha
lwz r5,icache_44x_need_flush@l(r4)
cmplwi cr0,r5,0
- bne- 2f
-1:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
-#endif /* CONFIG_44x */
-BEGIN_FTR_SECTION
- lwarx r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
- ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_unlock r5, r7
-#endif
- kuap_check r2, r4
+ bne- .L44x_icache_flush
+#endif /* CONFIG_PPC_47x */
+.L44x_icache_flush_return:
+ kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
- mtcr r5
lwz r7,_NIP(r1)
- lwz r2,GPR2(r1)
- lwz r1,GPR1(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
+ lwz r8,_MSR(r1)
+ cmpwi r3,0
+ REST_GPR(3, r1)
+syscall_exit_finish:
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- SYNC
- RFI
+
+ bne 3f
+ mtcr r5
+
+1: REST_GPR(2, r1)
+ REST_GPR(1, r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+
+3: mtcr r5
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ REST_NVGPRS(r1)
+ mtctr r4
+ mtxer r5
+ REST_GPR(0, r1)
+ REST_GPRS(3, 12, r1)
+ b 1b
+
#ifdef CONFIG_44x
-2: li r7,0
+.L44x_icache_flush:
+ li r7,0
iccci r0,r0
stw r7,icache_44x_need_flush@l(r4)
- b 1b
+ b .L44x_icache_flush_return
#endif /* CONFIG_44x */
-66: li r3,-ENOSYS
- b ret_from_syscall
-
.globl ret_from_fork
ret_from_fork:
REST_NVGPRS(r1)
bl schedule_tail
- li r3,0
+ li r3,0 /* fork() return value */
b ret_from_syscall
- .globl ret_from_kernel_thread
-ret_from_kernel_thread:
- REST_NVGPRS(r1)
+ .globl ret_from_kernel_user_thread
+ret_from_kernel_user_thread:
bl schedule_tail
- mtlr r14
+ mtctr r14
mr r3,r15
PPC440EP_ERR42
- blrl
+ bctrl
li r3,0
b ret_from_syscall
-/* Traced system call support */
-syscall_dotrace:
- SAVE_NVGPRS(r1)
- li r0,0xc00
- stw r0,_TRAP(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
+ .globl start_kernel_thread
+start_kernel_thread:
+ bl schedule_tail
+ mtctr r14
+ mr r3,r15
+ PPC440EP_ERR42
+ bctrl
/*
- * Restore argument registers possibly just changed.
- * We use the return value of do_syscall_trace_enter
- * for call number to look up in the table (r0).
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
*/
- mr r0,r3
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- REST_NVGPRS(r1)
-
- cmplwi r0,NR_syscalls
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- bge- ret_from_syscall
- b syscall_dotrace_cont
-
-syscall_exit_work:
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmplw 0,r3,r8
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-
-1: stw r6,RESULT(r1) /* Save result */
- stw r3,GPR3(r1) /* Update return value */
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r2,TI_FLAGS
-3: lwarx r8,0,r12
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r12
-#endif
- stwcx. r8,0,r12
- bne- 3b
-
-4: /* Anything which requires enabling interrupts? */
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except
-
- /* Re-enable interrupts. There is no need to trace that with
- * lockdep as we are supposed to have IRQs on at this point
- */
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10)
-
- /* Save NVGPRS if they're not saved already */
- lwz r4,_TRAP(r1)
- andi. r4,r4,1
- beq 5f
- SAVE_NVGPRS(r1)
- li r4,0xc00
- stw r4,_TRAP(r1)
-5:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except_full
-
-/*
- * The fork/clone functions need to copy the full register set into
- * the child process. Therefore we need to save all the nonvolatile
- * registers (r13 - r31) before calling the C code.
- */
- .globl ppc_fork
-ppc_fork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_fork
-
- .globl ppc_vfork
-ppc_vfork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_vfork
-
- .globl ppc_clone
-ppc_clone:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_clone
-
- .globl ppc_clone3
-ppc_clone3:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_clone3
-
- .globl ppc_swapcontext
-ppc_swapcontext:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_swapcontext
-
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
- .globl handle_page_fault
-handle_page_fault:
- stw r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S_32
- andis. r0,r5,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
-#endif
- bl do_page_fault
- cmpwi r3,0
- beq+ ret_from_except
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- clrrwi r0,r0,1
- stw r0,_TRAP(r1)
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except_full
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- clrrwi r0,r0,1
- stw r0,_TRAP(r1)
- bl do_break
- b ret_from_except_full
-#endif
-
-/*
- * This routine switches between two different tasks. The process
- * state of one is saved on its kernel stack. Then the state
- * of the other is restored from its kernel stack. The memory
- * management hardware is updated to the second process's state.
- * Finally, we can return to the second process.
- * On entry, r3 points to the THREAD for the current task, r4
- * points to the THREAD for the new task.
- *
- * This routine is always called with interrupts disabled.
- *
- * Note: there are two ways to get to the "going out" portion
- * of this code; either by coming in via the entry (_switch)
- * or via "fork" which must set up an environment equivalent
- * to the "_switch" path. If you change this , you'll have to
- * change the fork code also.
- *
- * The code which creates the new task context is in 'copy_thread'
- * in arch/ppc/kernel/process.c
- */
-_GLOBAL(_switch)
- stwu r1,-INT_FRAME_SIZE(r1)
- mflr r0
- stw r0,INT_FRAME_SIZE+4(r1)
- /* r3-r12 are caller saved -- Cort */
- SAVE_NVGPRS(r1)
- stw r0,_NIP(r1) /* Return to switch caller */
- mfmsr r11
- li r0,MSR_FP /* Disable floating-point */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VEC@h /* Disable altivec */
- mfspr r12,SPRN_VRSAVE /* save vrsave register value */
- stw r12,THREAD+THREAD_VRSAVE(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_SPE@h /* Disable SPE */
- mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
- stw r12,THREAD+THREAD_SPEFSCR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
- and. r0,r0,r11 /* FP or altivec or SPE enabled? */
- beq+ 1f
- andc r11,r11,r0
- MTMSRD(r11)
- isync
-1: stw r11,_MSR(r1)
- mfcr r10
- stw r10,_CCR(r1)
- stw r1,KSP(r3) /* Set old stack pointer */
-
- kuap_check r2, r4
-#ifdef CONFIG_SMP
- /* We need a sync somewhere here to make sure that if the
- * previous task gets rescheduled on another CPU, it sees all
- * stores it has performed on this one.
- */
- sync
-#endif /* CONFIG_SMP */
-
- tophys(r0,r4)
- mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
- lwz r1,KSP(r4) /* Load new stack pointer */
-
- /* save the old current 'last' for return value */
- mr r3,r2
- addi r2,r4,-THREAD /* Update current */
-
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_VRSAVE(r2)
- mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_SPEFSCR(r2)
- mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-
- lwz r0,_CCR(r1)
- mtcrf 0xFF,r0
- /* r3-r12 are destroyed -- Cort */
- REST_NVGPRS(r1)
-
- lwz r4,_NIP(r1) /* Return to _switch caller in new task */
- mtlr r4
- addi r1,r1,INT_FRAME_SIZE
- blr
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
.globl fast_exception_return
fast_exception_return:
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
andi. r10,r9,MSR_RI /* check for recoverable interrupt */
- beq 1f /* if not, we've got problems */
+ beq 3f /* if not, we've got problems */
#endif
-2: REST_4GPRS(3, r11)
- lwz r10,_CCR(r11)
- REST_GPR(1, r11)
+2: lwz r10,_CCR(r11)
+ REST_GPRS(1, 6, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
- /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ /* Clear the exception marker on the stack to avoid confusing stacktrace */
li r10, 0
stw r10, 8(r11)
REST_GPR(10, r11)
@@ -768,279 +235,152 @@ fast_exception_return:
mtspr SPRN_SRR0,r12
REST_GPR(9, r11)
REST_GPR(12, r11)
- lwz r11,GPR11(r11)
- SYNC
- RFI
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1: lis r3,exc_exit_restart_end@ha
- addi r3,r3,exc_exit_restart_end@l
- cmplw r12,r3
-#if CONFIG_PPC_BOOK3S_601
- bge 2b
-#else
- bge 3f
-#endif
- lis r4,exc_exit_restart@ha
- addi r4,r4,exc_exit_restart@l
- cmplw r12,r4
-#if CONFIG_PPC_BOOK3S_601
- blt 2b
-#else
- blt 3f
+ REST_GPR(11, r11)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
#endif
- lis r3,fee_restarts@ha
- tophys(r3,r3)
- lwz r5,fee_restarts@l(r3)
- addi r5,r5,1
- stw r5,fee_restarts@l(r3)
- mr r12,r4 /* restart at exc_exit_restart */
- b 2b
-
- .section .bss
- .align 2
-fee_restarts:
- .space 4
- .previous
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
/* aargh, a nonrecoverable interrupt, panic */
/* aargh, we don't know which trap this is */
-/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
li r10,-1
stw r10,_TRAP(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r10,MSR_KERNEL@h
- ori r10,r10,MSR_KERNEL@l
- bl transfer_to_handler_full
- .long unrecoverable_exception
- .long ret_from_except
-#endif
-
- .globl ret_from_except_full
-ret_from_except_full:
- REST_NVGPRS(r1)
- /* fall through */
-
- .globl ret_from_except
-ret_from_except:
- /* Hard-disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt. */
- /* Note: We don't bother telling lockdep about it */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC /* Some chip revs have problems here... */
- MTMSRD(r10) /* disable interrupts */
-
- lwz r3,_MSR(r1) /* Returning to user mode? */
- andi. r0,r3,MSR_PR
- beq resume_kernel
-
-user_exc_return: /* r10 contains MSR_KERNEL here */
- /* Check current_thread_info()->flags */
- lwz r9,TI_FLAGS(r2)
- andi. r0,r9,_TIF_USER_WORK_MASK
- bne do_work
-
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* Check whether this process has its own DBCR0 value. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
- ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_unlock r10, r11
-#endif
+ prepare_transfer_to_handler
+ bl unrecoverable_exception
+ trap /* should not get here */
+
+ .globl interrupt_return
+interrupt_return:
+ lwz r4,_MSR(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ andi. r0,r4,MSR_PR
+ beq .Lkernel_interrupt_return
+ bl interrupt_exit_user_prepare
+ cmpwi r3,0
+ kuep_unlock
+ bne- .Lrestore_nvgprs
- b restore
+.Lfast_user_interrupt_return:
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
-/* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- lwz r8,TI_FLAGS(r2)
- andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
+BEGIN_FTR_SECTION
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+ lwz r3,_CCR(r1)
+ lwz r4,_LINK(r1)
+ lwz r5,_CTR(r1)
+ lwz r6,_XER(r1)
+ li r0,0
- lwz r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+ /*
+ * Leaving a stale exception marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ stw r0,8(r1)
+ REST_GPRS(7, 12, r1)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
+ mtcr r3
+ mtlr r4
mtctr r5
-2: lwzx r0,r6,r4
- stwx r0,r6,r3
- addi r6,r6,4
- bdnz 2b
-
- /* Do real store operation to complete stwu */
- lwz r5,GPR1(r1)
- stw r8,0(r5)
+ mtspr SPRN_XER,r6
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r2,TI_FLAGS
-0: lwarx r8,0,r5
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r5
-#endif
- stwcx. r8,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPT
- /* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r2)
- cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore_kuap
- andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore_kuap
- lwz r3,_MSR(r1)
- andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore_kuap /* don't schedule if so */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep thinks irqs are enabled, we need to call
- * preempt_schedule_irq with IRQs off, so we inform lockdep
- * now that we -did- turn them off already
- */
- bl trace_hardirqs_off
-#endif
- bl preempt_schedule_irq
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* And now, to properly rebalance the above, we tell lockdep they
- * are being turned back on, which will happen when we return
- */
- bl trace_hardirqs_on
+ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
#endif
-#endif /* CONFIG_PREEMPT */
-restore_kuap:
- kuap_restore r1, r2, r9, r10, r0
-
- /* interrupts are hard-disabled at this point */
-restore:
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
- b 1f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
- lis r4,icache_44x_need_flush@ha
- lwz r5,icache_44x_need_flush@l(r4)
- cmplwi cr0,r5,0
- beq+ 1f
- li r6,0
- iccci r0,r0
- stw r6,icache_44x_need_flush@l(r4)
-1:
-#endif /* CONFIG_44x */
- lwz r9,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep doesn't know about the fact that IRQs are temporarily turned
- * off in this assembly code while peeking at TI_FLAGS() and such. However
- * we need to inform it if the exception turned interrupts off, and we
- * are about to trun them back on.
- */
- andi. r10,r9,MSR_EE
- beq 1f
- stwu r1,-32(r1)
- mflr r0
- stw r0,4(r1)
- bl trace_hardirqs_on
- addi r1, r1, 32
- lwz r9,_MSR(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- lwz r0,GPR0(r1)
- lwz r2,GPR2(r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
+.Lkernel_interrupt_return:
+ bl interrupt_exit_kernel_prepare
- lwz r10,_XER(r1)
- lwz r11,_CTR(r1)
- mtspr SPRN_XER,r10
- mtctr r11
+.Lfast_kernel_interrupt_return:
+ cmpwi cr1,r3,0
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- PPC405_ERR77(0,r1)
BEGIN_FTR_SECTION
- lwarx r11,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ lwz r3,_LINK(r1)
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ lwz r6,_CCR(r1)
+ li r0,0
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- andi. r10,r9,MSR_RI /* check if this exception occurred */
- beql nonrecoverable /* at a bad place (MSR:RI = 0) */
+ REST_GPRS(7, 12, r1)
- lwz r10,_CCR(r1)
- lwz r11,_LINK(r1)
- mtcrf 0xFF,r10
- mtlr r11
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
- /* Clear the exception_marker on the stack to avoid confusing stacktrace */
- li r10, 0
- stw r10, 8(r1)
/*
- * Once we put values in SRR0 and SRR1, we are in a state
- * where exceptions are not recoverable, since taking an
- * exception will trash SRR0 and SRR1. Therefore we clear the
- * MSR:RI bit to indicate this. If we do take an exception,
- * we can't return to the point of the exception but we
- * can restart the exception exit path at the label
- * exc_exit_restart below. -- paulus
+ * Leaving a stale exception marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
*/
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
- SYNC
- MTMSRD(r10) /* clear the RI bit */
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r12,_NIP(r1)
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r9
- REST_4GPRS(9, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- SYNC
- RFI
+ stw r0,8(r1)
-#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
- /*
- * This is a bit different on 4xx/Book-E because it doesn't have
- * the RI bit in the MSR.
- * The TLB miss handler checks if we have interrupted
- * the exception exit path and restarts it if so
- * (well maybe one day it will... :).
+ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * SPRG Scratch0 as temporary storage to hold the store
+ * data, as interrupts are disabled here so it won't be clobbered.
*/
- lwz r11,_LINK(r1)
- mtlr r11
- lwz r10,_CCR(r1)
- mtcrf 0xff,r10
- /* Clear the exception_marker on the stack to avoid confusing stacktrace */
- li r10, 0
- stw r10, 8(r1)
- REST_2GPRS(9, r1)
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r11,_NIP(r1)
- lwz r12,_MSR(r1)
-exc_exit_start:
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
- REST_2GPRS(11, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- PPC405_ERR77_SYNC
+ mtcr r6
+#ifdef CONFIG_BOOKE
+ mtspr SPRN_SPRG_WSCRATCH0, r9
+#else
+ mtspr SPRN_SPRG_SCRATCH0, r9
+#endif
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ stw r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+ mfspr r9, SPRN_SPRG_RSCRATCH0
+#else
+ mfspr r9, SPRN_SPRG_SCRATCH0
+#endif
rfi
- b . /* prevent prefetch past rfi */
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/*
* Returning from a critical interrupt in user mode doesn't need
@@ -1071,17 +411,13 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
- bne user_exc_return; \
- lwz r0,GPR0(r1); \
- lwz r2,GPR2(r1); \
- REST_4GPRS(3, r1); \
- REST_2GPRS(7, r1); \
+ bne interrupt_return; \
+ REST_GPR(0, r1); \
+ REST_GPRS(2, 8, r1); \
lwz r10,_XER(r1); \
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
mtctr r11; \
- PPC405_ERR77(0,r1); \
stwcx. r0,0,r1; /* to clear the reservation */ \
lwz r11,_LINK(r1); \
mtlr r11; \
@@ -1096,12 +432,8 @@ exc_exit_restart_end:
lwz r12,_MSR(r1); \
mtspr exc_lvl_srr0,r11; \
mtspr exc_lvl_srr1,r12; \
- lwz r9,GPR9(r1); \
- lwz r12,GPR12(r1); \
- lwz r10,GPR10(r1); \
- lwz r11,GPR11(r1); \
- lwz r1,GPR1(r1); \
- PPC405_ERR77_SYNC; \
+ REST_GPRS(9, 12, r1); \
+ REST_GPR(1, r1); \
exc_lvl_rfi; \
b .; /* prevent prefetch past exc_lvl_rfi */
@@ -1111,7 +443,7 @@ exc_exit_restart_end:
mtspr SPRN_##exc_lvl_srr0,r9; \
mtspr SPRN_##exc_lvl_srr1,r10;
-#if defined(CONFIG_PPC_BOOK3E_MMU)
+#if defined(CONFIG_PPC_E500)
#ifdef CONFIG_PHYS_64BIT
#define RESTORE_MAS7 \
lwz r11,MAS7(r1); \
@@ -1142,11 +474,6 @@ exc_exit_restart_end:
#ifdef CONFIG_40x
.globl ret_from_crit_exc
ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lis r10,saved_ksp_limit@ha;
- lwz r10,saved_ksp_limit@l(r10);
- tovirt(r9,r9);
- stw r10,KSP_LIMIT(r9)
lis r9,crit_srr0@ha;
lwz r9,crit_srr0@l(r9);
lis r10,crit_srr1@ha;
@@ -1154,214 +481,32 @@ ret_from_crit_exc:
mtspr SPRN_SRR0,r9;
mtspr SPRN_SRR1,r10;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
#endif /* CONFIG_40x */
#ifdef CONFIG_BOOKE
.globl ret_from_crit_exc
ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
.globl ret_from_debug_exc
ret_from_debug_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
.globl ret_from_mcheck_exc
ret_from_mcheck_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_xSRR(DSRR0,DSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
#endif /* CONFIG_BOOKE */
-
-/*
- * Load the DBCR0 value for a task that is being ptraced,
- * having first saved away the global DBCR0. Note that r0
- * has the dbcr0 value to set upon entry to this.
- */
-load_dbcr0:
- mfmsr r10 /* first disable debug exceptions */
- rlwinm r10,r10,0,~MSR_DE
- mtmsr r10
- isync
- mfspr r10,SPRN_DBCR0
- lis r11,global_dbcr0@ha
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r9,TASK_CPU(r2)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- stw r10,0(r11)
- mtspr SPRN_DBCR0,r0
- lwz r10,4(r11)
- addi r10,r10,1
- stw r10,4(r11)
- li r11,-1
- mtspr SPRN_DBSR,r11 /* clear all pending debug events */
- blr
-
- .section .bss
- .align 4
- .global global_dbcr0
-global_dbcr0:
- .space 8*NR_CPUS
- .previous
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
-
-do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
- beq do_user_signal
-
-do_resched: /* r10 contains MSR_KERNEL here */
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on
- mfmsr r10
-#endif
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10) /* hard-enable interrupts */
- bl schedule
-recheck:
- /* Note: And we don't tell it we are disabling them again
- * neither. Those disable/enable cycles used to peek at
- * TI_FLAGS aren't advertised.
- */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC
- MTMSRD(r10) /* disable interrupts */
- lwz r9,TI_FLAGS(r2)
- andi. r0,r9,_TIF_NEED_RESCHED
- bne- do_resched
- andi. r0,r9,_TIF_USER_WORK_MASK
- beq restore_user
-do_user_signal: /* r10 contains MSR_KERNEL here */
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10) /* hard-enable interrupts */
- /* save r13-r31 in the exception frame, if not already done */
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 2f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r9
- bl do_notify_resume
- REST_NVGPRS(r1)
- b recheck
-
-/*
- * We come here when we are at the end of handling an exception
- * that occurred at a place where taking an exception will lose
- * state information, such as the contents of SRR0 and SRR1.
- */
-nonrecoverable:
- lis r10,exc_exit_restart_end@ha
- addi r10,r10,exc_exit_restart_end@l
- cmplw r12,r10
-#ifdef CONFIG_PPC_BOOK3S_601
- bgelr
-#else
- bge 3f
-#endif
- lis r11,exc_exit_restart@ha
- addi r11,r11,exc_exit_restart@l
- cmplw r12,r11
-#ifdef CONFIG_PPC_BOOK3S_601
- bltlr
-#else
- blt 3f
-#endif
- lis r10,ee_restarts@ha
- lwz r12,ee_restarts@l(r10)
- addi r12,r12,1
- stw r12,ee_restarts@l(r10)
- mr r12,r11 /* restart at exc_exit_restart */
- blr
-3: /* OK, we can't recover, kill this process */
- /* but the 601 doesn't implement the RI bit, so assume it's OK */
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 5f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-5: mfspr r2,SPRN_SPRG_THREAD
- addi r2,r2,-THREAD
- tovirt(r2,r2) /* set back r2 to current */
-4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- /* shouldn't return */
- b 4b
-
- .section .bss
- .align 2
-ee_restarts:
- .space 4
- .previous
-
-/*
- * PROM code for specific machines follows. Put it
- * here so it's easy to add arch-specific sections later.
- * -- Cort
- */
-#ifdef CONFIG_PPC_RTAS
-/*
- * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
- * called with the MMU off.
- */
-_GLOBAL(enter_rtas)
- stwu r1,-INT_FRAME_SIZE(r1)
- mflr r0
- stw r0,INT_FRAME_SIZE+4(r1)
- LOAD_REG_ADDR(r4, rtas)
- lis r6,1f@ha /* physical return address for rtas */
- addi r6,r6,1f@l
- tophys(r6,r6)
- tophys(r7,r1)
- lwz r8,RTASENTRY(r4)
- lwz r4,RTASBASE(r4)
- mfmsr r9
- stw r9,8(r1)
- LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
- SYNC /* disable interrupts so SRR0/1 */
- MTMSRD(r0) /* don't get trashed */
- li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
- mtlr r6
- stw r7, THREAD + RTAS_SP(r2)
- mtspr SPRN_SRR0,r8
- mtspr SPRN_SRR1,r9
- RFI
-1: tophys(r9,r1)
- lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
- lwz r9,8(r9) /* original msr value */
- addi r1,r1,INT_FRAME_SIZE
- li r0,0
- tophys(r7, r2)
- stw r0, THREAD + RTAS_SP(r7)
- mtspr SPRN_SRR0,r8
- mtspr SPRN_SRR1,r9
- RFI /* return to caller */
-
- .globl machine_check_in_rtas
-machine_check_in_rtas:
- twi 31,0,0
- /* XXX load up BATs and panic */
-
-#endif /* CONFIG_PPC_RTAS */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
deleted file mode 100644
index 3fd3ef352e3f..000000000000
--- a/arch/powerpc/kernel/entry_64.S
+++ /dev/null
@@ -1,1354 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
- * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
- * Adapted for Power Macintosh by Paul Mackerras.
- * Low-level exception handlers and MMU support
- * rewritten by Paul Mackerras.
- * Copyright (C) 1996 Paul Mackerras.
- * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
- *
- * This file contains the system call entry code, context switch
- * code, and exception/interrupt return code for PowerPC.
- */
-
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <asm/unistd.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/thread_info.h>
-#include <asm/code-patching-asm.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/firmware.h>
-#include <asm/bug.h>
-#include <asm/ptrace.h>
-#include <asm/irqflags.h>
-#include <asm/hw_irq.h>
-#include <asm/context_tracking.h>
-#include <asm/tm.h>
-#include <asm/ppc-opcode.h>
-#include <asm/barrier.h>
-#include <asm/export.h>
-#include <asm/asm-compat.h>
-#ifdef CONFIG_PPC_BOOK3S
-#include <asm/exception-64s.h>
-#else
-#include <asm/exception-64e.h>
-#endif
-#include <asm/feature-fixups.h>
-#include <asm/kup.h>
-
-/*
- * System calls.
- */
- .section ".toc","aw"
-SYS_CALL_TABLE:
- .tc sys_call_table[TC],sys_call_table
-
-COMPAT_SYS_CALL_TABLE:
- .tc compat_sys_call_table[TC],compat_sys_call_table
-
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
- .section ".text"
- .align 7
-
- .globl system_call_common
-system_call_common:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
- mr r10,r1
- ld r1,PACAKSAVE(r13)
- std r10,0(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- std r0,GPR0(r1)
- std r10,GPR1(r1)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-START_BTB_FLUSH_SECTION
- BTB_FLUSH(r10)
-END_BTB_FLUSH_SECTION
-#endif
- ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
- std r2,GPR2(r1)
- std r3,GPR3(r1)
- mfcr r2
- std r4,GPR4(r1)
- std r5,GPR5(r1)
- std r6,GPR6(r1)
- std r7,GPR7(r1)
- std r8,GPR8(r1)
- li r11,0
- std r11,GPR9(r1)
- std r11,GPR10(r1)
- std r11,GPR11(r1)
- std r11,GPR12(r1)
- std r11,_XER(r1)
- std r11,_CTR(r1)
- std r9,GPR13(r1)
- mflr r10
- /*
- * This clears CR0.SO (bit 28), which is the error indication on
- * return from this system call.
- */
- rldimi r2,r11,28,(63-28)
- li r11,0xc01
- std r10,_LINK(r1)
- std r11,_TRAP(r1)
- std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-
- kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- /* see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd r11,r10
- beq+ 33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- lbz r10,PACAIRQSOFTMASK(r13)
-1: tdnei r10,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r11,MSR_RI
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call: /* label this so stack traces look sane */
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,IRQS_ENABLED
- std r10,SOFTE(r1)
-
- ld r11, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne .Lsyscall_dotrace /* does not return */
- cmpldi 0,r0,NR_syscalls
- bge- .Lsyscall_enosys
-
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andis. r10,r10,_TIF_32BIT@h
- beq 15f
- ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,3
-
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .Lsyscall_enosys above has
- * committed.
- */
-
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
-
- /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
-.Lsyscall_exit:
- std r3,RESULT(r1)
-
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- ld r3,RESULT(r1)
-#endif
-
- ld r12, PACA_THREAD_INFO(r13)
-
- ld r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- *
- * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
- * could fault on the load of the TI_FLAGS below.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r11,MSR_RI
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9,TI_FLAGS(r12)
- li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- .Lsyscall_exit_work
-
- andi. r0,r8,MSR_FP
- beq 2f
-#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
-
-3: cmpld r3,r11
- ld r5,_CCR(r1)
- bge- .Lsyscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
-
- kuap_check_amr r10, r11
-
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
- * this later, but testing shows that doing it here causes less slow
- * down than doing it closer to the rfid.
- */
- li r11,0
- mtmsrd r11,1
-#endif
-
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
-
-BEGIN_FTR_SECTION
- HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- std r8, PACATMSCRATCH(r13)
-#endif
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-1: /* exit to kernel */
- kuap_restore_amr r2
-
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-.Lsyscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-.Lsyscall_dotrace:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
-
- /*
- * We use the return value of do_syscall_trace_enter() as the syscall
- * number. If the syscall was rejected for any reason do_syscall_trace_enter()
- * returns an invalid syscall number and the test below against
- * NR_syscalls will fail.
- */
- mr r0,r3
-
- /* Restore argument registers just clobbered and/or possibly changed. */
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
-
- /* Repopulate r9 and r10 for the syscall path */
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r10, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r10)
-
- cmpldi r0,NR_syscalls
- blt+ .Lsyscall
-
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- b .Lsyscall_exit
-
-
-.Lsyscall_enosys:
- li r3,-ENOSYS
- b .Lsyscall_exit
-
-.Lsyscall_exit_work:
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
-BEGIN_FTR_SECTION
- lis r3,DEFAULT_PPR@highest /* Set default PPR */
- sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,_PPR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r10,MSR_RI
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-.Ltabort_syscall:
- /* Firstly we need to enable TM in the kernel */
- mfmsr r10
- li r9, 1
- rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r10, 0
-
- /* tabort, this dooms the transaction, nothing else */
- li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
- TABORT(R9)
-
- /*
- * Return directly to userspace. We have corrupted user register state,
- * but userspace will never see that register state. Execution will
- * resume after the tbegin of the aborted transaction with the
- * checkpointed register state.
- */
- li r9, MSR_RI
- andc r10, r10, r9
- mtmsrd r10, 1
- mtspr SPRN_SRR0, r11
- mtspr SPRN_SRR1, r12
- RFI_TO_USER
- b . /* prevent speculative execution */
-#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common);
-_ASM_NOKPROBE_SYMBOL(system_call_exit);
-
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
-
-_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
- li r3,0
- b .Lsyscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
- mtlr r14
- mr r3,r15
-#ifdef PPC64_ELF_ABI_v2
- mr r12,r14
-#endif
- blrl
- li r3,0
- b .Lsyscall_exit
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
-#define FLUSH_COUNT_CACHE \
-1: nop; \
- patch_site 1b, patch__call_flush_count_cache
-
-
-#define BCCTR_FLUSH .long 0x4c400420
-
-.macro nops number
- .rept \number
- nop
- .endr
-.endm
-
-.balign 32
-.global flush_count_cache
-flush_count_cache:
- /* Save LR into r9 */
- mflr r9
-
- // Flush the link stack
- .rept 64
- bl .+4
- .endr
- b 1f
- nops 6
-
- .balign 32
- /* Restore LR */
-1: mtlr r9
-
- // If we're just flushing the link stack, return here
-3: nop
- patch_site 3b patch__flush_link_stack_return
-
- li r9,0x7fff
- mtctr r9
-
- BCCTR_FLUSH
-
-2: nop
- patch_site 2b patch__flush_count_cache_return
-
- nops 3
-
- .rept 278
- .balign 32
- BCCTR_FLUSH
- nops 7
- .endr
-
- blr
-#else
-#define FLUSH_COUNT_CACHE
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-/*
- * This routine switches between two different tasks. The process
- * state of one is saved on its kernel stack. Then the state
- * of the other is restored from its kernel stack. The memory
- * management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
- * On entry, r3 points to the THREAD for the current task, r4
- * points to the THREAD for the new task.
- *
- * Note: there are two ways to get to the "going out" portion
- * of this code; either by coming in via the entry (_switch)
- * or via "fork" which must set up an environment equivalent
- * to the "_switch" path. If you change this you'll have to change
- * the fork code also.
- *
- * The code which creates the new task context is in 'copy_thread'
- * in arch/powerpc/kernel/process.c
- */
- .align 7
-_GLOBAL(_switch)
- mflr r0
- std r0,16(r1)
- stdu r1,-SWITCH_FRAME_SIZE(r1)
- /* r3-r13 are caller saved -- Cort */
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
- std r0,_NIP(r1) /* Return to switch caller */
- mfcr r23
- std r23,_CCR(r1)
- std r1,KSP(r3) /* Set old stack pointer */
-
- kuap_check_amr r9, r10
-
- FLUSH_COUNT_CACHE
-
- /*
- * On SMP kernels, care must be taken because a task may be
- * scheduled off CPUx and on to CPUy. Memory ordering must be
- * considered.
- *
- * Cacheable stores on CPUx will be visible when the task is
- * scheduled on CPUy by virtue of the core scheduler barriers
- * (see "Notes on Program-Order guarantees on SMP systems." in
- * kernel/sched/core.c).
- *
- * Uncacheable stores in the case of involuntary preemption must
- * be taken care of. The smp_mb__before_spin_lock() in __schedule()
- * is implemented as hwsync on powerpc, which orders MMIO too. So
- * long as there is an hwsync in the context switch path, it will
- * be executed on the source CPU after the task has performed
- * all MMIO ops on that CPU, and on the destination CPU before the
- * task performs any MMIO ops there.
- */
-
- /*
- * The kernel context switch path must contain a spin_lock,
- * which contains larx/stcx, which will clear any reservation
- * of the task being switched.
- */
-#ifdef CONFIG_PPC_BOOK3S
-/* Cancel all explict user streams as they will have no use after context
- * switch and will stop the HW from creating streams itself
- */
- DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
-#endif
-
- addi r6,r4,-THREAD /* Convert THREAD to 'current' */
- std r6,PACACURRENT(r13) /* Set new 'current' */
-#if defined(CONFIG_STACKPROTECTOR)
- ld r6, TASK_CANARY(r6)
- std r6, PACA_CANARY(r13)
-#endif
-
- ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_MMU_FTR_SECTION
- b 2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-BEGIN_FTR_SECTION
- clrrdi r6,r8,28 /* get its ESID */
- clrrdi r9,r1,28 /* get current sp ESID */
-FTR_SECTION_ELSE
- clrrdi r6,r8,40 /* get its 1T ESID */
- clrrdi r9,r1,40 /* get current sp 1T ESID */
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
- clrldi. r0,r6,2 /* is new ESID c00000000? */
- cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
- cror eq,4*cr1+eq,eq
- beq 2f /* if yes, don't slbie it */
-
- /* Bolt in the new stack SLB entry */
- ld r7,KSP_VSID(r4) /* Get new stack's VSID */
- oris r0,r6,(SLB_ESID_V)@h
- ori r0,r0,(SLB_NUM_BOLTED-1)@l
-BEGIN_FTR_SECTION
- li r9,MMU_SEGSIZE_1T /* insert B field */
- oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
- rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-
- /* Update the last bolted SLB. No write barriers are needed
- * here, provided we only update the current CPU's SLB shadow
- * buffer.
- */
- ld r9,PACA_SLBSHADOWPTR(r13)
- li r12,0
- std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
- li r12,SLBSHADOW_STACKVSID
- STDX_BE r7,r12,r9 /* Save VSID */
- li r12,SLBSHADOW_STACKESID
- STDX_BE r0,r12,r9 /* Save ESID */
-
- /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
- * we have 1TB segments, the only CPUs known to have the errata
- * only support less than 1TB of system memory and we'll never
- * actually hit this code path.
- */
-
- isync
- slbie r6
-BEGIN_FTR_SECTION
- slbie r6 /* Workaround POWER5 < DD2.1 issue */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- slbmte r7,r0
- isync
-2:
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
- clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
- /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
- because we don't need to leave the 288-byte ABI gap at the
- top of the kernel stack. */
- addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
-
- /*
- * PMU interrupts in radix may come in here. They will use r1, not
- * PACAKSAVE, so this stack switch will not cause a problem. They
- * will store to the process stack, which may then be migrated to
- * another CPU. However the rq lock release on this CPU paired with
- * the rq lock acquire on the new CPU before the stack becomes
- * active on the new CPU, will order those stores.
- */
- mr r1,r8 /* start using new stack pointer */
- std r7,PACAKSAVE(r13)
-
- ld r6,_CCR(r1)
- mtcrf 0xFF,r6
-
- /* r3-r13 are destroyed -- Cort */
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
-
- /* convert old thread to its task_struct for return value */
- addi r3,r3,-THREAD
- ld r7,_NIP(r1) /* Return to _switch caller in new task */
- mtlr r7
- addi r1,r1,SWITCH_FRAME_SIZE
- blr
-
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
- /*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9, PACA_THREAD_INFO(r13)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne 1f
-#ifdef CONFIG_PPC_BOOK3E
- /*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- b restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- /*
- * Use a non volatile GPR to save and restore our thread_info flags
- * across the call to restore_interrupts.
- */
- mr r30,r4
- bl restore_interrupts
- mr r4,r30
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
-
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
-
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
-
- ld r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
-
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stdu */
- ld r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPT
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr0,r8,0
- bne restore
- ld r0,SOFTE(r1)
- andi. r0,r0,IRQS_DISABLED
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
- bl preempt_schedule_irq
-
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupt but we really should disable interrupts
- * when we return from the interrupt, and so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
-
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACAIRQSOFTMASK(r13)
- andi. r5,r5,IRQS_DISABLED
- bne .Lrestore_irq_off
-
- /* We are enabling, were we already enabled ? Yes, just return */
- andi. r6,r6,IRQS_DISABLED
- beq cr0,.Ldo_restore
-
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- .Lrestore_check_irq_replay
-
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-.Lrestore_no_replay:
- TRACE_ENABLE_INTS
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13);
-
- /*
- * Final return path. BookE is handled in a different file
- */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-FTR_SECTION_ELSE
- ldarx r4,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
- ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- kuap_check_amr r5, r6
-
- REST_8GPRS(5, r1)
-
- andi. r0,r3,MSR_RI
- beq- .Lunrecov_restore
-
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- li r4,0
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- /* Restore PPR */
- ld r2,_PPR(r1)
- mtspr SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
- REST_GPR(13, r1)
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-1: mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- /*
- * Leaving a stale exception_marker on the stack can confuse
- * the reliable stack unwinder later on. Clear it.
- */
- li r2,0
- std r2,STACK_FRAME_OVERHEAD-16(r1)
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
-
- kuap_restore_amr r4
-
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-.Lrestore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- /* The interrupt should not have soft enabled. */
- lbz r7,PACAIRQSOFTMASK(r13)
-1: tdeqi r7,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
- b .Ldo_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-.Lrestore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq .Lrestore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * PACA_IRQ_HARD_DIS won't always be set here, so set it now
- * to reconcile the IRQ state. Tracing is already accounted for.
- */
- lbz r4,PACAIRQHAPPENED(r13)
- ori r4,r4,PACA_IRQ_HARD_DIS
- stb r4,PACAIRQHAPPENED(r13)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- cmpwi cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-.Lunrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b .Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
-
-
-#ifdef CONFIG_PPC_RTAS
-/*
- * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
- * called with the MMU off.
- *
- * In addition, we need to be in 32b mode, at least for now.
- *
- * Note: r3 is an input parameter to rtas, so don't trash it...
- */
-_GLOBAL(enter_rtas)
- mflr r0
- std r0,16(r1)
- stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
-
- /* Because RTAS is running in 32b mode, it clobbers the high order half
- * of all registers that it saves. We therefore save those registers
- * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
- */
- SAVE_GPR(2, r1) /* Save the TOC */
- SAVE_GPR(13, r1) /* Save paca */
- SAVE_8GPRS(14, r1) /* Save the non-volatiles */
- SAVE_10GPRS(22, r1) /* ditto */
-
- mfcr r4
- std r4,_CCR(r1)
- mfctr r5
- std r5,_CTR(r1)
- mfspr r6,SPRN_XER
- std r6,_XER(r1)
- mfdar r7
- std r7,_DAR(r1)
- mfdsisr r8
- std r8,_DSISR(r1)
-
- /* Temporary workaround to clear CR until RTAS can be modified to
- * ignore all bits.
- */
- li r0,0
- mtcr r0
-
-#ifdef CONFIG_BUG
- /* There is no way it is acceptable to get here with interrupts enabled,
- * check it with the asm equivalent of WARN_ON
- */
- lbz r0,PACAIRQSOFTMASK(r13)
-1: tdeqi r0,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
- /* Hard-disable interrupts */
- mfmsr r6
- rldicl r7,r6,48,1
- rotldi r7,r7,16
- mtmsrd r7,1
-
- /* Unfortunately, the stack pointer and the MSR are also clobbered,
- * so they are saved in the PACA which allows us to restore
- * our original state after RTAS returns.
- */
- std r1,PACAR1(r13)
- std r6,PACASAVEDMSR(r13)
-
- /* Setup our real return addr */
- LOAD_REG_ADDR(r4,rtas_return_loc)
- clrldi r4,r4,2 /* convert to realmode address */
- mtlr r4
-
- li r0,0
- ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
- andc r0,r6,r0
-
- li r9,1
- rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
- ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
- andc r6,r0,r9
-
-__enter_rtas:
- sync /* disable interrupts so SRR0/1 */
- mtmsrd r0 /* don't get trashed */
-
- LOAD_REG_ADDR(r4, rtas)
- ld r5,RTASENTRY(r4) /* get the rtas->entry value */
- ld r4,RTASBASE(r4) /* get the rtas->base value */
-
- mtspr SPRN_SRR0,r5
- mtspr SPRN_SRR1,r6
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-rtas_return_loc:
- FIXUP_ENDIAN
-
- /*
- * Clear RI and set SF before anything.
- */
- mfmsr r6
- li r0,MSR_RI
- andc r6,r6,r0
- sldi r0,r0,(MSR_SF_LG - MSR_RI_LG)
- or r6,r6,r0
- sync
- mtmsrd r6
-
- /* relocation is off at this point */
- GET_PACA(r4)
- clrldi r4,r4,2 /* convert to realmode address */
-
- bcl 20,31,$+4
-0: mflr r3
- ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
-
- ld r1,PACAR1(r4) /* Restore our SP */
- ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
-
- mtspr SPRN_SRR0,r3
- mtspr SPRN_SRR1,r4
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-_ASM_NOKPROBE_SYMBOL(__enter_rtas)
-_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
-
- .align 3
-1: .8byte rtas_restore_regs
-
-rtas_restore_regs:
- /* relocation is on at this point */
- REST_GPR(2, r1) /* Restore the TOC */
- REST_GPR(13, r1) /* Restore paca */
- REST_8GPRS(14, r1) /* Restore the non-volatiles */
- REST_10GPRS(22, r1) /* ditto */
-
- GET_PACA(r13)
-
- ld r4,_CCR(r1)
- mtcr r4
- ld r5,_CTR(r1)
- mtctr r5
- ld r6,_XER(r1)
- mtspr SPRN_XER,r6
- ld r7,_DAR(r1)
- mtdar r7
- ld r8,_DSISR(r1)
- mtdsisr r8
-
- addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
- ld r0,16(r1) /* get return address */
-
- mtlr r0
- blr /* return to caller */
-
-#endif /* CONFIG_PPC_RTAS */
-
-_GLOBAL(enter_prom)
- mflr r0
- std r0,16(r1)
- stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
-
- /* Because PROM is running in 32b mode, it clobbers the high order half
- * of all registers that it saves. We therefore save those registers
- * PROM might touch to the stack. (r0, r3-r13 are caller saved)
- */
- SAVE_GPR(2, r1)
- SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
- mfcr r10
- mfmsr r11
- std r10,_CCR(r1)
- std r11,_MSR(r1)
-
- /* Put PROM address in SRR0 */
- mtsrr0 r4
-
- /* Setup our trampoline return addr in LR */
- bcl 20,31,$+4
-0: mflr r4
- addi r4,r4,(1f - 0b)
- mtlr r4
-
- /* Prepare a 32-bit mode big endian MSR
- */
-#ifdef CONFIG_PPC_BOOK3E
- rlwinm r11,r11,0,1,31
- mtsrr1 r11
- rfi
-#else /* CONFIG_PPC_BOOK3E */
- LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
- andc r11,r11,r12
- mtsrr1 r11
- RFI_TO_KERNEL
-#endif /* CONFIG_PPC_BOOK3E */
-
-1: /* Return from OF */
- FIXUP_ENDIAN
-
- /* Just make sure that r1 top 32 bits didn't get
- * corrupt by OF
- */
- rldicl r1,r1,0,32
-
- /* Restore the MSR (back to 64 bits) */
- ld r0,_MSR(r1)
- MTMSRD(r0)
- isync
-
- /* Restore other registers */
- REST_GPR(2, r1)
- REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
- ld r4,_CCR(r1)
- mtcr r4
-
- addi r1,r1,SWITCH_FRAME_SIZE
- ld r0,16(r1)
- mtlr r0
- blr
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 69a912550577..1a9b5ae8ccb2 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -3,6 +3,7 @@
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*/
+#include <linux/export.h>
#include <linux/threads.h>
#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
@@ -12,7 +13,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
@@ -21,7 +21,13 @@ _GLOBAL(epapr_ev_idle)
ori r4, r4,_TLF_NAPPING /* so when we take an exception */
PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */
+#ifdef CONFIG_BOOKE_OR_40x
wrteei 1
+#else
+ mfmsr r4
+ ori r4, r4, MSR_EE
+ mtmsr r4
+#endif
idle_loop:
LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 9d32158ce36f..d4b8aff20815 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -11,6 +11,7 @@
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/machdep.h>
+#include <asm/inst.h>
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
extern void epapr_ev_idle(void);
@@ -36,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- u32 inst = be32_to_cpu(insts[i]);
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..dcf0591ad3c2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -5,6 +5,7 @@
* Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
*/
+#include <linux/linkage.h>
#include <linux/threads.h>
#include <asm/reg.h>
#include <asm/page.h>
@@ -13,7 +14,6 @@
#include <asm/cputable.h>
#include <asm/setup.h>
#include <asm/thread_info.h>
-#include <asm/reg_a2.h>
#include <asm/exception-64e.h>
#include <asm/bug.h>
#include <asm/irqflags.h>
@@ -24,6 +24,11 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
+
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -61,10 +66,7 @@
#define SPECIAL_EXC_LOAD(reg, name) \
ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
-special_reg_save:
- lbz r9,PACAIRQHAPPENED(r13)
- RECONCILE_IRQ_STATE(r3,r4)
-
+SYM_CODE_START_LOCAL(special_reg_save)
/*
* We only need (or have stack space) to save this stuff if
* we interrupted the kernel.
@@ -118,27 +120,25 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS5,r10
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- SPECIAL_EXC_STORE(r9,IRQHAPPENED)
-
mfspr r10,SPRN_DEAR
SPECIAL_EXC_STORE(r10,DEAR)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACAIRQSOFTMASK(r13)
- SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
ld r10,_MSR(r1)
SPECIAL_EXC_STORE(r10,CSRR1)
blr
+SYM_CODE_END(special_reg_save)
-ret_from_level_except:
+SYM_CODE_START_LOCAL(ret_from_level_except)
ld r3,_MSR(r1)
andi. r3,r3,MSR_PR
beq 1f
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
1:
LOAD_REG_ADDR(r11,extlb_level_exc)
@@ -192,27 +192,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACAIRQSOFTMASK(r13)
- ld r5,SOFTE(r1)
-
- /* Interrupts had better not already be enabled... */
- tweqi r6,IRQS_ENABLED
-
- andi. r6,r5,IRQS_DISABLED
- bne 1f
-
- TRACE_ENABLE_INTS
- stb r5,PACAIRQSOFTMASK(r13)
-1:
- /*
- * Restore PACAIRQHAPPENED rather than setting it based on
- * the return MSR[EE], since we could have interrupted
- * __check_irq_replay() or other inconsistent transitory
- * states that must remain that way.
- */
- SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
- stb r10,PACAIRQHAPPENED(r13)
-
SPECIAL_EXC_LOAD(r10,DEAR)
mtspr SPRN_DEAR,r10
SPECIAL_EXC_LOAD(r10,ESR)
@@ -220,8 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
stdcx. r0,0,r1 /* to clear the reservation */
- REST_4GPRS(2, r1)
- REST_4GPRS(6, r1)
+ REST_GPRS(2, 9, r1)
ld r10,_CTR(r1)
ld r11,_XER(r1)
@@ -229,6 +207,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mtxer r11
blr
+SYM_CODE_END(ret_from_level_except)
.macro ret_from_level srr0 srr1 paca_ex scratch
bl ret_from_level_except
@@ -239,17 +218,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mtlr r10
mtcr r11
- ld r10,GPR10(r1)
- ld r11,GPR11(r1)
- ld r12,GPR12(r1)
+ REST_GPRS(10, 12, r1)
mtspr \scratch,r0
std r10,\paca_ex+EX_R10(r13);
std r11,\paca_ex+EX_R11(r13);
ld r10,_NIP(r1)
ld r11,_MSR(r1)
- ld r0,GPR0(r1)
- ld r1,GPR1(r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
mtspr \srr0,r10
mtspr \srr1,r11
ld r10,\paca_ex+EX_R10(r13)
@@ -257,13 +234,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r13,\scratch
.endm
-ret_from_crit_except:
+SYM_CODE_START_LOCAL(ret_from_crit_except)
ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
rfci
+SYM_CODE_END(ret_from_crit_except)
-ret_from_mc_except:
+SYM_CODE_START_LOCAL(ret_from_mc_except)
ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
rfmci
+SYM_CODE_END(ret_from_mc_except)
/* Exception prolog code for all exceptions */
#define EXCEPTION_PROLOG(n, intnum, type, addition) \
@@ -314,7 +293,6 @@ ret_from_mc_except:
#define SPRN_MC_SRR0 SPRN_MCSRR0
#define SPRN_MC_SRR1 SPRN_MCSRR1
-#ifdef CONFIG_PPC_FSL_BOOK3E
#define GEN_BTB_FLUSH \
START_BTB_FLUSH_SECTION \
beq 1f; \
@@ -330,13 +308,6 @@ ret_from_mc_except:
#define DBG_BTB_FLUSH CRIT_BTB_FLUSH
#define MC_BTB_FLUSH CRIT_BTB_FLUSH
#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
-#else
-#define GEN_BTB_FLUSH
-#define CRIT_BTB_FLUSH
-#define DBG_BTB_FLUSH
-#define MC_BTB_FLUSH
-#define GDBELL_BTB_FLUSH
-#endif
#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \
EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
@@ -366,6 +337,12 @@ ret_from_mc_except:
andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
bne masked_interrupt_book3e_##n
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
std r15,PACA_EXGEN+EX_R15(r13)
@@ -385,46 +362,43 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13); \
std r15,PACA_EXMC+EX_R15(r13)
-
/* Core exception code for all exceptions except TLB misses. */
#define EXCEPTION_COMMON_LVL(n, scratch, excf) \
exc_##n##_common: \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPR(0, r1); /* save r0 in stackframe */ \
+ SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
mfspr r5,scratch; /* get back r13 */ \
- std r12,GPR12(r1); /* save r12 in stackframe */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
+ SAVE_GPR(12, r1); /* save r12 in stackframe */ \
+ LOAD_PACA_TOC(); /* get kernel TOC into r2 */ \
mflr r6; /* save LR in stackframe */ \
mfctr r7; /* save CTR in stackframe */ \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
- ld r12,exception_marker@toc(r2); \
- li r0,0; \
+ LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER); \
+ ZEROIZE_GPR(0); \
std r3,GPR10(r1); /* save r10 to stackframe */ \
std r4,GPR11(r1); /* save r11 to stackframe */ \
std r5,GPR13(r1); /* save it to stackframe */ \
std r6,_LINK(r1); \
std r7,_CTR(r1); \
std r8,_XER(r1); \
- li r3,(n)+1; /* indicate partial regs in trap */ \
+ li r3,(n); /* regs.trap vector */ \
std r9,0(r1); /* store stack frame back link */ \
std r10,_CCR(r1); /* store orig CR in stackframe */ \
std r9,GPR1(r1); /* store stack frame back link */ \
std r11,SOFTE(r1); /* and save it to stackframe */ \
- std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \
std r3,_TRAP(r1); /* set trap number */ \
- std r0,RESULT(r1); /* clear regs->result */
+ std r0,RESULT(r1); /* clear regs->result */ \
+ SAVE_NVGPRS(r1); \
+ SANITIZE_NVGPRS(); /* minimise speculation influence */
#define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -435,28 +409,6 @@ exc_##n##_common: \
#define EXCEPTION_COMMON_DBG(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
-/*
- * This is meant for exceptions that don't immediately hard-enable. We
- * set a bit in paca->irq_happened to ensure that a subsequent call to
- * arch_local_irq_restore() will properly hard-enable and avoid the
- * fast-path, and then reconcile irq state.
- */
-#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
-
-/*
- * This is called by exceptions that don't use INTS_DISABLE (that did not
- * touch irq indicators in the PACA). This will restore MSR:EE to it's
- * previous value
- *
- * XXX In the long run, we may want to open-code it in order to separate the
- * load from the wrtee, thus limiting the latency caused by the dependency
- * but at this point, I'll favor code clarity until we have a near to final
- * implementation
- */
-#define INTS_RESTORE_HARD \
- ld r11,_MSR(r1); \
- wrtee r11;
-
/* XXX FIXME: Restore r14/r15 when necessary */
#define BAD_STACK_TRAMPOLINE(n) \
exc_##n##_bad_stack: \
@@ -505,18 +457,11 @@ exc_##n##_bad_stack: \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum) \
- INTS_DISABLE; \
ack(r8); \
CHECK_NAPPING(); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+ addi r3,r1,STACK_INT_FRAME_REGS; \
bl hdlr; \
- b ret_from_except_lite;
-
-/* This value is used to mark exception frames on the stack. */
- .section ".toc","aw"
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
+ b interrupt_return
/*
* And here we have the exception vectors !
@@ -561,11 +506,10 @@ __end_interrupts:
CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x100)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Machine Check Interrupt */
@@ -573,10 +517,9 @@ __end_interrupts:
MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_MC(0x000)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl machine_check_exception
b ret_from_mc_except
@@ -586,8 +529,11 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x300)
- INTS_DISABLE
b storage_fault_common
/* Instruction Storage Interrupt */
@@ -596,8 +542,11 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x400)
- INTS_DISABLE
b storage_fault_common
/* External Input Interrupt */
@@ -610,6 +559,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
@@ -618,14 +571,13 @@ __end_interrupts:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
- EXCEPTION_COMMON(0x700)
- INTS_DISABLE
- std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ std r14,_ESR(r1)
ld r14,PACA_EXGEN+EX_R14(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON(0x700)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating Point Unavailable Interrupt */
START_EXCEPTION(fp_unavailable);
@@ -637,12 +589,10 @@ __end_interrupts:
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_fpu
- b fast_exception_return
-1: INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ b fast_interrupt_return
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_fp_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
@@ -656,15 +606,13 @@ BEGIN_FTR_SECTION
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
@@ -672,17 +620,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
bl altivec_assist_exception
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ REST_NVGPRS(r1)
#else
bl unknown_exception
#endif
- b ret_from_except
+ b interrupt_return
/* Decrementer Interrupt */
@@ -698,14 +645,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x9f0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_BOOKE_WDT
bl WatchdogException
#else
- bl unknown_exception
+ bl unknown_nmi_exception
#endif
b ret_from_crit_except
@@ -722,11 +668,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0xf20)
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Debug exception as a critical interrupt*/
START_EXCEPTION(debug_crit);
@@ -747,9 +691,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
beq+ 1f
#ifdef CONFIG_RELOCATABLE
- ld r15,PACATOC(r13)
- ld r14,interrupt_base_book3e@got(r15)
- ld r15,__end_interrupts@got(r15)
+ __LOAD_PACA_TOC(r15)
+ LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+ LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
@@ -787,15 +731,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_CRIT(0xd00)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
kernel_dbg_exc:
b . /* NYI */
@@ -819,9 +762,9 @@ kernel_dbg_exc:
beq+ 1f
#ifdef CONFIG_RELOCATABLE
- ld r15,PACATOC(r13)
- ld r14,interrupt_base_book3e@got(r15)
- ld r15,__end_interrupts@got(r15)
+ __LOAD_PACA_TOC(r15)
+ LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+ LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
@@ -859,26 +802,30 @@ kernel_dbg_exc:
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_DBG(0xd08)
- INTS_DISABLE
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_DBG(0xd08)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260)
- INTS_DISABLE
CHECK_NAPPING()
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
+ /*
+ * XXX: Returning from performance_monitor_exception taken as a
+ * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
+ * and could cause bugs in return or elsewhere. That case should just
+ * restore registers and return. There is a workaround for one known
+ * problem in interrupt_exit_kernel_prepare().
+ */
bl performance_monitor_exception
- b ret_from_except_lite
+ b interrupt_return
/* Doorbell interrupt */
MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
@@ -889,11 +836,10 @@ kernel_dbg_exc:
CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2a0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/*
@@ -904,22 +850,19 @@ kernel_dbg_exc:
GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x2c0)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Guest Doorbell critical Interrupt */
START_EXCEPTION(guest_doorbell_crit);
CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2e0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Hypervisor call */
@@ -927,33 +870,55 @@ kernel_dbg_exc:
NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x310)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Embedded Hypervisor priviledged */
START_EXCEPTION(ehpriv);
NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x320)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* LRAT Error interrupt */
START_EXCEPTION(lrat_error);
NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
+
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r11)
+ LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table)
+ LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table)
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+ cmpd r14,r15
+ beq 302f
+ ld r11,0(r14)
+ cmpld r10,r11
+ blt 301f
+ ld r11,8(r14)
+ cmpld r10,r11
+ bge 301f
+ ld r11,16(r14)
+ b 303f
+301:
+ addi r14,r14,24
+ b 300b
+302:
+ li r11,0
+303:
+.endm
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
@@ -963,6 +928,9 @@ kernel_dbg_exc:
*/
.macro masked_interrupt_book3e paca_irq full_mask
+ std r14,PACA_EXGEN+EX_R14(r13)
+ std r15,PACA_EXGEN+EX_R15(r13)
+
lbz r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
@@ -972,22 +940,29 @@ kernel_dbg_exc:
stb r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
- rldicl r10,r11,48,1 /* clear MSR_EE */
- rotldi r11,r10,16
+ xori r11,r11,MSR_EE /* clear MSR_EE */
mtspr SPRN_SRR1,r11
.endif
+ mfspr r10,SPRN_SRR0
+ SEARCH_RESTART_TABLE
+ cmpdi r11,0
+ beq 1f
+ mtspr SPRN_SRR0,r11 /* return to restart address */
+1:
+
lwz r11,PACA_EXGEN+EX_CR(r13)
mtcr r11
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
b .
.endm
masked_interrupt_book3e_0x500:
- // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
masked_interrupt_book3e PACA_IRQ_EE 1
masked_interrupt_book3e_0x900:
@@ -1003,126 +978,26 @@ masked_interrupt_book3e_0x2c0:
masked_interrupt_book3e PACA_IRQ_DBELL 0
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
- * to indicate the kind of interrupt. MSR:EE is already off.
- * We generate a stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about.
- */
- mflr r10
- mfmsr r11
- mfcr r4
- mtspr SPRN_SPRG_GEN_SCRATCH,r13;
- std r1,PACA_EXGEN+EX_R1(r13);
- stw r4,PACA_EXGEN+EX_CR(r13);
- ori r11,r11,MSR_EE
- subi r1,r1,INT_FRAME_SIZE;
- cmpwi cr0,r3,0x500
- beq exc_0x500_common
- cmpwi cr0,r3,0x900
- beq exc_0x900_common
- cmpwi cr0,r3,0x280
- beq exc_0x280_common
- blr
-
-
-/*
* This is called from 0x300 and 0x400 handlers after the prologs with
* r14 and r15 containing the fault address and error code, with the
* original values stashed away in the PACA
*/
-storage_fault_common:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
- mr r5,r15
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
+SYM_CODE_START_LOCAL(storage_fault_common)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_page_fault
- cmpdi r3,0
- bne- 1f
- b ret_from_except_lite
-1: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
+ b interrupt_return
+SYM_CODE_END(storage_fault_common)
/*
* Alignment exception doesn't fit entirely in the 0x100 bytes so it
* continues here.
*/
-alignment_more:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
- bl save_nvgprs
- INTS_RESTORE_HARD
+SYM_CODE_START_LOCAL(alignment_more)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl alignment_exception
- b ret_from_except
-
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
-
-/* This is the return from load_up_fpu fast path which could do with
- * less GPR restores in fact, but for now we have a single return path
- */
- .globl fast_exception_return
-fast_exception_return:
- wrteei 0
-1: mr r0,r13
- ld r10,_MSR(r1)
- REST_4GPRS(2, r1)
- andi. r6,r10,MSR_PR
- REST_2GPRS(6, r1)
- beq 1f
- ACCOUNT_CPU_USER_EXIT(r13, r10, r11)
- ld r0,GPR13(r1)
-
-1: stdcx. r0,0,r1 /* to clear the reservation */
-
- ld r8,_CCR(r1)
- ld r9,_LINK(r1)
- ld r10,_CTR(r1)
- ld r11,_XER(r1)
- mtcr r8
- mtlr r9
- mtctr r10
- mtxer r11
- REST_2GPRS(8, r1)
- ld r10,GPR10(r1)
- ld r11,GPR11(r1)
- ld r12,GPR12(r1)
- mtspr SPRN_SPRG_GEN_SCRATCH,r0
-
- std r10,PACA_EXGEN+EX_R10(r13);
- std r11,PACA_EXGEN+EX_R11(r13);
- ld r10,_NIP(r1)
- ld r11,_MSR(r1)
- ld r0,GPR0(r1)
- ld r1,GPR1(r1)
- mtspr SPRN_SRR0,r10
- mtspr SPRN_SRR1,r11
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- mfspr r13,SPRN_SPRG_GEN_SCRATCH
- rfi
+ REST_NVGPRS(r1)
+ b interrupt_return
+SYM_CODE_END(alignment_more)
/*
* Trampolines used when spotting a bad kernel stack pointer in
@@ -1161,8 +1036,7 @@ BAD_STACK_TRAMPOLINE(0xe00)
BAD_STACK_TRAMPOLINE(0xf00)
BAD_STACK_TRAMPOLINE(0xf20)
- .globl bad_stack_book3e
-bad_stack_book3e:
+_GLOBAL(bad_stack_book3e)
/* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */
ld r1,PACAEMERGSP(r13)
@@ -1175,19 +1049,16 @@ bad_stack_book3e:
std r11,_CCR(r1)
mfspr r10,SPRN_DEAR
mfspr r11,SPRN_ESR
- std r10,_DAR(r1)
- std r11,_DSISR(r1)
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ std r10,_DEAR(r1)
+ std r11,_ESR(r1)
+ SAVE_GPR(0, r1); /* save r0 in stackframe */ \
+ SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \
ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
std r3,GPR10(r1); /* save r10 to stackframe */ \
std r4,GPR11(r1); /* save r11 to stackframe */ \
- std r12,GPR12(r1); /* save r12 in stackframe */ \
+ SAVE_GPR(12, r1); /* save r12 in stackframe */ \
std r5,GPR13(r1); /* save it to stackframe */ \
mflr r10
mfctr r11
@@ -1195,16 +1066,15 @@ bad_stack_book3e:
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
- SAVE_10GPRS(14,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_NVGPRS(r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
std r11,0(r1)
- li r12,0
+ ZEROIZE_GPR(12)
std r12,0(r11)
- ld r2,PACATOC(r13)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
+ LOAD_PACA_TOC()
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_bad_stack
b 1b
@@ -1245,7 +1115,7 @@ found_iprot:
* r3 = MAS0_TLBSEL (for the iprot array)
* r4 = SPRN_TLBnCFG
*/
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
@@ -1314,7 +1184,7 @@ skpinv: addi r6,r6,1 /* Increment */
mfmsr r6
xori r6,r6,MSR_IS
mtspr SPRN_SRR1,r6
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r6
addi r6,r6,(2f - 1b)
mtspr SPRN_SRR0,r6
@@ -1374,7 +1244,7 @@ skpinv: addi r6,r6,1 /* Increment */
* r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
*/
/* Now we branch the new virtual address mapped by this entry */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r6
addi r6,r6,(2f - 1b)
tovirt(r6,r6)
@@ -1420,8 +1290,7 @@ have_hes:
* ever takes any parameters, the SCOM code must also be updated to
* provide them.
*/
- .globl a2_tlbinit_code_start
-a2_tlbinit_code_start:
+_GLOBAL(a2_tlbinit_code_start)
ori r11,r3,MAS0_WQ_ALLWAYS
oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
@@ -1443,7 +1312,12 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r5)
+ LOAD_REG_ADDR_ALTTOC(r3, r5, 1f)
+#else
LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
mtctr r3
bctr
@@ -1609,8 +1483,7 @@ _GLOBAL(book3e_secondary_thread_init)
mflr r28
b 3b
- .globl init_core_book3e
-init_core_book3e:
+_GLOBAL(init_core_book3e)
/* Establish the interrupt vector base */
tovirt(r2,r2)
LOAD_REG_ADDR(r3, interrupt_base_book3e)
@@ -1618,7 +1491,7 @@ init_core_book3e:
sync
blr
-init_thread_book3e:
+SYM_CODE_START_LOCAL(init_thread_book3e)
lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
mtspr SPRN_EPCR,r3
@@ -1632,6 +1505,7 @@ init_thread_book3e:
mtspr SPRN_TSR,r3
blr
+SYM_CODE_END(init_thread_book3e)
_GLOBAL(__setup_base_ivors)
SET_IVOR(0, 0x020) /* Critical Input */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 46508b148e16..eaf2f167c342 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -13,6 +13,7 @@
*
*/
+#include <linux/linkage.h>
#include <asm/hw_irq.h>
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
@@ -21,28 +22,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
-#if defined(CONFIG_RELOCATABLE)
-#define EX_CTR 72
-.if EX_SIZE != 10
- .error "EX_SIZE is wrong"
-.endif
-#else
-.if EX_SIZE != 9
- .error "EX_SIZE is wrong"
-.endif
-#endif
-
/*
* Following are fixed section helper macros.
*
@@ -50,7 +29,6 @@
* EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
* TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
* TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
* EXC_COMMON - After switching to virtual, relocated mode.
*/
@@ -71,7 +49,7 @@
.balign IFETCH_ALIGN_BYTES; \
.global name; \
_ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name, text); \
name:
#define TRAMP_REAL_BEGIN(name) \
@@ -80,13 +58,6 @@ name:
#define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
#define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
@@ -106,282 +77,161 @@ name:
ld reg,PACAKBASE(r13); /* get high part of &label */ \
ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-#define __LOAD_HANDLER(reg, label) \
+#define __LOAD_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l
+ ori reg,reg,(ABS_ADDR(label, section))@l
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*/
-#define __LOAD_FAR_HANDLER(reg, label) \
+#define __LOAD_FAR_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l; \
- addis reg,reg,(ABS_ADDR(label))@h
-
-/* Exception register prefixes */
-#define EXC_HV_OR_STD 2 /* depends on HVMODE */
-#define EXC_HV 1
-#define EXC_STD 0
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
+ ori reg,reg,(ABS_ADDR(label, section))@l; \
+ addis reg,reg,(ABS_ADDR(label, section))@h
/*
- * Save a register to the PACA if the CPU has the given feature
+ * Interrupt code generation macros
*/
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */
+#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
+#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
+#define __ISTACK(name) .L_ISTACK_ ## name
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
+ .endif
+ .ifndef IHSRR
+ IHSRR=0
+ .endif
+ .ifndef IHSRR_IF_HVMODE
+ IHSRR_IF_HVMODE=0
+ .endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
+ .endif
+ .ifndef IVIRT
+ IVIRT=1
+ .endif
+ .ifndef IISIDE
+ IISIDE=0
+ .endif
+ .ifndef ICFAR
+ ICFAR=1
+ .endif
+ .ifndef ICFAR_IF_HVMODE
+ ICFAR_IF_HVMODE=0
+ .endif
+ .ifndef IDAR
+ IDAR=0
+ .endif
+ .ifndef IDSISR
+ IDSISR=0
+ .endif
+ .ifndef IBRANCH_TO_COMMON
+ IBRANCH_TO_COMMON=1
+ .endif
+ .ifndef IREALMODE_COMMON
+ IREALMODE_COMMON=0
+ .else
+ .if ! IBRANCH_TO_COMMON
+ .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+ .endif
+ .endif
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+ .ifndef ISTACK
+ ISTACK=1
+ .endif
+ .ifndef IKUAP
+ IKUAP=1
+ .endif
+ .ifndef IMSR_R12
+ IMSR_R12=0
+ .endif
+.endm
/*
- * Branch to label using its 0xC000 address. This results in instruction
- * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
- * on using mtmsr rather than rfid.
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
*
- * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
- * load KBASE for a slight optimisation.
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
*/
-#define BRANCH_TO_C000(reg, label) \
- __LOAD_FAR_HANDLER(reg, label); \
- mtctr reg; \
- bctr
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
- TRAMP_KVM_BEGIN(\name\()_kvm)
- KVM_HANDLER \vec, \hsrr, \area, \skip
-.endm
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle.
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- bne \name\()_kvm
-.endm
-
-.macro KVM_HANDLER vec, hsrr, area, skip
- .if \skip
- cmpwi r10,KVM_GUEST_MODE_SKIP
- beq 89f
- .else
-BEGIN_FTR_SECTION_NESTED(947)
- ld r10,\area+EX_CFAR(r13)
- std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
- .endif
-
-BEGIN_FTR_SECTION_NESTED(948)
- ld r10,\area+EX_PPR(r13)
- std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
- ld r10,\area+EX_R10(r13)
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- ori r12,r12,(\vec + 0x2)
- FTR_SECTION_ELSE
- ori r12,r12,(\vec)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- ori r12,r12,(\vec + 0x2)
- .else
- ori r12,r12,(\vec)
- .endif
-
-#ifdef CONFIG_RELOCATABLE
- /*
- * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
- * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
- * to be saved in HSTATE_SCRATCH1.
- */
- mfctr r9
- std r9,HSTATE_SCRATCH1(r13)
- __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
- mtctr r9
- ld r9,\area+EX_R9(r13)
- bctr
-#else
- ld r9,\area+EX_R9(r13)
- b kvmppc_interrupt
-#endif
-
-
- .if \skip
-89: mtocrf 0x80,r9
- ld r9,\area+EX_R9(r13)
- ld r10,\area+EX_R10(r13)
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- b kvmppc_skip_Hinterrupt
- FTR_SECTION_ELSE
- b kvmppc_skip_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- b kvmppc_skip_Hinterrupt
- .else
- b kvmppc_skip_interrupt
- .endif
- .endif
-.endm
-
-#else
-.macro KVMTEST name, hsrr, n
-.endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
-.endm
-#endif
-
-.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- FTR_SECTION_ELSE
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
-.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr == EXC_HV_OR_STD
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
+ li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .elseif IHSRR
+ li r10,(IVEC + 0x2)
.else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
+ li r10,(IVEC)
.endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
+ bne \handler
#endif
.endm
@@ -405,14 +255,53 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* - Fall through and continue executing in real, unrelocated mode.
* This is done if early=2.
*/
-.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+
+.macro GEN_BRANCH_TO_COMMON name, virt
+ .if IREALMODE_COMMON
+ LOAD_HANDLER(r10, \name\()_common)
+ mtctr r10
+ bctr
+ .else
+ .if \virt
+#ifndef CONFIG_RELOCATABLE
+ b \name\()_common_virt
+#else
+ LOAD_HANDLER(r10, \name\()_common_virt)
+ mtctr r10
+ bctr
+#endif
+ .else
+ LOAD_HANDLER(r10, \name\()_common_real)
+ mtctr r10
+ bctr
+ .endif
+ .endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0
SET_SCRATCH0(r13) /* save r13 */
GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ std r9,IAREA+EX_R9(r13) /* save r9 */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ std r10,IAREA+EX_R10(r13) /* save r10 */
+ .if ICFAR
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(69)
+ mfspr r10,SPRN_CFAR
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(69)
+ li r10,0
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .endif
.if \ool
.if !\virt
b tramp_real_\name
@@ -425,47 +314,20 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ .if ICFAR || ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .endif
INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
+ mfctr r10
+ std r10,IAREA+EX_CTR(r13)
mfcr r9
- .if \kvm
- KVMTEST \name \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- bne masked_Hinterrupt
- FTR_SECTION_ELSE
- bne masked_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
+ std r11,IAREA+EX_R11(r13) /* save r11 - r12 */
+ std r12,IAREA+EX_R12(r13)
/*
* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
@@ -473,51 +335,165 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* not recoverable if they are live.
*/
GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- .if \hsrr
+ std r10,IAREA+EX_R13(r13)
+ .if IDAR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDAR
.else
mfspr r10,SPRN_DAR
.endif
- std r10,\area\()+EX_DAR(r13)
+ std r10,IAREA+EX_DAR(r13)
.endif
- .if \dsisr
- .if \hsrr
+ .if IDSISR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDSISR
.else
mfspr r10,SPRN_DSISR
.endif
- stw r10,\area\()+EX_DSISR(r13)
+ stw r10,IAREA+EX_DSISR(r13)
.endif
- .if \early == 2
- /* nothing more */
- .elseif \early
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, \name\()_early_common)
- .elseif !\virt
- INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
.else
- INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+
+ .if IBRANCH_TO_COMMON
+ GEN_BRANCH_TO_COMMON \name \virt
.endif
+
.if \ool
.popsection
.endif
.endm
/*
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
*
- * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
- * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ * This switches to virtual mode and sets MSR[RI].
*/
-.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
- .if \stack
+.macro __GEN_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ /* MSR[RI] is clear iff using SRR regs */
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ xori r10,r10,MSR_RI
+ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+ .elseif ! IHSRR
+ xori r10,r10,MSR_RI
+ .endif
+ mtmsrd r10
+
+ .if IVIRT
+ .if IKVM_VIRT
+ b 1f /* skip the virt test coming from real */
+ .endif
+
+ .balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
+\name\()_common_virt:
+ .if IKVM_VIRT
+ KVMTEST \name kvm_interrupt
+1:
+ .endif
+ .endif /* IVIRT */
+.endm
+
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
+ */
+.macro __GEN_REALMODE_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+.endm
+
+.macro __GEN_COMMON_BODY name
+ .if IMASK
+ .if ! ISTACK
+ .error "No support for masked interrupt to use custom stack"
+ .endif
+
+ /* If coming from user, skip soft-mask tests. */
+ andi. r10,r12,MSR_PR
+ bne 3f
+
+ /*
+ * Kernel code running below __end_soft_masked may be
+ * implicitly soft-masked if it is within the regions
+ * in the soft mask table.
+ */
+ LOAD_HANDLER(r10, __end_soft_masked)
+ cmpld r11,r10
+ bge+ 1f
+
+ /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+ mtctr r12
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ SEARCH_SOFT_MASK_TABLE
+ cmpdi r12,0
+ mfctr r12 /* Restore r12 to SRR1 */
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ beq 1f /* Not in soft-mask table */
+ li r10,IMASK
+ b 2f /* In soft-mask table, always mask */
+
+ /* Test the soft mask state against our interrupt's bit */
+1: lbz r10,PACAIRQSOFTMASK(r13)
+2: andi. r10,r10,IMASK
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if IVEC == 0x500 || IVEC == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif IVEC == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif IVEC == 0xa00 || IVEC == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif IVEC == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif IVEC == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ .if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
- mr r10,r1 /* Save r1 */
+3: mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
beq- 100f
ld r1,PACAKSAVE(r13) /* kernel stack to use */
@@ -531,96 +507,192 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r10,0(r1) /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
+ SANITIZE_GPR(0)
- .if \stack
- .if \kuap
+ /* Mark our [H]SRRs valid for return */
+ li r10,1
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ stb r10,PACAHSRR_VALID(r13)
+ FTR_SECTION_ELSE
+ stb r10,PACASRR_VALID(r13)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ stb r10,PACAHSRR_VALID(r13)
+ .else
+ stb r10,PACASRR_VALID(r13)
+ .endif
+
+ .if ISTACK
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
- SAVE_PPR(\area, r9)
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
101:
.else
- .if \kuap
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1
.endif
.endif
/* Save original regs values from save area to stack frame. */
- ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
- ld r10,\area+EX_R10(r13)
+ ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
- ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
- ld r10,\area+EX_R12(r13)
- ld r11,\area+EX_R13(r13)
+ ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,IAREA+EX_R12(r13)
+ ld r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
- .if \dar
- .if \dar == 2
+ .if !IMSR_R12
+ SANITIZE_GPRS(9, 12)
+ .else
+ SANITIZE_GPRS(9, 11)
+ .endif
+
+ SAVE_NVGPRS(r1)
+ SANITIZE_NVGPRS()
+
+ .if IDAR
+ .if IISIDE
ld r10,_NIP(r1)
.else
- ld r10,\area+EX_DAR(r13)
+ ld r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
- .if \dsisr
- .if \dsisr == 2
+
+ .if IDSISR
+ .if IISIDE
ld r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
.else
- lwz r10,\area+EX_DSISR(r13)
+ lwz r10,IAREA+EX_DSISR(r13)
.endif
std r10,_DSISR(r1)
.endif
-BEGIN_FTR_SECTION_NESTED(66)
- ld r10,\area+EX_CFAR(r13)
+
+BEGIN_FTR_SECTION
+ .if ICFAR || ICFAR_IF_HVMODE
+ ld r10,IAREA+EX_CFAR(r13)
+ .else
+ li r10,0
+ .endif
std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
- GET_CTR(r10, \area)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
- std r2,GPR2(r1) /* save r2 in stackframe */
- SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
- SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */
+ SANITIZE_GPRS(2, 8)
mflr r9 /* Get LR, later save to stack */
- ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ LOAD_PACA_TOC() /* get kernel TOC into r2 */
std r9,_LINK(r1)
lbz r10,PACAIRQSOFTMASK(r13)
mfspr r11,SPRN_XER /* save XER in stackframe */
std r10,SOFTE(r1)
std r11,_XER(r1)
- li r9,(\vec)+1
+ li r9,IVEC
std r9,_TRAP(r1) /* set trap number */
li r10,0
- ld r11,exception_marker@toc(r2)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
std r10,RESULT(r1) /* clear regs->result */
- std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+ std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */
+.endm
- .if \stack
- ACCOUNT_STOLEN_TIME
- .endif
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro GEN_COMMON name
+ __GEN_COMMON_ENTRY \name
+ __GEN_COMMON_BODY \name
+.endm
- .if \reconcile
- RECONCILE_IRQ_STATE(r10, r11)
- .endif
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ LOAD_PACA_TOC()
+ LOAD_REG_ADDR(r9, __start___restart_table)
+ LOAD_REG_ADDR(r10, __stop___restart_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ ld r12,16(r9)
+ b 303f
+301:
+ addi r9,r9,24
+ b 300b
+302:
+ li r12,0
+303:
+.endm
+
+.macro SEARCH_SOFT_MASK_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ LOAD_PACA_TOC()
+ LOAD_REG_ADDR(r9, __start___soft_mask_table)
+ LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ li r12,1
+ b 303f
+301:
+ addi r9,r9,16
+ b 300b
+302:
+ li r12,0
+303:
.endm
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
* standard exception.
*/
-.macro EXCEPTION_RESTORE_REGS hsrr
+.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
- .if \hsrr == EXC_HV_OR_STD
- .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
- .endif
+ li r10,0
.if \hsrr
mtspr SPRN_HSRR1,r9
+ stb r10,PACAHSRR_VALID(r13)
.else
mtspr SPRN_SRR1,r9
+ stb r10,PACASRR_VALID(r13)
.endif
ld r9,_NIP(r1)
.if \hsrr
@@ -636,61 +708,77 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
mtlr r9
ld r9,_CCR(r1)
mtcr r9
- REST_8GPRS(2, r1)
- REST_4GPRS(10, r1)
+ SANITIZE_RESTORE_NVGPRS()
+ REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
ld r1,GPR1(r1)
.endm
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- ld r3, PACA_THREAD_INFO(r13); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
+ * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot.
+ *
+ * There's a short window during boot where although the kernel is running
+ * little endian, any exceptions will cause the CPU to switch back to big
+ * endian. For example a WARN() boils down to a trap instruction, which will
+ * cause a program check, and we end up here but with the CPU in big endian
+ * mode. The first instruction of the program check handler (in GEN_INT_ENTRY
+ * below) is an mtsprg, which when executed in the wrong endian is an lhzu with
+ * a ~3GB displacement from r3. The content of r3 is random, so that is a load
+ * from some random location, and depending on the system can easily lead to a
+ * checkstop, or an infinitely recursive page fault.
+ *
+ * So to handle that case we have a trampoline here that can detect we are in
+ * the wrong endian and flip us back to the correct endian. We can't flip
+ * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1
+ * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for
+ * the paca. SPRG3 is user readable, but this trampoline is only active very
+ * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before
+ * userspace starts.
*/
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- ld r11, PACA_THREAD_INFO(r13); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
+.macro EARLY_BOOT_FIXUP
+BEGIN_FTR_SECTION
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8
+ b 2f // Skip trampoline if endian is correct
+ .long 0xa643707d // mtsprg 0, r11 Backup r11
+ .long 0xa6027a7d // mfsrr0 r11
+ .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2
+ .long 0xa6027b7d // mfsrr1 r11
+ .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3
+ .long 0xa600607d // mfmsr r11
+ .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE]
+ .long 0xa6037b7d // mtsrr1 r11
+ /*
+ * This is 'li r11,1f' where 1f is the absolute address of that
+ * label, byteswapped into the SI field of the instruction.
+ */
+ .long 0x00006039 | \
+ ((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \
+ ((ABS_ADDR(1f, real_vectors) & 0xff00) << 8)
+ .long 0xa6037a7d // mtsrr0 r11
+ .long 0x2400004c // rfid
+1:
+ mfsprg r11, 3
+ mtsrr1 r11 // Restore SRR1
+ mfsprg r11, 2
+ mtsrr0 r11 // Restore SRR0
+ mfsprg r11, 0 // Restore r11
+2:
#endif
-
-#define EXC_COMMON(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- bl save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except
-
-/*
- * Like EXC_COMMON, but for exceptions that can occur in the idle task and
- * therefore need the special idle handling (finish nap and runlatch)
- */
-#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- FINISH_NAP; \
- RUNLATCH_ON; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except_lite
-
+ /*
+ * program check could hit at any time, and pseries can not block
+ * MSR[ME] in early boot. So check if there is anything useful in r13
+ * yet, and spin forever if not.
+ */
+ mtsprg 0, r11
+ mfcr r11
+ cmpdi r13, 0
+ beq .
+ mtcr r11
+ mfsprg r11, 0
+END_FTR_SECTION(0, 1) // nop out after boot
+.endm
/*
* There are a few constraints to be concerned with.
@@ -716,6 +804,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* guarantee they will be delivered virtually. Some conditions (see the ISA)
* cause exceptions to be delivered in real mode.
*
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
* It's impossible to receive interrupts below 0x300 via AIL.
*
* KVM: None of the virtual exceptions are from the guest. Anything that
@@ -725,8 +816,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
- * 0x1900 - 0x3fff : Real mode trampolines
- * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
* 0x5900 - 0x6fff : Relon mode trampolines
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - .... : Common interrupt handlers, remaining early
@@ -737,8 +828,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* vectors there.
*/
OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
-OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
-OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
+OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
#ifdef CONFIG_PPC_POWERNV
@@ -774,10 +865,131 @@ USE_FIXED_SECTION(real_vectors)
.globl __start_interrupts
__start_interrupts:
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the following inside-__end_soft_masked text, at least of the
+ * following must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
+ * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/arch/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+ /* SCV 0 */
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_tramp
+#else
+ b system_call_vectored_common
+#endif
+ nop
+
+ /* SCV 1 - 127 */
+ .rept 127
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ li r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_sigill_tramp
+#else
+ b system_call_vectored_sigill
+#endif
+ .endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+// Treat scv vectors as soft-masked, see comment above.
+// Use absolute values rather than labels here, so they don't get relocated,
+// because this code runs unrelocated.
+SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)
+ mtctr r10
+ bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)
+ mtctr r10
+ bctr
+#endif
+
+
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x100)
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+ IVEC=0x100
+ IAREA=PACA_EXNMI
+ IVIRT=0 /* no virt entry point */
+ ISTACK=0
+ IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -815,11 +1027,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
+ GEN_INT_ENTRY system_reset, virt=0
/*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- *
* In theory, we should not enable relocation here if it was disabled
* in SRR1, because the MMU may not be configured to support it (e.g.,
* SLB may have been cleared). In practice, there should only be a few
@@ -828,14 +1037,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
/* We are waking up from idle, so may clobber any volatile register */
cmpwi cr1,r5,2
bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
- BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
+ __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
+ mtctr r12
+ bctr
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -843,45 +1053,29 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
- /* See comment at system_reset exception, don't turn on RI */
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+ GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
+ __GEN_COMMON_ENTRY system_reset
/*
- * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
- * to recover, but nested NMI will notice in_nmi and not recover
- * because of the use of the NMI stack. in_nmi reentrancy is tested in
- * system_reset_exception.
+ * Increment paca->in_nmi. When the interrupt entry wrapper later
+ * enable MSR_RI, then SLB or MCE will be able to recover, but a nested
+ * NMI will notice in_nmi and not recover because of the use of the NMI
+ * stack. in_nmi reentrancy is tested in system_reset_exception.
*/
lhz r10,PACA_IN_NMI(r13)
addi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- li r10,MSR_RI
- mtmsrd r10,1
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
- bl save_nvgprs
- /*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,_DAR(r1)
+ __GEN_COMMON_BODY system_reset
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl system_reset_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(system_reset_exception)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
@@ -894,36 +1088,86 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- /*
- * Restore soft mask settings.
- */
- ld r10,_DAR(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
- EXCEPTION_RESTORE_REGS EXC_STD
+ kuap_kernel_restore r9, r10
+ EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IDAR=1
+ IDSISR=1
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
- /*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
- */
+ EARLY_BOOT_FIXUP
+ GEN_INT_ENTRY machine_check_early, virt=0
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
#ifdef CONFIG_PPC_PSERIES
TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ GEN_INT_ENTRY machine_check_early, virt=0
#endif
-INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
-
#define MACHINE_CHECK_HANDLER_WINDUP \
/* Clear MSR_RI before setting SRR0 and SRR1. */\
li r9,0; \
@@ -932,12 +1176,10 @@ INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
lhz r12,PACA_IN_MCE(r13); \
subi r12,r12,1; \
sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
EXC_COMMON_BEGIN(machine_check_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
+ __GEN_REALMODE_COMMON_ENTRY machine_check_early
/*
* Switch to mc_emergency stack and handle re-entrancy (we limit
@@ -974,18 +1216,16 @@ EXC_COMMON_BEGIN(machine_check_early_common)
bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+ __GEN_COMMON_BODY machine_check_early
BEGIN_FTR_SECTION
bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- li r10,MSR_RI
- mtmsrd r10,1
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
+ addi r3,r1,STACK_INT_FRAME_REGS
+BEGIN_FTR_SECTION
+ bl CFUNC(machine_check_early_boot)
+END_FTR_SECTION(0, 1) // nop out after boot
+ bl CFUNC(machine_check_early)
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
@@ -1009,7 +1249,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
- * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
@@ -1046,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* Queue up the MCE event so that we can log it later, while
* returning from kernel or opal call.
*/
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_KERNEL
@@ -1063,23 +1303,18 @@ BEGIN_FTR_SECTION
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MACHINE_CHECK_HANDLER_WINDUP
- /* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY machine_check, virt=0
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
- FINISH_NAP
- /* Enable MSR_RI when finished with PACA_EXMC */
- li r10,MSR_RI
- mtmsrd r10,1
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- b ret_from_except
+ GEN_COMMON machine_check
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(machine_check_exception_async)
+ b interrupt_return_srr
+
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1087,20 +1322,22 @@ EXC_COMMON_BEGIN(machine_check_common)
* done. Queue the event then call the idle code to do the wake up.
*/
EXC_COMMON_BEGIN(machine_check_idle_common)
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
/*
- * We have not used any non-volatile GPRs here, and as a rule
- * most exception code including machine check does not.
- * Therefore PACA_NAPSTATELOST does not need to be set. Idle
- * wakeup will restore volatile registers.
+ * GPR-loss wakeups are relatively straightforward, because the
+ * idle sleep code has saved all non-volatile registers on its
+ * own stack, and r1 in PACAR1.
*
- * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+ * For no-loss wakeups the r1 and lr registers used by the
+ * early machine check handler have to be restored first. r2 is
+ * the kernel TOC, so no need to restore it.
*
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
ld r4,_LINK(r1)
+ ld r1,GPR1(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
@@ -1109,7 +1346,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
mtlr r4
rlwinm r10,r3,47-31,30,31
cmpwi cr1,r10,2
- bltlr cr1 /* no state loss, return to idle caller */
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
b idle_return_gpr_loss
#endif
@@ -1124,168 +1361,355 @@ EXC_COMMON_BEGIN(unrecoverable_mce)
BEGIN_FTR_SECTION
li r10,0 /* clear MSR_RI */
mtmsrd r10,1
- bl disable_machine_check
+ bl CFUNC(disable_machine_check)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
li r3,MSR_ME
andc r10,r10,r3
mtmsrd r10
- /* Invoke machine_check_exception to print MCE event and panic. */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
+ lhz r12,PACA_IN_MCE(r13)
+ subi r12,r12,1
+ sth r12,PACA_IN_MCE(r13)
+
+ /*
+ * Invoke machine_check_exception to print MCE event and panic.
+ * This is the NMI version of the handler because we are called from
+ * the early handler which is a true NMI.
+ */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(machine_check_exception)
/*
* We will not reach here. Even if we did, there is no way out.
* Call unrecoverable_exception and die.
*/
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unrecoverable_exception)
b .
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load orstore which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+ * Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page table entries.
+ *
+ * If no entry is found the Linux page fault handler is invoked (by
+ * do_hash_fault). Linux page faults can happen in kernel mode due to user
+ * copy operations of course.
+ *
+ * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ * MMU context, which may cause a DSI in the host, which must go to the
+ * KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ * always be used regardless of AIL setting.
+ *
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
+ */
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access)
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY data_access, virt=0
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY data_access, virt=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
- /*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- * EX_DAR and EX_DSISR have saved DAR/DSISR
- */
- INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
+ GEN_COMMON data_access
+ ld r4,_DSISR(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ andis. r0,r4,DSISR_DABRMATCH@h
+ bne- 1f
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x300
- b do_hash_page /* Try to handle as hpte fault */
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl CFUNC(do_page_fault)
+#endif
+ b interrupt_return_srr
+1: bl CFUNC(do_break)
+ /*
+ * do_break() may have changed the NV GPRS while handling a breakpoint.
+ * If so, we need to restore them with their updated values.
+ */
+ HANDLER_RESTORE_NVGPRS()
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * KVM: Same as 0x300, DSLB must test for KVM guest.
+ */
+INT_DEFINE_BEGIN(data_access_slb)
+ IVEC=0x380
+ IDAR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access_slb)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
+ GEN_INT_ENTRY data_access_slb, virt=0
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
+ GEN_INT_ENTRY data_access_slb, virt=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
- bl do_slb_fault
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_bad_segment_interrupt)
+ b interrupt_return_srr
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)
+ IVEC=0x400
+ IISIDE=1
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access)
+
EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
- INT_HANDLER instruction_access, 0x400, kvm=1
+ GEN_INT_ENTRY instruction_access, virt=0
EXC_REAL_END(instruction_access, 0x400, 0x80)
EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
- INT_HANDLER instruction_access, 0x400, virt=1
+ GEN_INT_ENTRY instruction_access, virt=1
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
-INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
+ GEN_COMMON instruction_access
+ addi r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x400
- b do_hash_page /* Try to handle as hpte fault */
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl CFUNC(do_page_fault)
+#endif
+ b interrupt_return_srr
+
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)
+ IVEC=0x480
+ IISIDE=1
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access_slb)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+ GEN_INT_ENTRY instruction_access_slb, virt=0
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+ GEN_INT_ENTRY instruction_access_slb, virt=1
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
- bl do_slb_fault
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_bad_segment_interrupt)
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers, guests use SRRs, which
+ * reqiures IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
+ * because registers at the time of the interrupt are not so important as it is
+ * asynchronous.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)
+ IVEC=0x500
+ IHSRR_IF_HVMODE=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+ ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR_IF_HVMODE=1
+#endif
+INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=0
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+ GEN_COMMON hardware_interrupt
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_IRQ)
+ BEGIN_FTR_SECTION
+ b interrupt_return_hsrr
+ FTR_SECTION_ELSE
+ b interrupt_return_srr
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)
+ IVEC=0x600
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(alignment)
+
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY alignment, virt=0
EXC_REAL_END(alignment, 0x600, 0x100)
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY alignment, virt=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl alignment_exception
- b ret_from_except
+ GEN_COMMON alignment
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(alignment_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)
+ IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(program_check)
+
EXC_REAL_BEGIN(program_check, 0x700, 0x100)
- INT_HANDLER program_check, 0x700, kvm=1
+ EARLY_BOOT_FIXUP
+ GEN_INT_ENTRY program_check, virt=0
EXC_REAL_END(program_check, 0x700, 0x100)
EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
- INT_HANDLER program_check, 0x700, virt=1
+ GEN_INT_ENTRY program_check, virt=1
EXC_VIRT_END(program_check, 0x4700, 0x100)
-INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
+ __GEN_COMMON_ENTRY program_check
+
/*
* It's possible to receive a TM Bad Thing type program check with
* userspace register values (in particular r1), but with SRR1 reporting
@@ -1296,45 +1720,64 @@ EXC_COMMON_BEGIN(program_check_common)
*/
andi. r10,r12,MSR_PR
- bne 2f /* If userspace, go normal path */
+ bne .Lnormal_stack /* If userspace, go normal path */
andis. r10,r12,(SRR1_PROGTM)@h
- bne 1f /* If TM, emergency */
+ bne .Lemergency_stack /* If TM, emergency */
cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */
- blt 2f /* normal path if not */
+ blt .Lnormal_stack /* normal path if not */
/* Use the emergency stack */
-1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
+.Lemergency_stack:
+ andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
/* 3 in EXCEPTION_PROLOG_COMMON */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
- b 3f
-2:
- INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
-3:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl program_check_exception
- b ret_from_except
+ __ISTACK(program_check)=0
+ __GEN_COMMON_BODY program_check
+ b .Ldo_program_check
+
+.Lnormal_stack:
+ __ISTACK(program_check)=1
+ __GEN_COMMON_BODY program_check
+
+.Ldo_program_check:
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(program_check_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)
+ IVEC=0x800
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(fp_unavailable)
+
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, kvm=1
+ GEN_INT_ENTRY fp_unavailable, virt=0
EXC_REAL_END(fp_unavailable, 0x800, 0x100)
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, virt=1
+ GEN_INT_ENTRY fp_unavailable, virt=1
EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
-INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl kernel_fp_unavailable_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(kernel_fp_unavailable_exception)
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
@@ -1347,89 +1790,183 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- bl load_up_fpu
- b fast_exception_return
+ bl CFUNC(load_up_fpu)
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl fp_unavailable_tm
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(fp_unavailable_tm)
+ b interrupt_return_srr
#endif
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
+ */
+INT_DEFINE_BEGIN(decrementer)
+ IVEC=0x900
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ ICFAR=0
+INT_DEFINE_END(decrementer)
+
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
- INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY decrementer, virt=0
EXC_REAL_END(decrementer, 0x900, 0x80)
EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
- INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY decrementer, virt=1
EXC_VIRT_END(decrementer, 0x4900, 0x80)
-INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+EXC_COMMON_BEGIN(decrementer_common)
+ GEN_COMMON decrementer
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(timer_interrupt)
+ b interrupt_return_srr
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)
+ IVEC=0x980
+ IHSRR=1
+ ISTACK=0
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
+
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
- INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=0
EXC_REAL_END(hdecrementer, 0x980, 0x80)
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
- INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
-INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+EXC_COMMON_BEGIN(hdecrementer_common)
+ __GEN_COMMON_ENTRY hdecrementer
+ /*
+ * Hypervisor decrementer interrupts not caught by the KVM test
+ * shouldn't occur but are sometimes left pending on exit from a KVM
+ * guest. We don't need to do anything to clear them, as they are
+ * edge-triggered.
+ *
+ * Be careful to avoid touching the kernel stack.
+ */
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_KERNEL
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRS are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
+ */
+INT_DEFINE_BEGIN(doorbell_super)
+ IVEC=0xa00
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ ICFAR=0
+INT_DEFINE_END(doorbell_super)
+
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY doorbell_super, virt=0
EXC_REAL_END(doorbell_super, 0xa00, 0x100)
EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY doorbell_super, virt=1
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
-INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(doorbell_super_common)
+ GEN_COMMON doorbell_super
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+ bl CFUNC(doorbell_exception)
#else
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+ bl CFUNC(unknown_async_exception)
#endif
+ b interrupt_return_srr
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * system call / hypercall (0xc00, 0x4c00)
- *
- * The system call exception is invoked with "sc 0" and does not alter HV bit.
- *
- * The hypercall is invoked with "sc 1" and sets HV=1.
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
*
* In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
* 0x4c00 virtual mode.
*
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
- *
- * For hypercalls, the register convention is as follows:
- * r0 volatile
- * r1-2 nonvolatile
- * r3 volatile parameter and return value for status
- * r4-r10 volatile input and output value
- * r11 volatile hypercall number and output value
- * r12 volatile input and output value
- * r13-r31 nonvolatile
- * LR nonvolatile
- * CTR volatile
- * XER volatile
- * CR0-1 CR5-7 volatile
- * CR2-4 nonvolatile
- * Other registers nonvolatile
+ * syscall and hypercalls register conventions are documented in
+ * Documentation/arch/powerpc/syscall64-abi.rst and
+ * Documentation/arch/powerpc/papr_hcalls.rst respectively.
*
* The intersection of volatile registers that don't contain possible
* inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+ ICFAR=0
+INT_DEFINE_END(system_call)
+
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1444,7 +1981,7 @@ EXC_VIRT_NONE(0x4b00, 0x100)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
@@ -1466,17 +2003,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
HMT_MEDIUM
.if ! \virt
- __LOAD_HANDLER(r10, system_call_common)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
+ __LOAD_HANDLER(r10, system_call_common_real, real_vectors)
+ mtctr r10
+ bctr
.else
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
#ifdef CONFIG_RELOCATABLE
- __LOAD_HANDLER(r10, system_call_common)
+ __LOAD_HANDLER(r10, system_call_common, virt_vectors)
mtctr r10
bctr
#else
@@ -1503,108 +2035,213 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- /*
- * This is a hcall, so register convention is as above, with these
- * differences:
- * r13 = PACA
- * ctr = orig r13
- * orig r10 saved in PACA
- */
-TRAMP_KVM_BEGIN(system_call_kvm)
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
- OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_PPR
+ std r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
HMT_MEDIUM
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
- mfctr r10
- SET_SCRATCH0(r10)
- std r9,PACA_EXGEN+EX_R9(r13)
- mfcr r9
- KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
+ * outside the head section.
+ */
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
+ mtctr r10
+ bctr
+#else
+ b kvmppc_hcall
+#endif
#endif
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+ IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
- INT_HANDLER single_step, 0xd00, kvm=1
+ GEN_INT_ENTRY single_step, virt=0
EXC_REAL_END(single_step, 0xd00, 0x100)
EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
- INT_HANDLER single_step, 0xd00, virt=1
+ GEN_INT_ENTRY single_step, virt=1
EXC_VIRT_END(single_step, 0x4d00, 0x100)
-INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+EXC_COMMON_BEGIN(single_step_common)
+ GEN_COMMON single_step
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(single_step_exception)
+ b interrupt_return_srr
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+ IVEC=0xe00
+ IHSRR=1
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
+
EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=0, ool=1
EXC_REAL_END(h_data_storage, 0xe00, 0x20)
EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=1, ool=1
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
-INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ GEN_COMMON h_data_storage
+ addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_MMU_FTR_SECTION
- ld r4,_DAR(r1)
- li r5,SIGSEGV
- bl bad_page_fault
+ bl CFUNC(do_bad_page_fault_segv)
MMU_FTR_SECTION_ELSE
- bl unknown_exception
+ bl CFUNC(unknown_exception)
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b ret_from_except
+ b interrupt_return_hsrr
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+ IVEC=0xe20
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
+
EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
-INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+ GEN_COMMON h_instr_storage
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unknown_exception)
+ b interrupt_return_hsrr
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+ IVEC=0xe40
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
+
EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=0, ool=1
EXC_REAL_END(emulation_assist, 0xe40, 0x20)
EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=1, ool=1
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
-INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
-
-
-/*
- * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
- * first, and then eventaully from there to the trampoline to get into virtual
- * mode.
+EXC_COMMON_BEGIN(emulation_assist_common)
+ GEN_COMMON emulation_assist
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(emulation_assist_interrupt)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case, this is handled similarly to machine checks, with an
+ * initial real mode handler that is not soft-masked, which attempts to fix the
+ * problem. Then a regular handler which is soft-maskable and reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
*/
+INT_DEFINE_BEGIN(hmi_exception_early)
+ IVEC=0xe60
+ IHSRR=1
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+ IVEC=0xe60
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+ GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+
EXC_COMMON_BEGIN(hmi_exception_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ __GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+ __GEN_COMMON_BODY hmi_exception_early
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl hmi_exception_realmode
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(hmi_exception_realmode)
cmpdi cr0,r3,0
bne 1f
- EXCEPTION_RESTORE_REGS EXC_HV
+ EXCEPTION_RESTORE_REGS hsrr=1
HRFI_TO_USER_OR_KERNEL
1:
@@ -1612,41 +2249,84 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- EXCEPTION_RESTORE_REGS EXC_HV
- INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ EXCEPTION_RESTORE_REGS hsrr=1
+ GEN_INT_ENTRY hmi_exception, virt=0
EXC_COMMON_BEGIN(hmi_exception_common)
- INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
- FINISH_NAP
- RUNLATCH_ON
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl handle_hmi_exception
- b ret_from_except
+ GEN_COMMON hmi_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(handle_hmi_exception)
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+ IVEC=0xe80
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
+INT_DEFINE_END(h_doorbell)
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=0, ool=1
EXC_REAL_END(h_doorbell, 0xe80, 0x20)
EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=1, ool=1
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
-INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(h_doorbell_common)
+ GEN_COMMON h_doorbell
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
+ bl CFUNC(doorbell_exception)
#else
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
+ bl CFUNC(unknown_async_exception)
#endif
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+ IVEC=0xea0
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
+INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
-INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+ GEN_COMMON h_virt_irq
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_IRQ)
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xec0, 0x20)
@@ -1655,25 +2335,80 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears an NMI interrupt to Linux, in that it
+ * runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+ IVEC=0xf00
+ IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ ICFAR=0
+INT_DEFINE_END(performance_monitor)
+
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+ GEN_INT_ENTRY performance_monitor, virt=0, ool=1
EXC_REAL_END(performance_monitor, 0xf00, 0x20)
EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+ GEN_INT_ENTRY performance_monitor, virt=1, ool=1
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
-INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
+EXC_COMMON_BEGIN(performance_monitor_common)
+ GEN_COMMON performance_monitor
+ addi r3,r1,STACK_INT_FRAME_REGS
+ lbz r4,PACAIRQSOFTMASK(r13)
+ cmpdi r4,IRQS_ENABLED
+ bne 1f
+ bl CFUNC(performance_monitor_exception_async)
+ b interrupt_return_srr
+1:
+ bl CFUNC(performance_monitor_exception_nmi)
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ kuap_kernel_restore r9, r10
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+ IVEC=0xf20
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(altivec_unavailable)
EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+ GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+ GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
-INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON altivec_unavailable
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1686,35 +2421,44 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- bl load_up_altivec
- b fast_exception_return
+ bl CFUNC(load_up_altivec)
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl altivec_unavailable_tm
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(altivec_unavailable_tm)
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl altivec_unavailable_exception
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(altivec_unavailable_exception)
+ b interrupt_return_srr
+
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+ IVEC=0xf40
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(vsx_unavailable)
EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+ GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+ GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
-INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON vsx_unavailable
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1730,40 +2474,73 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl vsx_unavailable_tm
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(vsx_unavailable_tm)
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl vsx_unavailable_exception
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(vsx_unavailable_exception)
+ b interrupt_return_srr
+
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+ IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+ GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+ GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
-INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-
+EXC_COMMON_BEGIN(facility_unavailable_common)
+ GEN_COMMON facility_unavailable
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(facility_unavailable_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+ IVEC=0xf80
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
-INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+ GEN_COMMON h_facility_unavailable
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(facility_unavailable_exception)
+ /* XXX Shouldn't be necessary in practice */
+ HANDLER_RESTORE_NVGPRS()
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xfa0, 0x20)
@@ -1779,56 +2556,94 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_system_error)
+ IVEC=0x1200
+ IHSRR=1
+INT_DEFINE_END(cbe_system_error)
+
EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
- INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_system_error, virt=0
EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
+EXC_COMMON_BEGIN(cbe_system_error_common)
+ GEN_COMMON cbe_system_error
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(cbe_system_error_exception)
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
#endif
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This has been removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported, however the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
+INT_DEFINE_BEGIN(instruction_breakpoint)
+ IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=0
EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=1
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
-INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
-EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+ GEN_COMMON instruction_breakpoint
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(instruction_breakpoint_exception)
+ b interrupt_return_srr
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
-EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+ IVEC=0x1500
+ IHSRR=1
+ IBRANCH_TO_COMMON=0
+ IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=0
#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
- KVMTEST denorm_exception_hv, EXC_HV 0x1500
- INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
-EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
-
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ GEN_INT_ENTRY denorm_exception, virt=1
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
- INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=1
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
-
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
BEGIN_FTR_SECTION
@@ -1885,11 +2700,16 @@ denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
@@ -1898,43 +2718,76 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON_BEGIN(denorm_exception_common)
+ GEN_COMMON denorm_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unknown_exception)
+ b interrupt_return_hsrr
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_maintenance)
+ IVEC=0x1600
+ IHSRR=1
+INT_DEFINE_END(cbe_maintenance)
+
EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
- INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_maintenance, virt=0
EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
+EXC_COMMON_BEGIN(cbe_maintenance_common)
+ GEN_COMMON cbe_maintenance
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(cbe_maintenance_exception)
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
#endif
+INT_DEFINE_BEGIN(altivec_assist)
+ IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_assist)
+
EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, kvm=1
+ GEN_INT_ENTRY altivec_assist, virt=0
EXC_REAL_END(altivec_assist, 0x1700, 0x100)
EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, virt=1
+ GEN_INT_ENTRY altivec_assist, virt=1
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
-INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(altivec_assist_common)
+ GEN_COMMON altivec_assist
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
-EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
+ bl CFUNC(altivec_assist_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
#else
-EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
+ bl CFUNC(unknown_exception)
#endif
+ b interrupt_return_srr
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_thermal)
+ IVEC=0x1800
+ IHSRR=1
+INT_DEFINE_END(cbe_thermal)
+
EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
- INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_thermal, virt=0
EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
+EXC_COMMON_BEGIN(cbe_thermal_common)
+ GEN_COMMON cbe_thermal
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(cbe_thermal_exception)
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -1943,14 +2796,11 @@ EXC_VIRT_NONE(0x5800, 0x100)
#ifdef CONFIG_PPC_WATCHDOG
-#define MASKED_DEC_HANDLER_LABEL 3f
-
-#define MASKED_DEC_HANDLER(_H) \
-3: /* soft-nmi */ \
- std r12,PACA_EXGEN+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,PACA_EXGEN+EX_R13(r13); \
- INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
+INT_DEFINE_BEGIN(soft_nmi)
+ IVEC=0x900
+ ISTACK=0
+ ICFAR=0
+INT_DEFINE_END(soft_nmi)
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1965,20 +2815,25 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl soft_nmi_interrupt
- b ret_from_except
-
-#else /* CONFIG_PPC_WATCHDOG */
-#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
-#define MASKED_DEC_HANDLER(_H)
+ __GEN_COMMON_BODY soft_nmi
+
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(soft_nmi_interrupt)
+
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ kuap_kernel_restore r9, r10
+
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
#endif /* CONFIG_PPC_WATCHDOG */
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
- * - If it was a decrementer interrupt, we bump the dec to max and and return.
+ * - If it was a decrementer interrupt, we bump the dec to max and return.
* - If it was a doorbell we return immediately since doorbells are edge
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
@@ -1986,49 +2841,89 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-.macro MASKED_INTERRUPT hsrr
+.macro MASKED_INTERRUPT hsrr=0
.if \hsrr
masked_Hinterrupt:
.else
masked_interrupt:
.endif
- std r11,PACA_EXGEN+EX_R11(r13)
- lbz r11,PACAIRQHAPPENED(r13)
- or r11,r11,r10
- stb r11,PACAIRQHAPPENED(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ /*
+ * Ensure there was no previous MUST_HARD_MASK interrupt or
+ * HARD_DIS setting. If this does fire, the interrupt is still
+ * masked and MSR[EE] will be cleared on return, so no need to
+ * panic, but somebody probably enabled MSR[EE] under
+ * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common
+ * cause.
+ */
+ lbz r9,PACAIRQHAPPENED(r13)
+ andi. r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS)
+0: tdnei r9,0
+ EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+ lbz r9,PACAIRQHAPPENED(r13)
+ or r9,r9,r10
+ stb r9,PACAIRQHAPPENED(r13)
+
+ .if ! \hsrr
cmpwi r10,PACA_IRQ_DEC
bne 1f
- lis r10,0x7fff
- ori r10,r10,0xffff
- mtspr SPRN_DEC,r10
- b MASKED_DEC_HANDLER_LABEL
+ LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+ mtspr SPRN_DEC,r9
+#ifdef CONFIG_PPC_WATCHDOG
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ b soft_nmi_common
+#else
+ b 2f
+#endif
+ .endif
+
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
beq 2f
+ xori r12,r12,MSR_EE /* clear MSR_EE */
.if \hsrr
- mfspr r10,SPRN_HSRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_HSRR1,r10
+ mtspr SPRN_HSRR1,r12
.else
- mfspr r10,SPRN_SRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r12
.endif
- ori r11,r11,PACA_IRQ_HARD_DIS
- stb r11,PACAIRQHAPPENED(r13)
+ ori r9,r9,PACA_IRQ_HARD_DIS
+ stb r9,PACAIRQHAPPENED(r13)
2: /* done */
+ li r9,0
+ .if \hsrr
+ stb r9,PACAHSRR_VALID(r13)
+ .else
+ stb r9,PACASRR_VALID(r13)
+ .endif
+
+ SEARCH_RESTART_TABLE
+ cmpdi r12,0
+ beq 3f
+ .if \hsrr
+ mtspr SPRN_HSRR0,r12
+ .else
+ mtspr SPRN_SRR0,r12
+ .endif
+3:
+
+ ld r9,PACA_EXGEN+EX_CTR(r13)
+ mtctr r9
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
mtcrf 0x80,r9
std r1,PACAR1(r13)
ld r9,PACA_EXGEN+EX_R9(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
- /* returns to kernel where r13 must be set up, so don't restore it */
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ /* May return to masked low address where r13 is not set up */
.if \hsrr
HRFI_TO_KERNEL
.else
RFI_TO_KERNEL
.endif
b .
- MASKED_DEC_HANDLER(\hsrr\())
.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
@@ -2044,15 +2939,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
-TRAMP_REAL_BEGIN(rfi_flush_fallback)
- SET_SCRATCH0(r13);
- GET_PACA(r13);
- std r1,PACA_EXRFI+EX_R12(r13)
- ld r1,PACAKSAVE(r13)
- std r9,PACA_EXRFI+EX_R9(r13)
- std r10,PACA_EXRFI+EX_R10(r13)
- std r11,PACA_EXRFI+EX_R11(r13)
- mfctr r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2063,7 +2951,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
sync
/*
- * The load adresses are at staggered offsets within cachelines,
+ * The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
@@ -2078,7 +2966,49 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
+.endm
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ blr
+
+/*
+ * The SCV entry flush happens with interrupts enabled, so it must disable
+ * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10
+ * (containing LR) does not need to be preserved here because scv entry
+ * puts 0 in the pt_regs, CTR can be clobbered for the same reason.
+ */
+TRAMP_REAL_BEGIN(scv_entry_flush_fallback)
+ li r10,0
+ mtmsrd r10,1
+ lbz r10,PACAIRQHAPPENED(r13)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ L1D_DISPLACEMENT_FLUSH
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ li r10,MSR_RI
+ mtmsrd r10,1
+ blr
+
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -2096,6 +3026,22 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r1,PACA_EXRFI+EX_R12(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+ /* system call volatile */
+ mr r7,r13
+ GET_PACA(r13);
+ mr r8,r1
+ ld r1,PACAKSAVE(r13)
+ mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2123,79 +3069,45 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
bdnz 1b
mtctr r9
- ld r9,PACA_EXRFI+EX_R9(r13)
- ld r10,PACA_EXRFI+EX_R10(r13)
- ld r11,PACA_EXRFI+EX_R11(r13)
- ld r1,PACA_EXRFI+EX_R12(r13)
- GET_SCRATCH0(r13);
- hrfid
+ li r9,0
+ li r10,0
+ li r11,0
+ mr r1,r8
+ mr r13,r7
+ RFSCV
-/*
- * Real mode exceptions actually use this too, but alternate
- * instruction code patches (which end up in the common .text area)
- * cannot reach these if they are put there.
- */
-USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT EXC_STD
- MASKED_INTERRUPT EXC_HV
+USE_TEXT_SECTION()
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
+kvm_interrupt:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+ * The conditional branch in KVMTEST can't reach all the way,
+ * make a stub.
*/
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- RFI_TO_KERNEL
- b .
-
-TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- HRFI_TO_KERNEL
- b .
+ b kvmppc_interrupt
#endif
-/*
- * Ensure that any handlers that get invoked from the exception prologs
- * above are below the first 64KB (0x10000) of the kernel image because
- * the prologs assemble the addresses of these handlers using the
- * LOAD_HANDLER macro, which uses an ori instruction.
- */
-
-/*** Common interrupt handlers ***/
-
-
- /*
- * Relocation-on interrupts: A subset of the interrupts can be delivered
- * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
- * it. Addresses are the same as the original interrupt addresses, but
- * offset by 0xc000000000004000.
- * It's impossible to receive interrupts below 0x300 via this mechanism.
- * KVM: None of these traps are from the guest ; anything that escalated
- * to HV=1 from HV=0 is delivered via real mode handlers.
- */
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
- /*
- * This uses the standard macro, since the original 0x300 vector
- * only has extra guff for STAB-based processors -- which never
- * come here.
- */
-EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
- b __ppc64_runlatch_on
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
USE_FIXED_SECTION(virt_trampolines)
/*
+ * All code below __end_soft_masked is treated as soft-masked. If
+ * any code runs here with MSR[EE]=1, it must then cope with pending
+ * soft interrupt being raised (i.e., by ensuring it is replayed).
+ *
* The __end_interrupts marker must be past the out-of-line (OOL)
* handlers, so that they are copied to real address 0x100 when running
* a relocatable kernel. This ensures they can be reached from the short
@@ -2205,16 +3117,7 @@ USE_FIXED_SECTION(virt_trampolines)
.align 7
.globl __end_interrupts
__end_interrupts:
-DEFINE_FIXED_SYMBOL(__end_interrupts)
-
-#ifdef CONFIG_PPC_970_NAP
-EXC_COMMON_BEGIN(power4_fixup_nap)
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- ld r10,_LINK(r1) /* make idle task do the */
- std r10,_NIP(r1) /* equivalent of a blr */
- blr
-#endif
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
@@ -2224,7 +3127,7 @@ CLOSE_FIXED_SECTION(virt_trampolines);
USE_TEXT_SECTION()
/* MSR[RI] should be clear because this uses SRR[01] */
-enable_machine_check:
+_GLOBAL(enable_machine_check)
mflr r0
bcl 20,31,$+4
0: mflr r3
@@ -2238,7 +3141,7 @@ enable_machine_check:
blr
/* MSR[RI] should be clear because this uses SRR[01] */
-disable_machine_check:
+SYM_FUNC_START_LOCAL(disable_machine_check)
mflr r0
bcl 20,31,$+4
0: mflr r3
@@ -2251,161 +3154,4 @@ disable_machine_check:
RFI_TO_KERNEL
1: mtlr r0
blr
-
-/*
- * Hash table stuff
- */
- .balign IFETCH_ALIGN_BYTES
-do_hash_page:
-#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
- ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r5,r0 /* weird error? */
- bne- handle_page_fault /* if not, try to insert a HPTE */
- ld r11, PACA_THREAD_INFO(r13)
- lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
- andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
- bne 77f /* then don't call hash_page now */
-
- /*
- * r3 contains the trap number
- * r4 contains the faulting address
- * r5 contains dsisr
- * r6 msr
- *
- * at return r3 = 0 for success, 1 for page fault, negative for error
- */
- bl __hash_page /* build HPTE if possible */
- cmpdi r3,0 /* see if __hash_page succeeded */
-
- /* Success */
- beq fast_exc_return_irq /* Return from exception on success */
-
- /* Error */
- blt- 13f
-
- /* Reload DAR/DSISR into r4/r5 for the DABR check below */
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-/* Here we have a page fault that hash_page can't handle. */
-handle_page_fault:
-11: andis. r0,r5,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_page_fault
- cmpdi r3,0
- beq+ ret_from_except_lite
- bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
-
-/* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- bl save_nvgprs
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_break
- /*
- * do_break() may have changed the NV GPRS while handling a breakpoint.
- * If so, we need to restore them with their updated values. Don't use
- * ret_from_except_lite here.
- */
- b ret_from_except
-
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-13: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl low_hash_fault
- b ret_from_except
-#endif
-
-/*
- * We come here as a result of a DSI at a point where we don't want
- * to call hash_page, such as when we are accessing memory (possibly
- * user memory) inside a PMU interrupt that occurred while interrupts
- * were soft-disabled. We want to invoke the exception handler for
- * the access, or panic if there isn't a handler.
- */
-77: bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- li r5,SIGSEGV
- bl bad_page_fault
- b ret_from_except
-
-/*
- * When doorbell is triggered from system reset wakeup, the message is
- * not cleared, so it would fire again when EE is enabled.
- *
- * When coming from local_irq_enable, there may be the same problem if
- * we were hard disabled.
- *
- * Execute msgclr to clear pending exceptions before handling it.
- */
-h_doorbell_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLR(3)
- b h_doorbell_common
-
-doorbell_super_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLRP(3)
- b doorbell_super_common
-
-/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- *
- * Note that we don't specify LR as the NIP (return address) for
- * the interrupt because that would unbalance the return branch
- * predictor.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
- */
- mfmsr r12
- LOAD_REG_ADDR(r11, replay_interrupt_return)
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
-BEGIN_FTR_SECTION
- beq h_virt_irq_common
-FTR_SECTION_ELSE
- beq hardware_interrupt_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
- cmpwi r3,0xf00
- beq performance_monitor_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xa00
- beq h_doorbell_common_msgclr
- cmpwi r3,0xe60
- beq hmi_exception_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common_msgclr
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-replay_interrupt_return:
- blr
-
-_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
+SYM_FUNC_END(disable_machine_check)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ff0114aeba9b..d14eda1e8589 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -24,22 +24,45 @@
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
-#include <asm/debugfs.h>
#include <asm/page.h>
-#include <asm/prom.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>
+#include <asm/interrupt.h>
+
+/*
+ * The CPU who acquired the lock to trigger the fadump crash should
+ * wait for other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT 500
static struct fw_dump fw_dump;
static void __init fadump_reserve_crash_area(u64 base);
#ifndef CONFIG_PRESERVE_FA_DUMP
+
+static struct kobject *fadump_kobj;
+
+static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);
-struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
-struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
+
+static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
+
+#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */
+#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \
+ sizeof(struct fadump_memory_range))
+static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
+
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
#ifdef CONFIG_CMA
static struct cma *fadump_cma;
@@ -51,13 +74,13 @@ static struct cma *fadump_cma;
* The total size of fadump reserved memory covers for boot memory size
* + cpu data size + hpte size and metadata.
* Initialize only the area equivalent to boot memory size for CMA use.
- * The reamining portion of fadump reserved memory will be not given
- * to CMA and pages for thoes will stay reserved. boot memory size is
+ * The remaining portion of fadump reserved memory will be not given
+ * to CMA and pages for those will stay reserved. boot memory size is
* aligned per CMA requirement to satisy cma_init_reserved_mem() call.
* But for some reason even if it fails we still have the memory reservation
* with us and we can still continue doing fadump.
*/
-int __init fadump_cma_init(void)
+static int __init fadump_cma_init(void)
{
unsigned long long base, size;
int rc;
@@ -91,6 +114,12 @@ int __init fadump_cma_init(void)
}
/*
+ * If CMA activation fails, keep the pages reserved, instead of
+ * exposing them to buddy allocator. Same as 'fadump=nocma' case.
+ */
+ cma_reserve_pages_on_error(fadump_cma);
+
+ /*
* So we now have successfully initialized cma area for fadump.
*/
pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
@@ -108,6 +137,11 @@ static int __init fadump_cma_init(void) { return 1; }
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data)
{
+ if (depth == 0) {
+ early_init_dt_scan_reserved_ranges(node);
+ return 0;
+ }
+
if (depth != 1)
return 0;
@@ -164,13 +198,13 @@ int is_fadump_active(void)
*/
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
- struct memblock_region *reg;
+ phys_addr_t reg_start, reg_end;
bool ret = false;
- u64 start, end;
+ u64 i, start, end;
- for_each_memblock(memory, reg) {
- start = max_t(u64, d_start, reg->base);
- end = min_t(u64, d_end, (reg->base + reg->size));
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ start = max_t(u64, d_start, reg_start);
+ end = min_t(u64, d_end, reg_end);
if (d_start < end) {
/* Memory hole from d_start to start */
if (start > d_start)
@@ -224,7 +258,7 @@ bool is_fadump_reserved_mem_contiguous(void)
}
/* Print firmware assisted dump configurations for debugging purpose. */
-static void fadump_show_config(void)
+static void __init fadump_show_config(void)
{
int i;
@@ -265,7 +299,7 @@ static void fadump_show_config(void)
* that is required for a kernel to boot successfully.
*
*/
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
{
u64 base, size, bootmem_min;
int ret;
@@ -279,7 +313,7 @@ static inline u64 fadump_calculate_reserve_size(void)
* memory at a predefined offset.
*/
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &size, &base);
+ &size, &base, NULL, NULL);
if (ret == 0 && size > 0) {
unsigned long max_size;
@@ -326,12 +360,17 @@ static inline u64 fadump_calculate_reserve_size(void)
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
*/
-static unsigned long get_fadump_area_size(void)
+static unsigned long __init get_fadump_area_size(void)
{
unsigned long size = 0;
size += fw_dump.cpu_state_data_size;
size += fw_dump.hpte_region_size;
+ /*
+ * Account for pagesize alignment of boot memory area destination address.
+ * This faciliates in mmap reading of first kernel's memory.
+ */
+ size = PAGE_ALIGN(size);
size += fw_dump.boot_memory_size;
size += sizeof(struct fadump_crash_info_header);
size += sizeof(struct elfhdr); /* ELF core header.*/
@@ -395,44 +434,106 @@ static int __init add_boot_mem_regions(unsigned long mstart,
static int __init fadump_get_boot_mem_regions(void)
{
- unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long size, cur_size, hole_size, last_end;
unsigned long mem_size = fw_dump.boot_memory_size;
- struct memblock_region *reg;
+ phys_addr_t reg_start, reg_end;
int ret = 1;
+ u64 i;
fw_dump.boot_mem_regs_cnt = 0;
last_end = 0;
hole_size = 0;
cur_size = 0;
- for_each_memblock(memory, reg) {
- base = reg->base;
- size = reg->size;
- hole_size += (base - last_end);
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ size = reg_end - reg_start;
+ hole_size += (reg_start - last_end);
if ((cur_size + size) >= mem_size) {
size = (mem_size - cur_size);
- ret = add_boot_mem_regions(base, size);
+ ret = add_boot_mem_regions(reg_start, size);
break;
}
mem_size -= size;
cur_size += size;
- ret = add_boot_mem_regions(base, size);
+ ret = add_boot_mem_regions(reg_start, size);
if (!ret)
break;
- last_end = base + size;
+ last_end = reg_end;
}
fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
return ret;
}
+/*
+ * Returns true, if the given range overlaps with reserved memory ranges
+ * starting at idx. Also, updates idx to index of overlapping memory range
+ * with the given memory range.
+ * False, otherwise.
+ */
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+{
+ bool ret = false;
+ int i;
+
+ for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
+ u64 rbase = reserved_mrange_info.mem_ranges[i].base;
+ u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
+
+ if (end <= rbase)
+ break;
+
+ if ((end > rbase) && (base < rend)) {
+ *idx = i;
+ ret = true;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
+ */
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
+{
+ struct fadump_memory_range *mrngs;
+ phys_addr_t mstart, mend;
+ int idx = 0;
+ u64 i, ret = 0;
+
+ mrngs = reserved_mrange_info.mem_ranges;
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+ &mstart, &mend, NULL) {
+ pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
+ i, mstart, mend, base);
+
+ if (mstart > base)
+ base = PAGE_ALIGN(mstart);
+
+ while ((mend > base) && ((mend - base) >= size)) {
+ if (!overlaps_reserved_ranges(base, base+size, &idx)) {
+ ret = base;
+ goto out;
+ }
+
+ base = mrngs[idx].base + mrngs[idx].size;
+ base = PAGE_ALIGN(base);
+ }
+ }
+
+out:
+ return ret;
+}
+
int __init fadump_reserve_mem(void)
{
- u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
- bool is_memblock_bottom_up = memblock_bottom_up();
+ u64 base, size, mem_boundary, bootmem_min;
int ret = 1;
if (!fw_dump.fadump_enabled)
@@ -453,9 +554,9 @@ int __init fadump_reserve_mem(void)
PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
if (!fw_dump.nocma) {
- align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size, align);
+ ALIGN(fw_dump.boot_memory_size,
+ CMA_MIN_ALIGNMENT_BYTES);
}
#endif
@@ -523,13 +624,9 @@ int __init fadump_reserve_mem(void)
* Reserve memory at an offset closer to bottom of the RAM to
* minimize the impact of memory hot-remove operation.
*/
- memblock_set_bottom_up(true);
- base = memblock_find_in_range(base, mem_boundary, size, align);
+ base = fadump_locate_reserve_mem(base, size);
- /* Restore the previous allocation mode */
- memblock_set_bottom_up(is_memblock_bottom_up);
-
- if (!base) {
+ if (!base || (base + size > mem_boundary)) {
pr_err("Failed to find memory chunk for reservation!\n");
goto error_out;
}
@@ -557,6 +654,7 @@ int __init fadump_reserve_mem(void)
return ret;
error_out:
fw_dump.fadump_enabled = 0;
+ fw_dump.reserve_dump_area_size = 0;
return 0;
}
@@ -594,8 +692,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem);
void crash_fadump(struct pt_regs *regs, const char *str)
{
+ unsigned int msecs;
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
+ /* Do not include first CPU */
+ unsigned int ncpus = num_online_cpus() - 1;
if (!should_fadump_crash())
return;
@@ -611,6 +712,8 @@ void crash_fadump(struct pt_regs *regs, const char *str)
old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
if (old_cpu != -1) {
+ atomic_inc(&cpus_in_fadump);
+
/*
* We can't loop here indefinitely. Wait as long as fadump
* is in force. If we race with fadump un-registration this
@@ -632,12 +735,22 @@ void crash_fadump(struct pt_regs *regs, const char *str)
else
ppc_save_regs(&fdh->regs);
- fdh->online_mask = *cpu_online_mask;
+ fdh->cpu_mask = *cpu_online_mask;
+
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
+ msecs = CRASH_TIMEOUT;
+ while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
+ mdelay(1);
+ }
fw_dump.ops->fadump_trigger(fdh, str);
}
-u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -646,18 +759,16 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
* FIXME: How do i get PID? Do I really need it?
* prstatus.pr_pid = ????
*/
- elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
return buf;
}
-void fadump_update_elfcore_header(char *bufp)
+void __init fadump_update_elfcore_header(char *bufp)
{
- struct elfhdr *elf;
struct elf_phdr *phdr;
- elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/* First note is a place holder for cpu notes info. */
@@ -672,7 +783,7 @@ void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_alloc_buffer(unsigned long size)
+static void *__init fadump_alloc_buffer(unsigned long size)
{
unsigned long count, i;
struct page *page;
@@ -694,7 +805,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
@@ -726,10 +837,14 @@ void fadump_free_cpu_notes_buf(void)
static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
+ if (mrange_info->is_static) {
+ mrange_info->mem_range_cnt = 0;
+ return;
+ }
+
kfree(mrange_info->mem_ranges);
- mrange_info->mem_ranges = NULL;
- mrange_info->mem_ranges_sz = 0;
- mrange_info->max_mem_ranges = 0;
+ memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
+ (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}
/*
@@ -759,7 +874,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
sizeof(struct fadump_memory_range));
return 0;
}
-
static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
u64 base, u64 end)
{
@@ -778,7 +892,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
- if ((start + size) == base)
+ /*
+ * Boot memory area needs separate PT_LOAD segment(s) as it
+ * is moved to a different location at the time of crash.
+ * So, fold only if the region is not boot memory area.
+ */
+ if ((start + size) == base && start >= fw_dump.boot_mem_top)
is_adjacent = true;
}
if (!is_adjacent) {
@@ -786,6 +905,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
+ if (mrange_info->is_static) {
+ pr_err("Reached array size limit for %s memory ranges\n",
+ mrange_info->name);
+ return -ENOSPC;
+ }
+
ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
@@ -854,11 +979,14 @@ static int fadump_init_elfcore_header(char *bufp)
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
-#if defined(_CALL_ELF)
- elf->e_flags = _CALL_ELF;
-#else
- elf->e_flags = 0;
-#endif
+
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ elf->e_flags = 2;
+ else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+ elf->e_flags = 1;
+ else
+ elf->e_flags = 0;
+
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = 0;
@@ -875,9 +1003,8 @@ static int fadump_init_elfcore_header(char *bufp)
*/
static int fadump_setup_crash_memory_ranges(void)
{
- struct memblock_region *reg;
- u64 start, end;
- int i, ret;
+ u64 i, start, end;
+ int ret;
pr_debug("Setup crash memory ranges.\n");
crash_mrange_info.mem_range_cnt = 0;
@@ -895,10 +1022,7 @@ static int fadump_setup_crash_memory_ranges(void)
return ret;
}
- for_each_memblock(memory, reg) {
- start = (u64)reg->base;
- end = start + (u64)reg->size;
-
+ for_each_mem_range(i, &start, &end) {
/*
* skip the memory chunk that is already added
* (0 through boot_memory_top).
@@ -1054,6 +1178,11 @@ static unsigned long init_fadump_header(unsigned long addr)
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
+ /*
+ * When LPAR is terminated by PYHP, ensure all possible CPUs'
+ * register data is processed while exporting the vmcore.
+ */
+ fdh->cpu_mask = *cpu_possible_mask;
return addr;
}
@@ -1132,14 +1261,17 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
*/
static void fadump_release_reserved_area(u64 start, u64 end)
{
+ unsigned long reg_spfn, reg_epfn;
u64 tstart, tend, spfn, epfn;
- struct memblock_region *reg;
+ int i;
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
- for_each_memblock(memory, reg) {
- tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
- tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
+ tstart = max_t(u64, spfn, reg_spfn);
+ tend = min_t(u64, epfn, reg_epfn);
+
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
@@ -1158,7 +1290,6 @@ static void fadump_release_reserved_area(u64 start, u64 end)
static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
struct fadump_memory_range *mem_ranges;
- struct fadump_memory_range tmp_range;
u64 base, size;
int i, j, idx;
@@ -1173,11 +1304,8 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
if (mem_ranges[idx].base > mem_ranges[j].base)
idx = j;
}
- if (idx != i) {
- tmp_range = mem_ranges[idx];
- mem_ranges[idx] = mem_ranges[i];
- mem_ranges[i] = tmp_range;
- }
+ if (idx != i)
+ swap(mem_ranges[idx], mem_ranges[i]);
}
/* Merge adjacent reserved ranges */
@@ -1202,20 +1330,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
* Scan reserved-ranges to consider them while reserving/releasing
* memory for FADump.
*/
-static inline int fadump_scan_reserved_mem_ranges(void)
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
- struct device_node *root;
const __be32 *prop;
int len, ret = -1;
unsigned long i;
- root = of_find_node_by_path("/");
- if (!root)
- return ret;
+ /* reserved-ranges already scanned */
+ if (reserved_mrange_info.mem_range_cnt != 0)
+ return;
- prop = of_get_property(root, "reserved-ranges", &len);
+ prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
if (!prop)
- return ret;
+ return;
/*
* Each reserved range is an (address,size) pair, 2 cells each,
@@ -1237,7 +1364,8 @@ static inline int fadump_scan_reserved_mem_ranges(void)
}
}
- return ret;
+ /* Compact reserved ranges */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
}
/*
@@ -1251,32 +1379,21 @@ static void fadump_release_memory(u64 begin, u64 end)
u64 ra_start, ra_end, tstart;
int i, ret;
- fadump_scan_reserved_mem_ranges();
-
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * Add reserved dump area to reserved ranges list
- * and exclude all these ranges while releasing memory.
+ * If reserved ranges array limit is hit, overwrite the last reserved
+ * memory range with reserved dump area to ensure it is excluded from
+ * the memory being released (reused for next FADump registration).
*/
- ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
- if (ret != 0) {
- /*
- * Not enough memory to setup reserved ranges but the system is
- * running shortage of memory. So, release all the memory except
- * Reserved dump area (reused for next fadump registration).
- */
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ if (reserved_mrange_info.mem_range_cnt ==
+ reserved_mrange_info.max_mem_ranges)
+ reserved_mrange_info.mem_range_cnt--;
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0)
return;
- }
/* Get the reserved ranges list in order first. */
sort_and_merge_mem_ranges(&reserved_mrange_info);
@@ -1323,9 +1440,9 @@ static void fadump_invalidate_release_mem(void)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t release_mem_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int input = -1;
@@ -1350,23 +1467,40 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
return count;
}
-static ssize_t fadump_enabled_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/* Release the reserved memory and disable the FADump */
+static void __init unregister_fadump(void)
+{
+ fadump_cleanup();
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
+ fw_dump.reserve_dump_area_size);
+ fw_dump.fadump_enabled = 0;
+ kobject_put(fadump_kobj);
+}
+
+static ssize_t enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
-static ssize_t fadump_register_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+static ssize_t mem_reserved_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+static ssize_t registered_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
-static ssize_t fadump_register_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t registered_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int ret = 0;
int input = -1;
@@ -1418,45 +1552,82 @@ static int fadump_region_show(struct seq_file *m, void *private)
return 0;
}
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
- 0200, NULL,
- fadump_release_memory_store);
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
- 0444, fadump_enabled_show,
- NULL);
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
- 0644, fadump_register_show,
- fadump_register_store);
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+
+static struct attribute *fadump_attrs[] = {
+ &enable_attr.attr,
+ &register_attr.attr,
+ &mem_reserved_attr.attr,
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
-static void fadump_init_files(void)
+static void __init fadump_init_files(void)
{
- struct dentry *debugfs_file;
int rc = 0;
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_enabled (%d)\n", rc);
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+ if (!fadump_kobj) {
+ pr_err("failed to create fadump kobject\n");
+ return;
+ }
+
+ debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
+ &fadump_region_fops);
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_registered (%d)\n", rc);
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
+ if (rc)
+ pr_err("unable to create release_mem sysfs file (%d)\n",
+ rc);
+ }
- debugfs_file = debugfs_create_file("fadump_region", 0444,
- powerpc_debugfs_root, NULL,
- &fadump_region_fops);
- if (!debugfs_file)
- printk(KERN_ERR "fadump: unable to create debugfs file"
- " fadump_region\n");
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+ if (rc) {
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
+ rc);
+ unregister_fadump();
+ return;
+ }
+
+ /*
+ * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
+ * create symlink at old location to maintain backward compatibility.
+ *
+ * - fadump_enabled -> fadump/enabled
+ * - fadump_registered -> fadump/registered
+ * - fadump_release_mem -> fadump/release_mem
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "enabled", "fadump_enabled");
+ if (rc) {
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
+ return;
+ }
+
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "registered",
+ "fadump_registered");
+ if (rc) {
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
+ return;
+ }
if (fw_dump.dump_active) {
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+ fadump_kobj,
+ "release_mem",
+ "fadump_release_mem");
if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_release_mem (%d)\n", rc);
+ pr_err("unable to create fadump_release_mem symlink (%d)",
+ rc);
}
return;
}
@@ -1487,13 +1658,28 @@ int __init setup_fadump(void)
if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
- /* Initialize the kernel dump memory structure for FAD registration. */
- else if (fw_dump.reserve_dump_area_size)
+ /* Initialize the kernel dump memory structure and register with f/w */
+ else if (fw_dump.reserve_dump_area_size) {
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+ register_fadump();
+ }
+
+ /*
+ * In case of panic, fadump is triggered via ppc_panic_event()
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+ * lets panic() function take crash friendly path before panic
+ * notifiers are invoked.
+ */
+ crash_kexec_post_notifiers = true;
return 1;
}
-subsys_initcall(setup_fadump);
+/*
+ * Use subsys_initcall_sync() here because there is dependency with
+ * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization
+ * is done before registering with f/w.
+ */
+subsys_initcall_sync(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */
/* Scan the Firmware Assisted dump configuration details. */
@@ -1531,12 +1717,10 @@ int __init fadump_reserve_mem(void)
/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
- struct memblock_region *reg;
- u64 mstart, msize;
+ u64 i, mstart, mend, msize;
- for_each_memblock(memory, reg) {
- mstart = reg->base;
- msize = reg->size;
+ for_each_mem_range(i, &mstart, &mend) {
+ msize = mend - mstart;
if ((mstart + msize) < base)
continue;
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index cc4a5e3f51f1..8987eee33dc8 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -11,8 +11,33 @@
#include <linux/export.h>
#include <linux/cache.h>
+#include <linux/of.h>
#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
+#ifdef CONFIG_PPC64
unsigned long powerpc_firmware_features __read_mostly;
EXPORT_SYMBOL_GPL(powerpc_firmware_features);
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
+EXPORT_SYMBOL_GPL(kvm_guest);
+
+int __init check_kvm_guest(void)
+{
+ struct device_node *hyper_node;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return 0;
+
+ if (of_device_is_compatible(hyper_node, "linux,kvm"))
+ static_branch_enable(&kvm_guest);
+
+ of_node_put(hyper_node);
+ return 0;
+}
+core_initcall(check_kvm_guest); // before kvm_guest_init()
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 0bb991ddd264..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -9,21 +9,29 @@
* Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
*/
+#include <linux/export.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_FPR(n,base); \
+ b 3f; \
+2: REST_VSR(n,c,base); \
+3:
+
#define __REST_32FPVSRS(n,c,base) \
BEGIN_FTR_SECTION \
b 2f; \
@@ -42,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
2: SAVE_32VSRS(n,c,base); \
3:
#else
+#define __REST_1FPVSR(n,b,base) REST_FPR(n, base)
#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
#endif
+#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base)
#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
@@ -68,6 +78,7 @@ _GLOBAL(store_fp_state)
SAVE_32FPVSRS(0, R4, R3)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r3)
+ REST_1FPVSR(0, R4, R3)
blr
EXPORT_SYMBOL(store_fp_state)
@@ -82,18 +93,22 @@ EXPORT_SYMBOL(store_fp_state)
*/
_GLOBAL(load_up_fpu)
mfmsr r5
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_FP|MSR_RI
+#else
ori r5,r5,MSR_FP
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
/* enable use of FP after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ addi r5,r2,THREAD
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
@@ -104,10 +119,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
ori r12,r12,MSR_FP
or r12,r12,r4
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
#endif
- /* Don't care if r4 overflows, this is desired behaviour */
- lbz r4,THREAD_LOAD_FP(r5)
- addi r4,r4,1
+#endif
+ li r4,1
stb r4,THREAD_LOAD_FP(r5)
addi r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
@@ -116,6 +133,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* restore registers and return */
/* we haven't used ctr or xer or lr */
blr
+_ASM_NOKPROBE_SYMBOL(load_up_fpu)
/*
* save_fpu(tsk)
@@ -132,19 +150,5 @@ _GLOBAL(save_fpu)
2: SAVE_32FPVSRS(0, R4, R6)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
- blr
-
-/*
- * These are used in the alignment trap handler when emulating
- * single-precision loads and stores.
- */
-
-_GLOBAL(cvt_fd)
- lfs 0,0(r3)
- stfd 0,0(r4)
- blr
-
-_GLOBAL(cvt_df)
- lfd 0,0(r3)
- stfs 0,0(r4)
+ REST_1FPVSR(0, R4, R6)
blr
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 8abc7783dbe5..f8e2911478a7 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,135 +10,161 @@
* We assume sprg3 has the physical address of the current
* task's thread_struct.
*/
+.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr
+.endm
-.macro EXCEPTION_PROLOG
+.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
mtspr SPRN_SPRG_SCRATCH0,r10
mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r10, SPRN_SPRG_THREAD
+ .if \handle_dar_dsisr
+#ifdef CONFIG_40x
+ mfspr r11, SPRN_DEAR
+#else
+ mfspr r11, SPRN_DAR
+#endif
+ stw r11, DAR(r10)
+#ifdef CONFIG_40x
+ mfspr r11, SPRN_ESR
+#else
+ mfspr r11, SPRN_DSISR
+#endif
+ stw r11, DSISR(r10)
+ .endif
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
mfcr r10
- EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
+ andi. r11, r11, MSR_PR
.endm
.macro EXCEPTION_PROLOG_1
- mfspr r11,SPRN_SRR1 /* check whether user or kernel */
- andi. r11,r11,MSR_PR
- tophys(r11,r1) /* use tophys(r1) if kernel */
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */
beq 1f
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,TASK_STACK-THREAD(r11)
- addi r11,r11,THREAD_SIZE
- tophys(r11,r11)
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+ mfspr r1,SPRN_SPRG_THREAD
+ lwz r1,TASK_STACK-THREAD(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+1:
+#ifdef CONFIG_VMAP_STACK
+ mtcrf 0x3f, r1
+ bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
+#endif
.endm
-.macro EXCEPTION_PROLOG_2
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+ .if \handle_dar_dsisr
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
+ .endif
+#endif
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+ mtspr SPRN_SRR1, r11
+ lis r11, 1f@h
+ ori r11, r11, 1f@l
+ mtspr SPRN_SRR0, r11
+ mfspr r11, SPRN_SPRG_SCRATCH2
+ rfi
+
+ .text
+\name\()_virt:
+1:
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11, r1
stw r10,_CCR(r11) /* save registers */
stw r12,GPR12(r11)
stw r9,GPR9(r11)
mfspr r10,SPRN_SPRG_SCRATCH0
- stw r10,GPR10(r11)
mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r10,GPR10(r11)
stw r12,GPR11(r11)
mflr r10
stw r10,_LINK(r11)
- mfspr r12,SPRN_SRR0
- mfspr r9,SPRN_SRR1
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
+ mfspr r12, SPRN_SPRG_THREAD
+ tovirt(r12, r12)
+ .if \handle_dar_dsisr
+ lwz r10, DAR(r12)
+ stw r10, _DAR(r11)
+ lwz r10, DSISR(r12)
+ stw r10, _DSISR(r11)
+ .endif
+ lwz r9, SRR1(r12)
+ lwz r12, SRR0(r12)
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+ mtspr SPRN_EID, r2 /* Set MSR_RI */
#else
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
+ li r10, MSR_KERNEL /* can take exceptions */
+ mtmsr r10 /* (except for mach check in rtas) */
#endif
- stw r0,GPR0(r11)
- lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r10,8(r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
+ COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
.endm
-.macro SYSCALL_ENTRY trapno
- mfspr r12,SPRN_SPRG_THREAD
- mfcr r10
- lwz r11,TASK_STACK-THREAD(r12)
- mflr r9
- addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
- rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
- tophys(r11,r11)
- stw r10,_CCR(r11) /* save registers */
- mfspr r10,SPRN_SRR0
- stw r9,_LINK(r11)
- mfspr r9,SPRN_SRR1
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
- stw r10,_NIP(r11)
-#ifdef CONFIG_40x
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
-#else
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
-#endif
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno + 1
- stw r10,8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
- addi r11,r1,STACK_FRAME_OVERHEAD
- addi r2,r12,-THREAD
- stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
-#endif
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#if defined(CONFIG_40x)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-#endif
+ stw r10,STACK_INT_FRAME_MARKER(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+.endm
-3:
- tovirt(r2, r2) /* set r2 to current */
- lis r11, transfer_to_syscall@h
- ori r11, r11, transfer_to_syscall@l
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * If MSR is changing we need to keep interrupts disabled at this point
- * otherwise we might risk taking an interrupt before we tell lockdep
- * they are enabled.
- */
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
- rlwimi r10, r9, 0, MSR_EE
-#else
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+ b 778f
+777:
+ bl __kuep_lock
+778:
#endif
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
+777:
#endif
- mtspr SPRN_SRR1,r10
- mtspr SPRN_SRR0,r11
- SYNC
- RFI /* jump to handler, enable MMU */
+.endm
+
+.macro SYSCALL_ENTRY trapno
+ mfspr r9, SPRN_SRR1
+ mfspr r12, SPRN_SRR0
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
+ lis r10, 1f@h
+ ori r10, r10, 1f@l
+ mtspr SPRN_SRR1, r11
+ mtspr SPRN_SRR0, r10
+ mfspr r10,SPRN_SPRG_THREAD
+ mr r11, r1
+ lwz r1,TASK_STACK-THREAD(r10)
+ tovirt(r10, r10)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ rfi
+1:
+ stw r12,_NIP(r1)
+ mfcr r12
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ stw r12,_CCR(r1)
+ b transfer_to_syscall /* jump to handler */
.endm
/*
@@ -154,37 +180,43 @@
*/
#ifdef CONFIG_PPC_BOOK3S
#define START_EXCEPTION(n, label) \
+ __HEAD; \
. = n; \
DO_KVM n; \
label:
#else
#define START_EXCEPTION(n, label) \
+ __HEAD; \
. = n; \
label:
#endif
-#define EXCEPTION(n, label, hdlr, xfer) \
+#define EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label) \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- LOAD_REG_IMMEDIATE(r10, msr); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
- ret_from_except)
+ EXCEPTION_PROLOG n label; \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
+
+.macro vmap_stack_overflow_exception
+ __HEAD
+vmap_stack_overflow:
+#ifdef CONFIG_SMP
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, TASK_CPU - THREAD(r1)
+ slwi r1, r1, 3
+ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
+#else
+ lis r1, emergency_ctx-PAGE_OFFSET@ha
+#endif
+ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+ prepare_transfer_to_handler
+ bl stack_overflow_exception
+ b interrupt_return
+.endm
#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 585ea1976550..9fc90410b385 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -26,17 +26,18 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
-#include <asm/asm-405.h>
#include "head_32.h"
@@ -53,8 +54,8 @@
* This is all going to change RSN when we add bi_recs....... -- Dan
*/
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
mr r31,r3 /* save device tree ptr */
@@ -73,7 +74,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
b . /* prevent prefetch past rfi */
@@ -83,15 +83,19 @@ turn_on_mmu:
*/
. = 0xc0
crit_save:
-_ENTRY(crit_r10)
+_GLOBAL(crit_r10)
+ .space 4
+_GLOBAL(crit_r11)
.space 4
-_ENTRY(crit_r11)
+_GLOBAL(crit_srr0)
.space 4
-_ENTRY(crit_srr0)
+_GLOBAL(crit_srr1)
.space 4
-_ENTRY(crit_srr1)
+_GLOBAL(crit_r1)
.space 4
-_ENTRY(saved_ksp_limit)
+_GLOBAL(crit_dear)
+ .space 4
+_GLOBAL(crit_esr)
.space 4
/*
@@ -102,42 +106,62 @@ _ENTRY(saved_ksp_limit)
* Instead we use a couple of words of memory at low physical addresses.
* This is OK since we don't support SMP on these processors.
*/
-#define CRITICAL_EXCEPTION_PROLOG \
- stw r10,crit_r10@l(0); /* save two registers to work with */\
- stw r11,crit_r11@l(0); \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- lis r11,critirq_ctx@ha; \
- tophys(r11,r11); \
- lwz r11,critirq_ctx@l(r11); \
- beq 1f; \
- /* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
- tophys(r11,r11); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
- stw r12,_DEAR(r11); /* since they may have had stuff */\
- mfspr r9,SPRN_ESR; /* in them at the point where the */\
- stw r9,_ESR(r11); /* exception was taken */\
- mfspr r12,SPRN_SRR2; \
- stw r1,GPR1(r11); \
- mfspr r9,SPRN_SRR3; \
- stw r1,0(r11); \
- tovirt(r1,r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
+ stw r10,crit_r10@l(0) /* save two registers to work with */
+ stw r11,crit_r11@l(0)
+ mfspr r10,SPRN_SRR0
+ mfspr r11,SPRN_SRR1
+ stw r10,crit_srr0@l(0)
+ stw r11,crit_srr1@l(0)
+ mfspr r10,SPRN_DEAR
+ mfspr r11,SPRN_ESR
+ stw r10,crit_dear@l(0)
+ stw r11,crit_esr@l(0)
+ mfcr r10 /* save CR in r10 for now */
+ mfspr r11,SPRN_SRR3 /* check whether user or kernel */
+ andi. r11,r11,MSR_PR
+ lis r11,(critirq_ctx-PAGE_OFFSET)@ha
+ lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11)
+ beq 1f
+ /* COMING FROM USER MODE */
+ mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */
+ lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1: stw r1,crit_r1@l(0)
+ addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */
+ mtspr SPRN_SRR1, r11
+ lis r11, 1f@h
+ ori r11, r11, 1f@l
+ mtspr SPRN_SRR0, r11
+ rfi
+
+ .text
+1:
+\name\()_virt:
+ lwz r11,crit_r1@l(0)
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11,r1
+ stw r10,_CCR(r11) /* save various registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ lis r9,PAGE_OFFSET@ha
+ lwz r10,crit_r10@l(r9)
+ lwz r12,crit_r11@l(r9)
+ stw r10,GPR10(r11)
+ stw r12,GPR11(r11)
+ lwz r12,crit_dear@l(r9)
+ lwz r9,crit_esr@l(r9)
+ stw r12,_DEAR(r11) /* since they may have had stuff */
+ stw r9,_ESR(r11) /* exception was taken */
+ mfspr r12,SPRN_SRR2
+ mfspr r9,SPRN_SRR3
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ COMMON_EXCEPTION_PROLOG_END \trapno + 2
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
/*
* State at this point:
@@ -157,10 +181,10 @@ _ENTRY(saved_ksp_limit)
*/
#define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ CRITICAL_EXCEPTION_PROLOG n label; \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_crit_exc
/*
* 0x0100 - Critical Interrupt Exception
@@ -176,190 +200,71 @@ _ENTRY(saved_ksp_limit)
* 0x0300 - Data Storage Exception
* This happens for just a few reasons. U0 set (but we don't do that),
* or zone protection fault (user violation, write to protected page).
- * If this is just an update of modified status, we do that quickly
- * and exit. Otherwise, we call heavywight functions to do the work.
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
*/
START_EXCEPTION(0x0300, DataStorage)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG_SCRATCH3, r12
- mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
-
- /* First, check if it was a zone fault (which means a user
- * tried to access a kernel or read-protected page - always
- * a SEGV). All other faults here must be stores, so no
- * need to check ESR_DST as well. */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_DIZ@h
- bne 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r11, 0(r11) /* Get L1 entry */
- rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
-
- andi. r9, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 0(r12) /* Update Linux page table */
-
- /* Most of the Linux PTE is ready to load into the TLB LO.
- * We set ZSEL, where only the LS-bit determines user access.
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
- li r12, 0x0ce2
- andc r11, r11, r12 /* Make sure 20, 21 are zero */
-
- /* find the TLB index that caused the fault. It has to be here.
- */
- tlbsx r9, 0, r10
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-2:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
+ EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/*
* 0x0400 - Instruction Storage Exception
* This is caused by a fetch from non-execute or guarded pages.
*/
START_EXCEPTION(0x0400, InstructionAccess)
- EXCEPTION_PROLOG
- mr r4,r12 /* Pass SRR0 as arg2 */
- li r5,0 /* Pass zero as arg3 */
- EXC_XFER_LITE(0x400, handle_page_fault)
+ EXCEPTION_PROLOG 0x400 InstructionAccess
+ li r5,0
+ stw r5, _ESR(r11) /* Zero ESR */
+ stw r12, _DEAR(r11) /* SRR0 as DEAR */
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* 0x0500 - External Interrupt Exception */
- EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
/* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
- stw r4,_DEAR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
- EXCEPTION_PROLOG
- mfspr r4,SPRN_ESR /* Grab the ESR and save it */
- stw r4,_ESR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x700, program_check_exception)
+ EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0800, Trap_08, unknown_exception)
+ EXCEPTION(0x0900, Trap_09, unknown_exception)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
SYSCALL_ENTRY 0xc00
+/* Trap_0D is commented out to get more space for system call exception */
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
+/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
- . = 0x1000
+ START_EXCEPTION(0x1000, DecrementerTrap)
b Decrementer
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
- . = 0x1010
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+ START_EXCEPTION(0x1010, FITExceptionTrap)
b FITException
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
- . = 0x1020
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+ START_EXCEPTION(0x1020, WDTExceptionTrap)
b WDTException
/* 0x1100 - Data TLB Miss Exception
@@ -368,23 +273,13 @@ _ENTRY(saved_ksp_limit)
* load TLB entries from the page table if they exist.
*/
START_EXCEPTION(0x1100, DTLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
+ mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH6, r11
mtspr SPRN_SPRG_SCRATCH3, r12
mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
+ mfcr r12
+ mfspr r9, SPRN_PID
+ rlwimi r12, r9, 0, 0xff
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -404,31 +299,37 @@ _ENTRY(saved_ksp_limit)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ lwz r11, 0(r11) /* Get L1 entry */
+ andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
+ rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r11) /* Get Linux PTE */
+ li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
+ andc. r9, r9, r11 /* Check permission */
+ bne 5f
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
+ rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */
+ and r9, r9, r11 /* hwwrite = dirty & w */
+ rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */
/* Create TLB tag. This is the faulting address plus a static
* set of bits. These are size, valid, E, U0.
*/
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
+ li r9, 0x00c0
+ rlwimi r10, r9, 0, 20, 31
b finish_tlb_load
2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
+ rlwinm. r9, r11, 2, 22, 24
beq 5f
/* Create TLB tag. This is the faulting address, plus a static
@@ -436,7 +337,6 @@ _ENTRY(saved_ksp_limit)
*/
ori r9, r9, 0x40
rlwimi r10, r9, 0, 20, 31
- mr r11, r12
b finish_tlb_load
@@ -444,47 +344,26 @@ _ENTRY(saved_ksp_limit)
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
+ b DataStorage
/* 0x1200 - Instruction TLB Miss Exception
* Nearly the same as above, except we get our information from different
* registers and bailout to a different point.
*/
START_EXCEPTION(0x1200, ITLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
+ mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH6, r11
mtspr SPRN_SPRG_SCRATCH3, r12
mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
+ mfcr r12
+ mfspr r9, SPRN_PID
+ rlwimi r12, r9, 0, 0xff
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -504,31 +383,37 @@ _ENTRY(saved_ksp_limit)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ lwz r11, 0(r11) /* Get L1 entry */
+ andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
+ rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r11) /* Get Linux PTE */
+ li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+ andc. r9, r9, r11 /* Check permission */
+ bne 5f
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
+ rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */
+ and r9, r9, r11 /* hwwrite = dirty & w */
+ rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */
/* Create TLB tag. This is the faulting address plus a static
* set of bits. These are size, valid, E, U0.
*/
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
+ li r9, 0x00c0
+ rlwimi r10, r9, 0, 20, 31
b finish_tlb_load
2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
+ rlwinm. r9, r11, 2, 22, 24
beq 5f
/* Create TLB tag. This is the faulting address, plus a static
@@ -536,7 +421,6 @@ _ENTRY(saved_ksp_limit)
*/
ori r9, r9, 0x40
rlwimi r10, r9, 0, 20, 31
- mr r11, r12
b finish_tlb_load
@@ -544,44 +428,27 @@ _ENTRY(saved_ksp_limit)
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
b InstructionAccess
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
-#ifdef CONFIG_IBM405_ERR51
- /* 405GP errata 51 */
- START_EXCEPTION(0x1700, Trap_17)
- b DTLBMiss
-#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
-#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1300, Trap_13, unknown_exception)
+ EXCEPTION(0x1400, Trap_14, unknown_exception)
+ EXCEPTION(0x1500, Trap_15, unknown_exception)
+ EXCEPTION(0x1600, Trap_16, unknown_exception)
+ EXCEPTION(0x1700, Trap_17, unknown_exception)
+ EXCEPTION(0x1800, Trap_18, unknown_exception)
+ EXCEPTION(0x1900, Trap_19, unknown_exception)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception)
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -598,7 +465,7 @@ _ENTRY(saved_ksp_limit)
*/
/* 0x2000 - Debug Exception */
START_EXCEPTION(0x2000, DebugTrap)
- CRITICAL_EXCEPTION_PROLOG
+ CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap
/*
* If this is a single step or branch-taken exception in an
@@ -634,49 +501,41 @@ _ENTRY(saved_ksp_limit)
lwz r12,GPR12(r11)
lwz r10,crit_r10@l(0)
lwz r11,crit_r11@l(0)
- PPC405_ERR77_SYNC
rfci
b .
/* continue normal handling for a critical exception... */
2: mfspr r4,SPRN_DBSR
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_TEMPLATE(DebugException, 0x2002, \
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */
+ prepare_transfer_to_handler
+ bl DebugException
+ b ret_from_crit_exc
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+ __HEAD
Decrementer:
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG 0x1000 Decrementer
lis r0,TSR_PIS@h
mtspr SPRN_TSR,r0 /* Clear the PIT exception */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x1000, timer_interrupt)
+ prepare_transfer_to_handler
+ bl timer_interrupt
+ b interrupt_return
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+ __HEAD
FITException:
- EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_STD(0x1010, unknown_exception)
+ EXCEPTION_PROLOG 0x1010 FITException
+ prepare_transfer_to_handler
+ bl unknown_exception
+ b interrupt_return
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
+ __HEAD
WDTException:
- CRITICAL_EXCEPTION_PROLOG;
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- crit_transfer_to_handler, ret_from_crit_exc)
-
-/*
- * The other Data TLB exceptions bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-DataAccess:
- EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_LITE(0x300, handle_page_fault)
+ CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException
+ prepare_transfer_to_handler
+ bl WatchdogException
+ b ret_from_crit_exc
/* Other PowerPC processors, namely those derived from the 6xx-series
* have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
@@ -684,12 +543,13 @@ DataAccess:
* reserved.
*/
+ __HEAD
/* Damn, I came up one instruction too many to fit into the
* exception space :-). Both the instruction and data TLB
* miss get to this point to load the TLB.
* r10 - TLB_TAG value
* r11 - Linux PTE
- * r12, r9 - available to use
+ * r9 - available to use
* PID - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
* Actually, it will fit now, but oh well.....a common place
@@ -698,45 +558,32 @@ DataAccess:
tlb_4xx_index:
.long 0
finish_tlb_load:
- /* load the next available TLB index.
- */
- lwz r9, tlb_4xx_index@l(0)
- addi r9, r9, 1
- andi. r9, r9, (PPC40X_TLB_SIZE-1)
- stw r9, tlb_4xx_index@l(0)
-
-6:
/*
* Clear out the software-only bits in the PTE to generate the
* TLB_DATA value. These are the bottom 2 bits of the RPM, the
- * top 3 bits of the zone field, and M.
+ * 4 bits of the zone field, and M.
*/
- li r12, 0x0ce2
- andc r11, r11, r12
+ li r9, 0x0cf2
+ andc r11, r11, r9
+ rlwimi r11, r10, 8, 24, 27 /* Copy 4 upper address bit into zone */
+
+ /* load the next available TLB index. */
+ lwz r9, tlb_4xx_index@l(0)
+ addi r9, r9, 1
+ andi. r9, r9, PPC40X_TLB_SIZE - 1
+ stw r9, tlb_4xx_index@l(0)
tlbwe r11, r9, TLB_DATA /* Load TLB LO */
tlbwe r10, r9, TLB_TAG /* Load TLB HI */
/* Done...restore registers and get out of here.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
rfi /* Should sync shadow TLBs */
b . /* prevent prefetch past rfi */
@@ -757,7 +604,7 @@ start_here:
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
bl early_init /* We have to do this with MMU on */
@@ -799,7 +646,7 @@ start_here:
ori r6, r6, swapper_pg_dir@l
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
stw r6, 0(r5)
@@ -814,10 +661,10 @@ start_here:
b . /* prevent prefetch past rfi */
/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * kernel initialization. This maps the first 32 MBytes of memory 1:1
* virtual to physical and more importantly sets the cache mode.
*/
-initial_mmu:
+SYM_FUNC_START_LOCAL(initial_mmu)
tlbia /* Invalidate all TLB entries */
isync
@@ -851,6 +698,12 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+ li r0,62 /* TLB slot 62 */
+ addis r4,r4,SZ_16M@h
+ addis r3,r3,SZ_16M@h
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
isync
/* Establish the exception vector base
@@ -860,44 +713,9 @@ initial_mmu:
mtspr SPRN_EVPR,r0
blr
+SYM_FUNC_END(initial_mmu)
_GLOBAL(abort)
mfspr r13,SPRN_DBCR0
oris r13,r13,DBCR0_RST_SYSTEM@h
mtspr SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
- sync
- mtspr SPRN_PID,r3
- isync /* Need an isync to flush shadow */
- /* TLBs after changing PID */
- blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 51dd01a27314..25642e802ed3 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -25,17 +25,16 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/synch.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include "head_booke.h"
@@ -52,8 +51,8 @@
*
*/
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
@@ -70,7 +69,7 @@ _ENTRY(_start);
* address.
* r21 will be loaded with the physical runtime address of _stext
*/
- bl 0f /* Get our runtime address */
+ bcl 20,31,$+4 /* Get our runtime address */
0: mflr r21 /* Make it accessible */
addis r21,r21,(_stext - 0b)@ha
addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
@@ -109,7 +108,7 @@ _ENTRY(_start);
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
bl early_init
@@ -263,8 +262,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
- do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -277,7 +275,7 @@ interrupt_base:
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
+ FloatingPointUnavailable, unknown_exception)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
@@ -285,15 +283,14 @@ interrupt_base:
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
+ AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
@@ -317,8 +314,8 @@ interrupt_base:
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
+ cmplw cr7, r10, r11
+ blt+ cr7, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -336,12 +333,16 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
/* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
+ * do copy ESR:ST to _PAGE_WRITE position as trying to write
* to an RO page is pretty common, we don't do it with
* _PAGE_DIRTY. We could do it, but it's a fairly rare
* event so I'd rather take the overhead when it happens
@@ -354,7 +355,7 @@ interrupt_base:
* place or can we save a couple of instructions here ?
*/
mfspr r12,SPRN_ESR
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ
rlwimi r13,r12,10,30,30
/* Load the PTE */
@@ -376,7 +377,7 @@ interrupt_base:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
@@ -427,8 +428,8 @@ interrupt_base:
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
+ cmplw cr7, r10, r11
+ blt+ cr7, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -446,6 +447,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -471,7 +476,7 @@ interrupt_base:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
@@ -510,6 +515,7 @@ interrupt_base:
* r11 - PTE high word value
* r12 - PTE low word value
* r13 - TLB index
+ * cr7 - Result of comparison with PAGE_OFFSET
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
@@ -528,12 +534,12 @@ finish_tlb_load_44x:
tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
/* And WS 2 */
- li r10,0xf85 /* Mask to apply from PTE */
- rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ li r10,0xf84 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */
and r11,r12,r10 /* Mask PTE bits to keep */
- andi. r10,r12,_PAGE_USER /* User page ? */
- beq 1f /* nope, leave U bits empty */
+ bge cr7,1f /* User page ? no, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -562,8 +568,8 @@ finish_tlb_load_44x:
* kernel page tables.
*/
lis r11,PAGE_OFFSET@h
- cmplw cr0,r10,r11
- blt+ 3f
+ cmplw cr7,r10,r11
+ blt+ cr7,3f
lis r11,swapper_pg_dir@h
ori r11,r11, swapper_pg_dir@l
li r12,0 /* MMUCR = 0 */
@@ -573,10 +579,14 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG3
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
+ * do copy ESR:ST to _PAGE_WRITE position as trying to write
* to an RO page is pretty common, we don't do it with
* _PAGE_DIRTY. We could do it, but it's a fairly rare
* event so I'd rather take the overhead when it happens
@@ -589,7 +599,7 @@ finish_tlb_load_44x:
* place or can we save a couple of instructions here ?
*/
mfspr r12,SPRN_ESR
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ
rlwimi r13,r12,10,30,30
/* Load the PTE */
@@ -659,8 +669,8 @@ finish_tlb_load_44x:
* kernel page tables.
*/
lis r11,PAGE_OFFSET@h
- cmplw cr0,r10,r11
- blt+ 3f
+ cmplw cr7,r10,r11
+ blt+ cr7,3f
lis r11,swapper_pg_dir@h
ori r11,r11, swapper_pg_dir@l
li r12,0 /* MMUCR = 0 */
@@ -670,6 +680,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Make up the required permissions */
@@ -730,6 +744,7 @@ finish_tlb_load_44x:
* r11 - PTE high word value
* r12 - PTE low word value
* r13 - free to use
+ * cr7 - Result of comparison with PAGE_OFFSET
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
@@ -739,12 +754,12 @@ finish_tlb_load_47x:
tlbwe r11,r13,1
/* And make up word 2 */
- li r10,0xf85 /* Mask to apply from PTE */
- rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ li r10,0xf84 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */
and r11,r12,r10 /* Mask PTE bits to keep */
- andi. r10,r12,_PAGE_USER /* User page ? */
- beq 1f /* nope, leave U bits empty */
+ bge cr7,1f /* User page ? no, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
@@ -782,20 +797,6 @@ _GLOBAL(__fixup_440A_mcheck)
sync
blr
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
/*
* Init CPU state. This is called at boot time or for secondary CPUs
* to setup initial TLB entries, setup IVORs, etc...
@@ -861,7 +862,7 @@ _GLOBAL(init_cpu_state)
wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
sync
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r5 /* Make it accessible */
tlbsx r23,0,r5 /* Find entry we are in */
li r4,0 /* Start at TLB entry 0 */
@@ -1010,7 +1011,7 @@ _GLOBAL(start_secondary_47x)
*/
lis r1,temp_boot_stack@h
ori r1,r1,temp_boot_stack@l
- addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ addi r1,r1,1024-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
bl mmu_init_secondary
@@ -1023,7 +1024,7 @@ _GLOBAL(start_secondary_47x)
lwz r1,TASK_STACK(r2)
/* Current stack pointer */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
@@ -1053,7 +1054,7 @@ head_start_47x:
sync
/* Find the entry we are running from */
- bl 1f
+ bcl 20,31,$+4
1: mflr r23
tlbsx r23,0,r23
tlbre r24,r23,0
@@ -1241,34 +1242,8 @@ head_start_common:
isync
blr
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align PAGE_SHIFT
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
-/*
- * To support >32-bit physical addresses, we use an 8KB pgdir.
- */
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
-
#ifdef CONFIG_SMP
+ .data
.align 12
temp_boot_stack:
.space 1024
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..4690c219bfa4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -18,6 +18,7 @@
* variants.
*/
+#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/reg.h>
@@ -39,8 +40,12 @@
#include <asm/hw_irq.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/* The physical memory is laid out such that the secondary processor
* spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -70,6 +75,13 @@
* 2. The kernel is entered at __start
*/
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry run at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
USE_FIXED_SECTION(first_256B)
/*
@@ -106,7 +118,7 @@ __secondary_hold_acknowledge:
#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
* at the loaded address instead of the linked address. This
- * is used by kexec-tools to keep the the kdump kernel in the
+ * is used by kexec-tools to keep the kdump kernel in the
* crash_kernel region. The loader is responsible for
* observing the alignment requirement.
*/
@@ -121,7 +133,7 @@ __secondary_hold_acknowledge:
. = 0x5c
.globl __run_at_load
__run_at_load:
-DEFINE_FIXED_SYMBOL(__run_at_load)
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
.long RUN_AT_LOAD_DEFAULT
#endif
@@ -138,7 +150,7 @@ DEFINE_FIXED_SYMBOL(__run_at_load)
.globl __secondary_hold
__secondary_hold:
FIXUP_ENDIAN
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
mfmsr r24
ori r24,r24,MSR_RI
mtmsrd r24 /* RI on */
@@ -151,20 +163,16 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
sync
- li r26,0
-#ifdef CONFIG_PPC_BOOK3E
- tovirt(r26,r26)
-#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0)
cmpdi 0,r12,0
beq 100b
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
tovirt(r12,r12)
#endif
mtctr r12
@@ -173,7 +181,7 @@ __secondary_hold:
* it may be the case that other platforms have r4 right to
* begin with, this gives us some safety in case it is not
*/
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
mr r4,r25
#else
li r4,0
@@ -187,12 +195,6 @@ __secondary_hold:
#endif
CLOSE_FIXED_SECTION(first_256B)
-/* This value is used to mark exception frames on the stack. */
- .section ".toc","aw"
-exception_marker:
- .tc ID_72656773_68657265[TC],0x7265677368657265
- .previous
-
/*
* On server, we include the exception vectors code here as it
* relies on absolute addressing which is only possible within
@@ -206,7 +208,9 @@ OPEN_TEXT_SECTION(0x100)
USE_TEXT_SECTION()
-#ifdef CONFIG_PPC_BOOK3E
+#include "interrupt_64.S"
+
+#ifdef CONFIG_PPC_BOOK3E_64
/*
* The booting_thread_hwid holds the thread id we want to boot in cpu
* hotplug case. It is set by cpu hotplug code, and is invalid by default.
@@ -300,25 +304,20 @@ _GLOBAL(fsl_secondary_thread_init)
rlwimi r3, r3, 30, 2, 30
mtspr SPRN_PIR, r3
1:
-#endif
-
-_GLOBAL(generic_secondary_thread_init)
mr r24,r3
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
-#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
-#endif
+ bl relative_toc
+
b generic_secondary_common_init
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
/*
* On pSeries and most other platforms, secondary processors spin
* in the following code.
@@ -330,22 +329,24 @@ _GLOBAL(generic_secondary_thread_init)
*/
_GLOBAL(generic_secondary_smp_init)
FIXUP_ENDIAN
+
+ li r13,0
+
+ /* Poison TOC */
+ li r2,-1
+
mr r24,r3
mr r25,r4
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* Book3E initialization */
mr r3,r24
mr r4,r25
bl book3e_secondary_core_init
-
+ /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
/*
* After common core init has finished, check if the current thread is the
* one we wanted to boot. If not, start the specified thread and stop the
@@ -373,8 +374,7 @@ _GLOBAL(generic_secondary_smp_init)
beq 20f
/* start the specified thread */
- LOAD_REG_ADDR(r5, fsl_secondary_thread_init)
- ld r4, 0(r5)
+ LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
bl book3e_start_thread
/* stop the current thread */
@@ -383,6 +383,16 @@ _GLOBAL(generic_secondary_smp_init)
10:
b 10b
20:
+#else
+ /* Now the MMU is off, can branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
+ bl relative_toc
#endif
generic_secondary_common_init:
@@ -395,8 +405,12 @@ generic_secondary_common_init:
#else
LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */
ld r8,0(r8) /* Get base vaddr of array */
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+ LOAD_REG_IMMEDIATE(r7, NR_CPUS)
+#else
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
+#endif
li r5,0 /* logical cpu id */
1:
sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
@@ -412,7 +426,7 @@ generic_secondary_common_init:
b kexec_wait /* next kernel might do better */
2: SET_PACA(r13)
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
mtspr SPRN_SPRG_TLB_EXFRAME,r12
#endif
@@ -420,13 +434,17 @@ generic_secondary_common_init:
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_MIN_SIZE
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
ld r12,CPU_SPEC_RESTORE(r23)
cmpdi 0,r12,0
beq 3f
-#ifdef PPC64_ELF_ABI_v1
+#ifdef CONFIG_PPC64_ELF_ABI_V1
ld r12,0(r12)
#endif
mtctr r12
@@ -448,10 +466,6 @@ generic_secondary_common_init:
sync /* order paca.run and cur_cpu_spec */
isync /* In case code patching happened */
- /* Create a temp kernel stack for use before relocation is on. */
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
-
b __secondary_start
#endif /* SMP */
@@ -460,7 +474,7 @@ generic_secondary_common_init:
* Assumes we're mapped EA == RA if the MMU is on.
*/
#ifdef CONFIG_PPC_BOOK3S
-__mmu_off:
+SYM_FUNC_START_LOCAL(__mmu_off)
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
@@ -471,8 +485,34 @@ __mmu_off:
sync
rfid
b . /* prevent speculative execution */
-#endif
+SYM_FUNC_END(__mmu_off)
+SYM_FUNC_START_LOCAL(start_initialization_book3s)
+ mflr r25
+
+ /* Setup some critical 970 SPRs before switching MMU off */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39 /* 970 */
+ beq 1f
+ cmpwi r0,0x3c /* 970FX */
+ beq 1f
+ cmpwi r0,0x44 /* 970MP */
+ beq 1f
+ cmpwi r0,0x45 /* 970GX */
+ bne 2f
+1: bl __cpu_preinit_ppc970
+2:
+
+ /* Switch off MMU if not already off */
+ bl __mmu_off
+
+ /* Now the MMU is off, can return to our PAGE_OFFSET address */
+ tovirt(r25,r25)
+ mtlr r25
+ blr
+SYM_FUNC_END(start_initialization_book3s)
+#endif
/*
* Here is our main kernel entry point. We support currently 2 kind of entries
@@ -489,14 +529,11 @@ __start_initialization_multiplatform:
/* Make sure we are running in 64 bits mode */
bl enable_64b_mode
- /* Get TOC pointer (current runtime address) */
- bl relative_toc
+ /* Zero r13 (paca) so early program check / mce don't use it */
+ li r13,0
- /* find out where we are now */
- bcl 20,31,$+4
-0: mflr r26 /* r26 = runtime addr here */
- addis r26,r26,(_stext - 0b)@ha
- addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+ /* Poison TOC */
+ li r2,-1
/*
* Are we booted from a PROM Of-type client-interface ?
@@ -514,31 +551,41 @@ __start_initialization_multiplatform:
mr r29,r9
#endif
-#ifdef CONFIG_PPC_BOOK3E
+ /* Get TOC pointer (current runtime address) */
+ bl relative_toc
+
+ /* These functions return to the virtual (PAGE_OFFSET) address */
+#ifdef CONFIG_PPC_BOOK3E_64
bl start_initialization_book3e
- b __after_prom_start
#else
- /* Setup some critical 970 SPRs before switching MMU off */
- mfspr r0,SPRN_PVR
- srwi r0,r0,16
- cmpwi r0,0x39 /* 970 */
- beq 1f
- cmpwi r0,0x3c /* 970FX */
- beq 1f
- cmpwi r0,0x44 /* 970MP */
- beq 1f
- cmpwi r0,0x45 /* 970GX */
- bne 2f
-1: bl __cpu_preinit_ppc970
-2:
+ bl start_initialization_book3s
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+ /* Get TOC pointer, virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+
+ /* OPAL doesn't pass base address in r4, have to derive it. */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
- /* Switch off MMU if not already off */
- bl __mmu_off
b __after_prom_start
-#endif /* CONFIG_PPC_BOOK3E */
+__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* Get TOC pointer, non-virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/* Save parameters */
mr r31,r3
mr r30,r4
@@ -568,31 +615,25 @@ __boot_from_prom:
/* Do all of the interaction with OF client interface */
mr r8,r26
- bl prom_init
+ bl CFUNC(prom_init)
#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
+ .previous
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
- lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
- sldi r25,r25,32
-#if defined(CONFIG_PPC_BOOK3E)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
-#if defined(CONFIG_PPC_BOOK3E)
- tophys(r26,r26)
-#endif
cmplwi cr0,r7,1 /* flagged to stay where we are ? */
- bne 1f
- add r25,r25,r26
+ mr r25,r26 /* then use current kernel base */
+ beq 1f
+ LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */
1: mr r3,r25
bl relocate
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
/* IVPR needs to be set after relocation. */
bl init_core_book3e
#endif
@@ -605,14 +646,8 @@ __after_prom_start:
*
* Note: This process overwrites the OF exception vectors.
*/
- li r3,0 /* target addr */
-#ifdef CONFIG_PPC_BOOK3E
- tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
-#endif
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
mr. r4,r26 /* In some cases the loader may */
-#if defined(CONFIG_PPC_BOOK3E)
- tovirt(r4,r4)
-#endif
beq 9f /* have already put us at zero */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
@@ -623,14 +658,11 @@ __after_prom_start:
* variable __run_at_load, if it is set the kernel is treated as relocatable
* kernel, otherwise it will be moved to PHYSICAL_START
*/
-#if defined(CONFIG_PPC_BOOK3E)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1
bne 3f
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
LOAD_REG_ADDR(r5, __end_interrupts)
LOAD_REG_ADDR(r11, _stext)
sub r5,r5,r11
@@ -642,15 +674,15 @@ __after_prom_start:
3:
#endif
/* # bytes of memory to copy */
- lis r5,(ABS_ADDR(copy_to_here))@ha
- addi r5,r5,(ABS_ADDR(copy_to_here))@l
+ lis r5,(ABS_ADDR(copy_to_here, text))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here, text))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
/* Jump to the copy of this code that we just made */
- addis r8,r3,(ABS_ADDR(4f))@ha
- addi r12,r8,(ABS_ADDR(4f))@l
+ addis r8,r3,(ABS_ADDR(4f, text))@ha
+ addi r12,r8,(ABS_ADDR(4f, text))@l
mtctr r12
bctr
@@ -662,8 +694,8 @@ p_end: .8byte _end - copy_to_here
* Now copy the rest of the kernel up to _end, add
* _end - copy_to_here to the copy limit and run again.
*/
- addis r8,r26,(ABS_ADDR(p_end))@ha
- ld r8,(ABS_ADDR(p_end))@l(r8)
+ addis r8,r26,(ABS_ADDR(p_end, text))@ha
+ ld r8,(ABS_ADDR(p_end, text))@l(r8)
add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
@@ -705,6 +737,8 @@ _GLOBAL(copy_and_flush)
isync
blr
+_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */
+
.align 8
copy_to_here:
@@ -742,9 +776,15 @@ _GLOBAL(pmac_secondary_start)
sync
slbia
- /* get TOC pointer (real address) */
+ /* Branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
bl relative_toc
- tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
bl __restore_cpu_ppc970
@@ -771,7 +811,7 @@ _GLOBAL(pmac_secondary_start)
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ subi r1,r1,STACK_FRAME_MIN_SIZE
b __secondary_start
@@ -803,7 +843,7 @@ __secondary_start:
* can turn it on below. This is a call to C, which is OK, we're still
* running on the emergency stack.
*/
- bl early_setup_secondary
+ bl CFUNC(early_setup_secondary)
/*
* The primary has initialized our kernel stack for us in the paca, grab
@@ -830,7 +870,7 @@ __secondary_start:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
/*
@@ -839,10 +879,10 @@ __secondary_start:
* before going into C code.
*/
start_secondary_prolog:
- ld r2,PACATOC(r13)
+ LOAD_PACA_TOC()
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
/*
* Reset stack pointer and call start_secondary
@@ -853,26 +893,26 @@ _GLOBAL(start_secondary_resume)
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
#endif
/*
* This subroutine clobbers r11 and r12
*/
-enable_64b_mode:
+SYM_FUNC_START_LOCAL(enable_64b_mode)
mfmsr r11 /* grab the current MSR */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
mtmsr r11
-#else /* CONFIG_PPC_BOOK3E */
- li r12,(MSR_64BIT | MSR_ISF)@highest
- sldi r12,r12,48
+#else /* CONFIG_PPC_BOOK3E_64 */
+ LOAD_REG_IMMEDIATE(r12, MSR_64BIT)
or r11,r11,r12
mtmsrd r11
isync
#endif
blr
+SYM_FUNC_END(enable_64b_mode)
/*
* This puts the TOC pointer into r2, offset by 0x8000 (as expected
@@ -883,10 +923,15 @@ enable_64b_mode:
* TOC in -mcmodel=medium mode. After we relocate to 0 but before
* the MMU is on we need our TOC to be a virtual address otherwise
* these pointers will be real addresses which may get stored and
- * accessed later with the MMU on. We use tovirt() at the call
- * sites to handle this.
+ * accessed later with the MMU on. We branch to the virtual address
+ * while still in real mode then call relative_toc again to handle
+ * this.
*/
_GLOBAL(relative_toc)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ tdnei r2,-1
+ blr
+#else
mflr r0
bcl 20,31,$+4
0: mflr r11
@@ -896,16 +941,16 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .8byte __toc_start + 0x8000 - 0b
+p_toc: .8byte .TOC. - 0b
+#endif
/*
* This is where the main kernel code starts.
*/
__REF
start_here_multiplatform:
- /* set up the TOC */
- bl relative_toc
- tovirt(r2,r2)
+ /* Adjust TOC for moved kernel. Could adjust when moving it instead. */
+ bl relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
@@ -932,7 +977,7 @@ start_here_multiplatform:
std r29,8(r11);
#endif
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
mfmsr r6
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
@@ -945,25 +990,21 @@ start_here_multiplatform:
std r0,0(r4)
#endif
- /* The following gets the stack set up with the regs */
- /* pointing to the real addr of the kernel stack. This is */
- /* all done to support the C function call below which sets */
- /* up the htab. This is done because we have relocated the */
- /* kernel but are still running in real mode. */
-
- LOAD_REG_ADDR(r3,init_thread_union)
-
/* set up a stack pointer */
+ LOAD_REG_ADDR(r3,init_thread_union)
LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
add r1,r3,r1
li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
+ stdu r0,-STACK_FRAME_MIN_SIZE(r1)
/*
* Do very early kernel initializations, including initial hash table
* and SLB setup before we turn on relocation.
*/
+#ifdef CONFIG_KASAN
+ bl CFUNC(kasan_early_init)
+#endif
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
LOAD_REG_ADDR(r12, DOTSYM(early_setup))
@@ -974,10 +1015,9 @@ start_here_multiplatform:
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
- .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -985,7 +1025,7 @@ start_here_common:
std r1,PACAKSAVE(r13)
/* Load the TOC (virtual address) */
- ld r2,PACATOC(r13)
+ LOAD_PACA_TOC()
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
@@ -996,28 +1036,9 @@ start_here_common:
stb r0,PACAIRQHAPPENED(r13)
/* Generic kernel entry */
- bl start_kernel
+ bl CFUNC(start_kernel)
/* Not reached */
- trap
+0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
- .section ".bss"
-/*
- * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
- * We will need to find a better way to fix this
- */
- .align 16
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
+ .previous
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_85xx.S
index 6f7a3a7162c5..39724ff5ae1f 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -28,17 +28,18 @@
#include <linux/init.h>
#include <linux/threads.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#include "head_booke.h"
@@ -54,8 +55,8 @@
*
*/
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
@@ -79,7 +80,7 @@ _ENTRY(_start);
mr r23,r3
mr r25,r4
- bl 0f
+ bcl 20,31,$+4
0: mflr r8
addis r3,r8,(is_second_reloc - 0b)@ha
lwz r19,(is_second_reloc - 0b)@l(r3)
@@ -113,7 +114,7 @@ _ENTRY(_start);
1:
/*
- * We have the runtime (virutal) address of our base.
+ * We have the runtime (virtual) address of our base.
* We calculate our shift of offset from a 64M page.
* We could map the 64M page we belong to at PAGE_OFFSET and
* get going from there.
@@ -129,7 +130,7 @@ _ENTRY(_start);
/*
* For the second relocation, we already set the right tlb entries
- * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+ * for the kernel space, so skip the code in 85xx_entry_mapping.S
*/
cmpwi r19,1
beq set_ivor
@@ -154,12 +155,12 @@ _ENTRY(_start);
* if needed
*/
-_ENTRY(__early_start)
+_GLOBAL(__early_start)
LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
lwz r20,0(r20)
#define ENTRY_MAPPING_BOOT_SETUP
-#include "fsl_booke_entry_mapping.S"
+#include "85xx_entry_mapping.S"
#undef ENTRY_MAPPING_BOOT_SETUP
set_ivor:
@@ -187,9 +188,6 @@ set_ivor:
/* Setup the defaults for TLB entries */
li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
-#ifdef CONFIG_E200
- oris r2,r2,MAS4_TLBSELD(1)@h
-#endif
mtspr SPRN_MAS4, r2
#if !defined(CONFIG_BDI_SWITCH)
@@ -232,7 +230,7 @@ set_ivor:
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
#ifdef CONFIG_SMP
stw r24, TASK_CPU(r2)
@@ -362,32 +360,30 @@ interrupt_base:
CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
-#ifdef CONFIG_E200
- /* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
- machine_check_exception)
-#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it */
stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */
+ stw r4, _DEAR(r11)
andis. r10,r5,(ESR_ILK|ESR_DLK)@h
bne 1f
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
1:
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x0300, CacheLockingException)
+ prepare_transfer_to_handler
+ bl CacheLockingException
+ b interrupt_return
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -399,14 +395,7 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
-#ifdef CONFIG_E200
- /* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_STD)
-#else
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_STD)
-#endif
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt)
#endif
/* System Call Interrupt */
@@ -414,16 +403,14 @@ interrupt_base:
SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -476,9 +463,15 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
4:
/* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
+ * do copy ESR:ST to _PAGE_WRITE position as trying to write
* to an RO page is pretty common, we don't do it with
* _PAGE_DIRTY. We could do it, but it's a fairly rare
* event so I'd rather take the overhead when it happens
@@ -492,10 +485,10 @@ END_BTB_FLUSH_SECTION
*/
mfspr r12,SPRN_ESR
#ifdef CONFIG_PTE_64BIT
- li r13,_PAGE_PRESENT
+ li r13,_PAGE_PRESENT|_PAGE_BAP_SR
oris r13,r13,_PAGE_ACCESSED@h
#else
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED
#endif
rlwimi r13,r12,11,29,29
@@ -511,7 +504,7 @@ END_BTB_FLUSH_SECTION
#endif
#endif
- bne 2f /* Bail if permission/valid mismach */
+ bne 2f /* Bail if permission/valid mismatch */
/* Jump to common tlb load */
b finish_tlb_load
@@ -585,6 +578,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
/* Make up the required permissions for user code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_UX
@@ -606,7 +605,7 @@ END_BTB_FLUSH_SECTION
#endif
#endif
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Jump to common TLB load point */
b finish_tlb_load
@@ -624,42 +623,48 @@ END_BTB_FLUSH_SECTION
mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
-/* Define SPE handlers for e200 and e500v2 */
+/* Define SPE handlers for e500v2 */
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+ NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
beq 1f
bl load_up_spe
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x2010, KernelSPE)
+1: prepare_transfer_to_handler
+ bl KernelSPE
+ b interrupt_return
#elif defined(CONFIG_SPE_POSSIBLE)
- EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_STD)
+ START_EXCEPTION(SPEFloatingPointData)
+ NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointException
+ REST_NVGPRS(r1)
+ b interrupt_return
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_STD)
+ START_EXCEPTION(SPEFloatingPointRound)
+ NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointRoundException
+ REST_NVGPRS(r1)
+ b interrupt_return
#elif defined(CONFIG_SPE_POSSIBLE)
- EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
- unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
#endif /* CONFIG_SPE_POSSIBLE */
/* Performance Monitor */
EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
- performance_monitor_exception, EXC_XFER_STD)
+ performance_monitor_exception)
- EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
CriticalDoorbell, unknown_exception)
@@ -674,10 +679,10 @@ END_BTB_FLUSH_SECTION
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
interrupt_end:
@@ -778,14 +783,15 @@ BEGIN_MMU_FTR_SECTION
mtspr SPRN_MAS7, r10
END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#else
- li r10, (_PAGE_EXEC | _PAGE_PRESENT)
+ li r10, (_PAGE_EXEC | _PAGE_READ)
mr r13, r11
rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
and r12, r11, r10
- andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
+ mcrf cr0, cr5 /* Test for user page */
slwi r10, r12, 1
or r10, r10, r12
- iseleq r12, r12, r10
+ rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */
+ isellt r12, r10, r12
rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r13
#endif
@@ -806,31 +812,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#endif
3: mtspr SPRN_MAS2, r12
-#ifdef CONFIG_E200
- /* Round robin TLB1 entries assignment */
- mfspr r12, SPRN_MAS0
-
- /* Extract TLB1CFG(NENTRY) */
- mfspr r11, SPRN_TLB1CFG
- andi. r11, r11, 0xfff
-
- /* Extract MAS0(NV) */
- andi. r13, r12, 0xfff
- addi r13, r13, 1
- cmpw 0, r13, r11
- addi r12, r12, 1
-
- /* check if we need to wrap */
- blt 7f
-
- /* wrap back to first free tlbcam entry */
- lis r13, tlbcam_index@ha
- lwz r13, tlbcam_index@l(r13)
- rlwimi r12, r13, 0, 20, 31
-7:
- mtspr SPRN_MAS0,r12
-#endif /* CONFIG_E200 */
-
tlb_write_entry:
tlbwe
@@ -882,7 +863,7 @@ _GLOBAL(load_up_spe)
* SPE unavailable trap from kernel - print a message, but let
* the task use SPE in the kernel until it returns to user mode.
*/
-KernelSPE:
+SYM_FUNC_START_LOCAL(KernelSPE)
lwz r3,_MSR(r1)
oris r3,r3,MSR_SPE@h
stw r3,_MSR(r1) /* enable use of SPE after return */
@@ -891,21 +872,22 @@ KernelSPE:
ori r3,r3,87f@l
mr r4,r2 /* current */
lwz r5,_NIP(r1)
- bl printk
+ bl _printk
#endif
- b ret_from_except
+ b interrupt_return
#ifdef CONFIG_PRINTK
87: .string "SPE used in kernel (task=%p, pc=%x) \n"
#endif
.align 4,0
+SYM_FUNC_END(KernelSPE)
#endif /* CONFIG_SPE */
/*
* Translate the effec addr in r3 to phys addr. The phys addr will be put
* into r3(higher 32bit) and r4(lower 32bit)
*/
-get_phys_addr:
+SYM_FUNC_START_LOCAL(get_phys_addr)
mfmsr r8
mfspr r9,SPRN_PID
rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
@@ -927,27 +909,13 @@ get_phys_addr:
mfspr r3,SPRN_MAS7
#endif
blr
+SYM_FUNC_END(get_phys_addr)
/*
* Global functions
*/
-#ifdef CONFIG_E200
-/* Adjust or setup IVORs for e200 */
-_GLOBAL(__setup_e200_ivors)
- li r3,DebugDebug@l
- mtspr SPRN_IVOR15,r3
- li r3,SPEUnavailable@l
- mtspr SPRN_IVOR32,r3
- li r3,SPEFloatingPointData@l
- mtspr SPRN_IVOR33,r3
- li r3,SPEFloatingPointRound@l
- mtspr SPRN_IVOR34,r3
- sync
- blr
-#endif
-
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
#ifndef CONFIG_PPC_E500MC
/* Adjust or setup IVORs for e500v1/v2 */
_GLOBAL(__setup_e500_ivors)
@@ -990,7 +958,7 @@ _GLOBAL(__setup_ehv_ivors)
sync
blr
#endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC_E500 */
#ifdef CONFIG_SPE
/*
@@ -1007,10 +975,10 @@ _GLOBAL(__giveup_spe)
li r4,THREAD_ACC
evstddx evr6, r4, r3 /* save off accumulator */
beq 1f
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lwz r4,_MSR-STACK_INT_FRAME_REGS(r5)
lis r3,MSR_SPE@h
andc r4,r4,r3 /* disable SPE for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ stw r4,_MSR-STACK_INT_FRAME_REGS(r5)
1:
blr
#endif /* CONFIG_SPE */
@@ -1033,20 +1001,6 @@ _GLOBAL(abort)
mtspr SPRN_DBCR0,r13
isync
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
#ifdef CONFIG_SMP
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
@@ -1093,7 +1047,7 @@ __secondary_start:
lwz r1,TASK_STACK(r2)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
@@ -1194,7 +1148,7 @@ _GLOBAL(switch_to_as1)
bne 1b
/* Get the tlb entry used by the current running code */
- bl 0f
+ bcl 20,31,$+4
0: mflr r4
tlbsx 0,r4
@@ -1228,7 +1182,7 @@ _GLOBAL(switch_to_as1)
_GLOBAL(restore_to_as0)
mflr r0
- bl 0f
+ bcl 20,31,$+4
0: mflr r9
addi r9,r9,1f - 0b
@@ -1274,26 +1228,3 @@ _GLOBAL(restore_to_as0)
*/
3: mr r3,r5
bl _start
-
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 19f583e18402..647b0b445e89 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -16,35 +16,21 @@
#include <linux/init.h>
#include <linux/magic.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/cache.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/code-patching-asm.h>
-
-#include "head_32.h"
-
-#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-/* By simply checking Address >= 0x80000000, we know if its a kernel address */
-#define SIMPLE_KERNEL_ADDRESS 1
-#endif
-
-/*
- * We need an ITLB miss handler for kernel addresses if:
- * - Either we have modules
- * - Or we have not pinned the first 8M
- */
-#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
- defined(CONFIG_DEBUG_PAGEALLOC)
-#define ITLB_MISS_KERNEL 1
-#endif
+#include <asm/interrupt.h>
/*
* Value for the bits that have fixed value in RPN entries.
@@ -52,12 +38,24 @@
*/
#define RPN_PATTERN 0x00f0
+#include "head_32.h"
+
+.macro compare_to_kernel_boundary scratch, addr
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+/* By simply checking Address >= 0x80000000, we know if its a kernel address */
+ not. \scratch, \addr
+#else
+ rlwinm \scratch, \addr, 16, 0xfff8
+ cmpli cr0, \scratch, PAGE_OFFSET@h
+#endif
+.endm
+
#define PAGE_SHIFT_512K 19
#define PAGE_SHIFT_8M 23
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/* MPC8xx
* This port was done on an MBX board with an 860. Right now I only
@@ -122,89 +120,54 @@ instruction_counter:
#endif
/* System reset */
- EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
/* Machine check */
- . = 0x200
-MachineCheck:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x200, machine_check_exception)
-
-/* Data access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x300
-DataAccess:
-
-/* Instruction access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x400
-InstructionAccess:
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+ EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
-Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
-
-/* No FPU on MPC8xx. This exception is not supposed to happen.
-*/
- EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
-
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
/* System call */
- . = 0xc00
-SystemCall:
- SYSCALL_ENTRY 0xc00
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
-
-/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
-#endif
+ START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+ EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+ prepare_transfer_to_handler
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1)
+ b interrupt_return
- . = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
* TLB. The task switch loads the M_TWB register with the pointer to the first
@@ -217,88 +180,58 @@ SystemCall:
*/
#ifdef CONFIG_8xx_CPU15
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \
- addi addr, addr, PAGE_SIZE; \
- tlbie addr; \
- addi addr, addr, -(PAGE_SIZE << 1); \
- tlbie addr; \
- addi addr, addr, PAGE_SIZE
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \
+ addi tmp, addr, PAGE_SIZE; \
+ tlbie tmp; \
+ addi tmp, addr, -PAGE_SIZE; \
+ tlbie tmp
#else
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr)
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
#endif
-InstructionTLBMiss:
- mtspr SPRN_SPRG_SCRATCH0, r10
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mtspr SPRN_SPRG_SCRATCH1, r11
-#endif
+ START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r11
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
- INVALIDATE_ADJACENT_PAGES_CPU15(r10)
+ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr SPRN_MD_EPN, r10
- /* Only modules will cause ITLB Misses as we always
- * pin the first 8MB of kernel memory */
-#ifdef ITLB_MISS_KERNEL
+#ifdef CONFIG_MODULES
mfcr r11
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- cmpi cr0, r10, 0 /* Address >= 0x80000000 */
-#else
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 32M */
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__itlbmiss_linmem_top
-#endif
-#endif
+ compare_to_kernel_boundary r10, r10
#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef ITLB_MISS_KERNEL
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- bge+ 3f
-#else
+#ifdef CONFIG_MODULES
blt+ 3f
-#endif
-#ifndef CONFIG_PIN_TLB_TEXT
- blt cr7, ITLBMissLinear
-#endif
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
+ mtcr r11
#endif
- lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
- mtspr SPRN_MI_TWC, r10 /* Set segment attributes */
-
- mtspr SPRN_MD_TWC, r10
+ lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
+ mtspr SPRN_MD_TWC, r11
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-#ifdef ITLB_MISS_KERNEL
- mtcr r11
-#endif
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
+ mtspr SPRN_MI_TWC, r11
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
- rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */
+ rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */
rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
+0: mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__itlbmiss_exit_1
@@ -307,65 +240,23 @@ InstructionTLBMiss:
0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
addi r10, r10, 1
stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
#endif
-#ifndef CONFIG_PIN_TLB_TEXT
-ITLBMissLinear:
- mtcr r11
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
- patch_site 0f, patch__itlbmiss_linmem_top8
-
- mfspr r10, SPRN_SRR0
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#endif
- mtspr SPRN_MI_TWC, r11
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
- patch_site 0b, patch__itlbmiss_exit_2
-#endif
-
- . = 0x1200
-DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
+ START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r11
mfcr r11
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr6, r10, VIRT_IMMR_BASE@h
-#endif
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__dtlbmiss_linmem_top
-
+ compare_to_kernel_boundary r10, r10
mfspr r10, SPRN_M_TWB /* Get level 1 table */
blt+ 3f
-#ifndef CONFIG_PIN_TLB_IMMR
-0: beq- cr6, DTLBMissIMMR
- patch_site 0b, patch__dtlbmiss_immr_jmp
-#endif
- blt cr7, DTLBMissLinear
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
@@ -376,29 +267,16 @@ DataStoreTLBMiss:
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
- /* Insert the Guarded flag into the TWC from the Linux PTE.
+ /* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, _PAGE_GUARDED
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
@@ -407,153 +285,97 @@ DataStoreTLBMiss:
li r11, RPN_PATTERN
rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+ mtspr SPRN_DAR, r11 /* Tag DAR */
/* Restore registers */
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_1
-DTLBMissIMMR:
- mtcr r11
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- mtspr SPRN_MD_TWC, r10
- mfspr r10, SPRN_IMMR /* Get current IMMR */
- rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT | _PAGE_NO_CACHE
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
- patch_site 0b, patch__dtlbmiss_exit_2
-
-DTLBMissLinear:
- mtcr r11
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
- patch_site 0f, patch__dtlbmiss_romem_top8
-
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 0, 0xff800000
- neg r10, r11
- or r11, r11, r10
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
#endif
- mtspr SPRN_MD_TWC, r11
-#ifdef CONFIG_STRICT_KERNEL_RWX
- patch_site 0f, patch__dtlbmiss_romem_top
-
-0: subis r11, r10, 0
- rlwimi r10, r11, 11, _PAGE_RO
-#endif
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
- patch_site 0b, patch__dtlbmiss_exit_3
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
* addresses. There is nothing to do but handle a big time error fault.
*/
- . = 0x1300
-InstructionTLBError:
- EXCEPTION_PROLOG
- mr r4,r12
+ START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+ /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis. r10,r9,SRR1_ISI_NOPT@h
beq+ .Litlbie
- tlbie r4
- /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+ tlbie r12
.Litlbie:
- EXC_XFER_LITE(0x400, handle_page_fault)
+ stw r12, _DAR(r11)
+ stw r5, _DSISR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* This is the data TLB error on the MPC8xx. This could be due to
* many reasons, including a dirty update to a pte. We bail out to
* a higher level function that can handle it.
*/
- . = 0x1400
-DataTLBError:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
-
+ START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_DAR
- cmpwi cr0, r11, RPN_PATTERN
- beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
+ cmpwi cr1, r11, RPN_PATTERN
+ beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */
DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- mfspr r4,SPRN_DAR
+ /* 0x300 is DataAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+ lwz r4, _DAR(r11)
+ lwz r5, _DSISR(r11)
andis. r10,r5,DSISR_NOHPTE@h
beq+ .Ldtlbie
tlbie r4
.Ldtlbie:
- li r10,RPN_PATTERN
- mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
- /* 0x300 is DataAccess exception, needed by bad_page_fault() */
- EXC_XFER_LITE(0x300, handle_page_fault)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+#ifdef CONFIG_VMAP_STACK
+ vmap_stack_overflow_exception
+#endif
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
* using them.
*/
- . = 0x1c00
-DataBreakpoint:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
+ START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
- beq- cr0, 11f
- beq- cr7, 11f
- EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
- addi r3,r1,STACK_FRAME_OVERHEAD
- mfspr r4,SPRN_BAR
- stw r4,_DAR(r11)
- mfspr r5,SPRN_DSISR
- EXC_XFER_STD(0x1c00, do_break)
-11:
+ cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq
+ bne cr1, 1f
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
rfi
+1: EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+ mfspr r4,SPRN_BAR
+ stw r4,_DAR(r11)
+ prepare_transfer_to_handler
+ bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
+
#ifdef CONFIG_PERF_EVENTS
- . = 0x1d00
-InstructionBreakpoint:
+ START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
mtspr SPRN_SPRG_SCRATCH0, r10
lwz r10, (instruction_counter - PAGE_OFFSET)@l(0)
addi r10, r10, -1
@@ -564,11 +386,12 @@ InstructionBreakpoint:
mfspr r10, SPRN_SPRG_SCRATCH0
rfi
#else
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
#endif
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ __HEAD
. = 0x2000
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -581,28 +404,22 @@ FixupDAR:/* Entry point for dcbx workaround. */
mfspr r10, SPRN_SRR0
mtspr SPRN_MD_EPN, r10
rlwinm r11, r10, 16, 0xfff8
- cmpli cr0, r11, PAGE_OFFSET@h
+ cmpli cr1, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TWB /* Get level 1 table */
- blt+ 3f
- rlwinm r11, r10, 16, 0xfff8
-
-0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
- patch_site 0b, patch__fixupdar_linmem_top
+ blt+ cr1, 3f
/* create physical page address from effective address */
tophys(r11, r10)
- blt- cr7, 201f
mfspr r11, SPRN_M_TWB /* Get level 1 table */
rlwinm r11, r11, 0, 20, 31
oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
mtspr SPRN_MD_TWC, r11
- mtcr r11
+ mtcrf 0x01, r11
mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
- bt 29,202f /* bit 29 = Large page (8M or 512K) */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
201: lwz r11,0(r11)
@@ -611,16 +428,16 @@ FixupDAR:/* Entry point for dcbx workaround. */
* no need to include them here */
xoris r10, r11, 0x7c00 /* check if major OP code is 31 */
rlwinm r10, r10, 0, 21, 5
- cmpwi cr0, r10, 2028 /* Is dcbz? */
- beq+ 142f
- cmpwi cr0, r10, 940 /* Is dcbi? */
- beq+ 142f
- cmpwi cr0, r10, 108 /* Is dcbst? */
- beq+ 144f /* Fix up store bit! */
- cmpwi cr0, r10, 172 /* Is dcbf? */
- beq+ 142f
- cmpwi cr0, r10, 1964 /* Is icbi? */
- beq+ 142f
+ cmpwi cr1, r10, 2028 /* Is dcbz? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 940 /* Is dcbi? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 108 /* Is dcbst? */
+ beq+ cr1, 144f /* Fix up store bit! */
+ cmpwi cr1, r10, 172 /* Is dcbf? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 1964 /* Is icbi? */
+ beq+ cr1, 142f
141: mfspr r10,SPRN_M_TW
b DARFixed /* Nope, go back to normal TLB processing */
@@ -629,11 +446,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
b 201b
-202:
- /* concat physical page address(r11) and page offset(r10) */
- rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31
- b 201b
-
144: mfspr r10, SPRN_DSISR
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
@@ -679,8 +491,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
add r10, r10, r30 ;b 151f
add r10, r10, r31
151:
- rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
- beq 152f /* if reg RA is zero, don't add it */
+ rlwinm r11,r11,19,24,28 /* offset into jump table for reg RA */
+ cmpwi cr1, r11, 0
+ beq cr1, 152f /* if reg RA is zero, don't add it */
addi r11, r11, 150b@l /* add start of table */
mtctr r11 /* load ctr with jump address */
rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
@@ -688,7 +501,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
152:
mfdar r11
mtctr r11 /* restore ctr reg from DAR */
- mtdar r10 /* save fault EA to DAR */
+ mfspr r11, SPRN_SPRG_THREAD
+ stw r10, DAR(r11)
+ mfspr r10, SPRN_DSISR
+ stw r10, DSISR(r11)
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
@@ -722,7 +538,7 @@ start_here:
ori r0, r0, STACK_END_MAGIC@l
stw r0, 0(r1)
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
@@ -760,6 +576,27 @@ start_here:
rfi
/* Load up the kernel context */
2:
+#ifdef CONFIG_PIN_TLB_IMMR
+ lis r0, MD_TWAM@h
+ oris r0, r0, 0x1f00
+ mtspr SPRN_MD_CTR, r0
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ tlbie r0
+ mtspr SPRN_MD_EPN, r0
+ LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED)
+ mtspr SPRN_MD_TWC, r0
+ mfspr r0, SPRN_IMMR
+ rlwinm r0, r0, 0, 0xfff80000
+ ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_RPN, r0
+ lis r0, (MD_TWAM | MD_RSV4I)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
+ lis r0, MD_TWAM@h
+ mtspr SPRN_MD_CTR, r0
+#endif
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
@@ -789,20 +626,13 @@ start_here:
* 24 Mbytes of data, and the 512k IMMR space. Anything not covered by
* these mappings is mapped by page tables.
*/
-initial_mmu:
+SYM_FUNC_START_LOCAL(initial_mmu)
li r8, 0
mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */
- lis r10, MD_RESETVAL@h
-#ifndef CONFIG_8xx_COPYBACK
- oris r10, r10, MD_WTDEF@h
-#endif
+ lis r10, MD_TWAM@h
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_DATA
- oris r10, r10, MD_RSV4I@h
- mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
-#endif
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
@@ -811,55 +641,32 @@ initial_mmu:
ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
- /* Map a 512k page for the IMMR to get the processor
- * internal registers (among other things).
- */
-#ifdef CONFIG_PIN_TLB_IMMR
- oris r10, r10, MD_RSV4I@h
- ori r10, r10, 0x1c00
- mtspr SPRN_MD_CTR, r10
-
- mfspr r9, 638 /* Get current IMMR */
- andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */
-
- lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */
- ori r8, r8, MD_EVALID /* Mark it valid */
- mtspr SPRN_MD_EPN, r8
- li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
- mtspr SPRN_MD_TWC, r8
- mr r8, r9 /* Create paddr for TLB */
- ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
- mtspr SPRN_MD_RPN, r8
-#endif
-
- /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
-#ifdef CONFIG_PIN_TLB_TEXT
+ /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
-#endif
+ oris r12, r10, MD_RSV4I@h
+ ori r12, r12, 0x1c00
li r9, 4 /* up to 4 pages of 8M */
mtctr r9
lis r9, KERNELBASE@h /* Create vaddr for TLB */
- li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
li r11, MI_BOOTINIT /* Create RPN for address 0 */
- lis r12, _einittext@h
- ori r12, r12, _einittext@l
1:
-#ifdef CONFIG_PIN_TLB_TEXT
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
addi r8, r8, 0x100
-#endif
-
ori r0, r9, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r0
mtspr SPRN_MI_TWC, r10
mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ mtspr SPRN_MD_CTR, r12
+ addi r12, r12, 0x100
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r10
+ mtspr SPRN_MD_RPN, r11
addis r9, r9, 0x80
addis r11, r11, 0x80
- cmpl cr0, r9, r12
- bdnzf gt, 1b
+ bdnz 1b
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
@@ -870,17 +677,7 @@ initial_mmu:
mtspr SPRN_DC_CST, r8
lis r8, IDC_ENABLE@h
mtspr SPRN_IC_CST, r8
-#ifdef CONFIG_8xx_COPYBACK
- mtspr SPRN_DC_CST, r8
-#else
- /* For a debug option, I left this here to easily enable
- * the write through cache mode
- */
- lis r8, DC_SFWT@h
- mtspr SPRN_DC_CST, r8
- lis r8, IDC_ENABLE@h
mtspr SPRN_DC_CST, r8
-#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
#ifdef CONFIG_PERF_EVENTS
@@ -890,29 +687,105 @@ initial_mmu:
#endif
mtspr SPRN_DER, r8
blr
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
- .align PAGE_SHIFT
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE table poiners, usually the kernel and current user
- * pointer to their respective root page table (pgdir).
- */
- .globl abatron_pteptrs
-abatron_pteptrs:
- .space 8
+SYM_FUNC_END(initial_mmu)
+
+_GLOBAL(mmu_pin_tlb)
+ lis r9, (1f - PAGE_OFFSET)@h
+ ori r9, r9, (1f - PAGE_OFFSET)@l
+ mfmsr r10
+ mflr r11
+ li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ rlwinm r0, r10, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ isync
+ .align 4
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r12
+ rfi
+1:
+ li r5, 0
+ lis r6, MD_TWAM@h
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MD_CTR, r6
+ tlbia
+
+ LOAD_REG_IMMEDIATE(r5, 28 << 8)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+ li r0, 4
+ mtctr r0
+
+2: ori r0, r6, MI_EVALID
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r7
+ mtspr SPRN_MI_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+ lis r0, MI_RSV4I@h
+ mtspr SPRN_MI_CTR, r0
+
+ LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
+#ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ li r8, 0
+#ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
+#else
+ li r0, 4
+#endif
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+ LOAD_REG_ADDR(r9, _sinittext)
+
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+4:
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r3
+ bdnzt lt, 2b
+#endif
+#ifdef CONFIG_PIN_TLB_IMMR
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
+ mfspr r8, SPRN_IMMR
+ rlwinm r8, r8, 0, 0xfff80000
+ ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+#endif
+#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
+ rfi
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 4a24f8f026c7..c1d89764dd22 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -17,10 +17,12 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -29,21 +31,11 @@
#include <asm/ptrace.h>
#include <asm/bug.h>
#include <asm/kvm_book3s_asm.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
#include "head_32.h"
-/* 601 only have IBAT */
-#ifdef CONFIG_PPC_BOOK3S_601
-#define LOAD_BAT(n, reg, RA, RB) \
- li RA,0; \
- mtspr SPRN_IBAT##n##U,RA; \
- lwz RA,(n*16)+0(reg); \
- lwz RB,(n*16)+4(reg); \
- mtspr SPRN_IBAT##n##U,RA; \
- mtspr SPRN_IBAT##n##L,RB
-#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -57,19 +49,15 @@
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
mtspr SPRN_DBAT##n##L,RB
-#endif
__HEAD
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "head_32.S",N_SO,0,0,0f
-0:
-_ENTRY(_stext);
+_GLOBAL(_stext);
/*
* _start is defined this way because the XCOFF loader in the OpenFirmware
* on the powermac expects the entry point to be a procedure descriptor.
*/
-_ENTRY(_start);
+_GLOBAL(_start);
/*
* These are here for legacy reasons, the kernel used to
* need to look like a coff function entry for the pmac
@@ -166,9 +154,8 @@ __after_mmu_off:
bl initial_bats
bl load_segment_registers
-#ifdef CONFIG_KASAN
+ bl reloc_offset
bl early_hash_table
-#endif
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -185,10 +172,8 @@ __after_mmu_off:
bl reloc_offset
li r24,0 /* cpu# */
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
bl reloc_offset
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/*
@@ -219,8 +204,7 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
- RFI /* enables MMU */
+ rfi /* enables MMU */
/*
* We need __secondary_hold as a place to hold the other cpus on
@@ -253,8 +237,8 @@ __secondary_hold_acknowledge:
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
- putting it back to what it was (unknown_exception) when done. */
- EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+ putting it back to what it was (unknown_async_exception) when done. */
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
/* Machine check */
/*
@@ -270,89 +254,111 @@ __secondary_hold_acknowledge:
* pointer when we take an exception from supervisor mode.)
* -- paulus.
*/
- . = 0x200
- DO_KVM 0x200
- mtspr SPRN_SPRG_SCRATCH0,r10
- mtspr SPRN_SPRG_SCRATCH1,r11
- mfcr r10
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+ EXCEPTION_PROLOG_0
#ifdef CONFIG_PPC_CHRP
- mfspr r11, SPRN_SPRG_THREAD
- lwz r11, RTAS_SP(r11)
- cmpwi cr1, r11, 0
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, RTAS_SP(r1)
+ cmpwi cr1, r1, 0
bne cr1, 7f
+ mfspr r1, SPRN_SPRG_SCRATCH2
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1
-7: EXCEPTION_PROLOG_2
- addi r3,r1,STACK_FRAME_OVERHEAD
+7: EXCEPTION_PROLOG_2 0x200 MachineCheck
#ifdef CONFIG_PPC_CHRP
- bne cr1,1f
-#endif
- EXC_XFER_STD(0x200, machine_check_exception)
-#ifdef CONFIG_PPC_CHRP
-1: b machine_check_in_rtas
+ beq cr1, 1f
+ twi 31, 0, 0
#endif
+1: prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* Data access exception. */
- . = 0x300
- DO_KVM 0x300
-DataAccess:
- EXCEPTION_PROLOG
- mfspr r10,SPRN_DSISR
- stw r10,_DSISR(r11)
-#ifdef CONFIG_PPC_KUAP
- andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
-#else
- andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
-#endif
- bne 1f /* if not, try to put a PTE */
- mfspr r4,SPRN_DAR /* into the hash table */
- rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
+ START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
+#ifdef CONFIG_PPC_BOOK3S_604
BEGIN_MMU_FTR_SECTION
- bl hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-1: lwz r5,_DSISR(r11) /* get DSISR value */
- mfspr r4,SPRN_DAR
- EXC_XFER_LITE(0x300, handle_page_fault)
+ mtspr SPRN_SPRG_SCRATCH2,r10
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THR11(r10)
+ mfspr r10, SPRN_DSISR
+ mfcr r11
+ andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
+ mfspr r10, SPRN_SPRG_THREAD
+ beq hash_page_dsi
+.Lhash_page_dsi_cont:
+ mtcr r11
+ lwz r11, THR11(r10)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+MMU_FTR_SECTION_ELSE
+ b 1f
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ lwz r5, _DSISR(r1)
+ andis. r0, r5, DSISR_DABRMATCH@h
+ bne- 1f
+ bl do_page_fault
+ b interrupt_return
+1: bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Instruction access exception. */
- . = 0x400
- DO_KVM 0x400
-InstructionAccess:
- EXCEPTION_PROLOG
- andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
- beq 1f /* if so, try to put a PTE */
- li r3,0 /* into the hash table */
- mr r4,r12 /* SRR0 is fault address */
+ START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r10, SPRN_SPRG_THREAD
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
+ mfcr r10
+#ifdef CONFIG_PPC_BOOK3S_604
BEGIN_MMU_FTR_SECTION
- bl hash_page
+ andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */
+ bne hash_page_isi
+.Lhash_page_isi_cont:
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-1: mr r4,r12
+#endif
+ andi. r11, r11, MSR_PR
+
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
- EXC_XFER_LITE(0x400, handle_page_fault)
+ stw r5, _DSISR(r11)
+ stw r12, _DAR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
- DO_KVM 0x600
-Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating-point unavailable */
- . = 0x800
- DO_KVM 0x800
-FPUnavailable:
+ START_EXCEPTION(0x800, FPUnavailable)
+#ifdef CONFIG_PPC_FPU
BEGIN_FTR_SECTION
/*
* Certain Freescale cores don't have a FPU and treat fp instructions
@@ -360,28 +366,29 @@ BEGIN_FTR_SECTION
*/
b ProgramCheck
END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
beq 1f
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl kernel_fp_unavailable_exception
+ b interrupt_return
+#else
+ b ProgramCheck
+#endif
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception)
/* System call */
- . = 0xc00
- DO_KVM 0xc00
-SystemCall:
- SYSCALL_ENTRY 0xc00
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
-/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -391,41 +398,39 @@ SystemCall:
* non-altivec kernel running on a machine with altivec just
* by executing an altivec instruction.
*/
- . = 0xf00
- DO_KVM 0xf00
+ START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
b PerformanceMonitor
- . = 0xf20
- DO_KVM 0xf20
+ START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
b AltiVecUnavailable
+ __HEAD
/*
* Handle TLB miss for instruction on 603/603e.
* Note: we get an alternate set of r0 - r3 to use automatically.
*/
- . = 0x1000
+ . = INTERRUPT_INST_TLB_MISS_603
InstructionTLBMiss:
/*
- * r0: scratch
+ * r0: userspace flag (later scratch)
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
- * r3: scratch
+ * r3: fault address
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
-#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+#ifdef CONFIG_MODULES
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
#endif
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
+ mfspr r2, SPRN_SDR1
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
-#else
- li r1,_PAGE_PRESENT | _PAGE_EXEC
-#endif
-#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
- bge- 112f
+ rlwinm r2, r2, 28, 0xfffff000
+#ifdef CONFIG_MODULES
+ li r0, 3
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r0, 0
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
#endif
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
@@ -433,13 +438,15 @@ InstructionTLBMiss:
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ lwz r2,0(r2) /* get linux-style pte */
+ andc. r1,r1,r2 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+#ifdef CONFIG_MODULES
+ rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */
+#endif
ori r1, r1, 0xe06 /* clear out reserved bits */
- andc r1, r0, r1 /* PP = user? 1 : 0 */
+ andc r1, r2, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -471,51 +478,45 @@ InstructionAddressInvalid:
/*
* Handle TLB miss for DATA Load operation on 603/603e
*/
- . = 0x1100
+ . = INTERRUPT_DATA_LOAD_TLB_MISS_603
DataLoadTLBMiss:
/*
- * r0: scratch
+ * r0: userspace flag (later scratch)
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
- * r3: scratch
+ * r3: fault address
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1, _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_PRESENT
-#endif
- bge- 112f
+ mfspr r2, SPRN_SDR1
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
+ rlwinm r2, r2, 28, 0xfffff000
+ li r0, 3
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r0, 0
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ lwz r2,0(r2) /* get linux-style pte */
+ andc. r1,r1,r2 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */
+ rlwimi r2,r0,0,30,31 /* userspace ? -> PP */
+ rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */
+ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
+ andc r1,r2,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r2
BEGIN_MMU_FTR_SECTION
li r0,1
mfspr r1,SPRN_SPRG_603_LRU
@@ -527,9 +528,15 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
tlbld r3
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
DataAddressInvalid:
mfspr r3,SPRN_SRR1
rlwinm r1,r3,9,6,6 /* Get load/store bit */
@@ -551,43 +558,38 @@ DataAddressInvalid:
/*
* Handle TLB miss for DATA Store on 603/603e
*/
- . = 0x1200
+ . = INTERRUPT_DATA_STORE_TLB_MISS_603
DataStoreTLBMiss:
/*
- * r0: scratch
+ * r0: userspace flag (later scratch)
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
- * r3: scratch
+ * r3: fault address
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
+ mfspr r2, SPRN_SDR1
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
-#endif
- bge- 112f
+ rlwinm r2, r2, 28, 0xfffff000
+ li r0, 3
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r0, 0
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ lwz r2,0(r2) /* get linux-style pte */
+ andc. r1,r1,r2 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ rlwimi r2,r0,0,31,31 /* userspace ? -> PP lsb */
li r1,0xe06 /* clear out reserved bits & PP msb */
- andc r1,r0,r1 /* PP = user? 1: 0 */
+ andc r1,r2,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -605,70 +607,162 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
tlbld r3
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#ifndef CONFIG_ALTIVEC
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
- EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
+#ifndef CONFIG_TAU_INT
+#define TAUException unknown_async_exception
+#endif
+
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+ EXCEPTION(0x1400, SMI, SMIException)
+ EXCEPTION(0x1500, Trap_15, unknown_exception)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+ EXCEPTION(0x1700, Trap_17, TAUException)
+ EXCEPTION(0x1800, Trap_18, unknown_exception)
+ EXCEPTION(0x1900, Trap_19, unknown_exception)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ EXCEPTION(0x2000, RunMode, RunModeException)
+ EXCEPTION(0x2100, Trap_21, unknown_exception)
+ EXCEPTION(0x2200, Trap_22, unknown_exception)
+ EXCEPTION(0x2300, Trap_23, unknown_exception)
+ EXCEPTION(0x2400, Trap_24, unknown_exception)
+ EXCEPTION(0x2500, Trap_25, unknown_exception)
+ EXCEPTION(0x2600, Trap_26, unknown_exception)
+ EXCEPTION(0x2700, Trap_27, unknown_exception)
+ EXCEPTION(0x2800, Trap_28, unknown_exception)
+ EXCEPTION(0x2900, Trap_29, unknown_exception)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception)
+ __HEAD
. = 0x3000
+#ifdef CONFIG_PPC_BOOK3S_604
+.macro save_regs_thread thread
+ stw r0, THR0(\thread)
+ stw r3, THR3(\thread)
+ stw r4, THR4(\thread)
+ stw r5, THR5(\thread)
+ stw r6, THR6(\thread)
+ stw r8, THR8(\thread)
+ stw r9, THR9(\thread)
+ mflr r0
+ stw r0, THLR(\thread)
+ mfctr r0
+ stw r0, THCTR(\thread)
+.endm
+
+.macro restore_regs_thread thread
+ lwz r0, THLR(\thread)
+ mtlr r0
+ lwz r0, THCTR(\thread)
+ mtctr r0
+ lwz r0, THR0(\thread)
+ lwz r3, THR3(\thread)
+ lwz r4, THR4(\thread)
+ lwz r5, THR5(\thread)
+ lwz r6, THR6(\thread)
+ lwz r8, THR8(\thread)
+ lwz r9, THR9(\thread)
+.endm
+
+hash_page_dsi:
+ save_regs_thread r10
+ mfdsisr r3
+ mfdar r4
+ mfsrr0 r5
+ mfsrr1 r9
+ rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */
+ ori r3, r3, _PAGE_PRESENT | _PAGE_READ
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ b .Lhash_page_dsi_cont
+
+hash_page_isi:
+ mr r11, r10
+ mfspr r10, SPRN_SPRG_THREAD
+ save_regs_thread r10
+ li r3, _PAGE_PRESENT | _PAGE_EXEC
+ lwz r4, SRR0(r10)
+ lwz r9, SRR1(r10)
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ mr r10, r11
+ b .Lhash_page_isi_cont
+
+ .globl fast_hash_page_return
+fast_hash_page_return:
+ andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ bne 1f
+
+ /* DSI */
+ mtcr r11
+ lwz r11, THR11(r10)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ rfi
+
+1: /* ISI */
+ mtcr r11
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ rfi
+#endif /* CONFIG_PPC_BOOK3S_604 */
+
+#ifdef CONFIG_VMAP_STACK
+ vmap_stack_overflow_exception
+#endif
+
+ __HEAD
AltiVecUnavailable:
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
#ifdef CONFIG_ALTIVEC
beq 1f
bl load_up_altivec /* if from user, just load it up */
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl altivec_unavailable_exception
+ b interrupt_return
+ __HEAD
PerformanceMonitor:
- EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0xf00, performance_monitor_exception)
+ EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+ prepare_transfer_to_handler
+ bl performance_monitor_exception
+ b interrupt_return
+ __HEAD
/*
* This code is jumped to from the startup code to copy
* the kernel image to physical address PHYSICAL_START.
*/
relocate_kernel:
- addis r9,r26,klimit@ha /* fetch klimit */
- lwz r25,klimit@l(r9)
- addis r25,r25,-KERNELBASE@h
lis r3,PHYSICAL_START@h /* Destination base address */
li r6,0 /* Destination offset */
li r5,0x4000 /* # bytes of memory to copy */
@@ -676,7 +770,8 @@ relocate_kernel:
addi r0,r3,4f@l /* jump to the address of 4f */
mtctr r0 /* in copy and do the rest. */
bctr /* jump to the copy */
-4: mr r5,r25
+4: lis r5,_end-KERNELBASE@h
+ ori r5,r5,_end-KERNELBASE@l
bl copy_and_flush /* copy the rest */
b turn_on_mmu
@@ -686,7 +781,7 @@ relocate_kernel:
* r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
* on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
*/
-_ENTRY(copy_and_flush)
+_GLOBAL(copy_and_flush)
addi r5,r5,-4
addi r6,r6,-4
4: li r0,L1_CACHE_BYTES/4
@@ -729,7 +824,6 @@ __secondary_start_pmac_0:
set to map the 0xf0000000 - 0xffffffff region */
mfmsr r0
rlwinm r0,r0,0,28,26 /* clear DR (0x10) */
- SYNC
mtmsr r0
isync
@@ -741,10 +835,8 @@ __secondary_start:
lis r3,-KERNELBASE@h
mr r4,r24
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
lis r3,-KERNELBASE@h
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/* get current's stack and current */
lis r2,secondary_current@ha
@@ -754,7 +846,7 @@ __secondary_start:
lwz r1,TASK_STACK(r1)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
tophys(r3,r1)
stw r0,0(r3)
@@ -767,9 +859,12 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
- mtspr SPRN_SPRG_PGDIR, r4
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -777,8 +872,7 @@ __secondary_start:
ori r3,r3,start_secondary@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
+ rfi
#endif /* CONFIG_SMP */
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -786,22 +880,10 @@ __secondary_start:
#endif
/*
- * Those generic dummy functions are kept for CPUs not
- * included in CONFIG_PPC_BOOK3S_32
- */
-#if !defined(CONFIG_PPC_BOOK3S_32)
-_ENTRY(__save_cpu_setup)
- blr
-_ENTRY(__restore_cpu_setup)
- blr
-#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
-
-/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
-#ifdef CONFIG_KASAN
-early_hash_table:
+SYM_FUNC_START_LOCAL(early_hash_table)
sync /* Force all PTE updates to finish */
isync
tlbia /* Clear all TLB entries */
@@ -812,22 +894,23 @@ early_hash_table:
ori r6, r6, 3 /* 256kB table */
mtspr SPRN_SDR1, r6
blr
-#endif
+SYM_FUNC_END(early_hash_table)
-load_up_mmu:
+SYM_FUNC_START_LOCAL(load_up_mmu)
sync /* Force all PTE updates to finish */
isync
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
TLBSYNC /* ... on all CPUs */
+BEGIN_MMU_FTR_SECTION
/* Load the SDR1 register (hash table base & size) */
lis r6,_SDR1@ha
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-/* Load the BAT registers with the values set up by MMU_init.
- MMU_init takes care of whether we're on a 601 or not. */
+/* Load the BAT registers with the values set up by MMU_init. */
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -842,16 +925,15 @@ BEGIN_MMU_FTR_SECTION
LOAD_BAT(7,r3,r4,r5)
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+SYM_FUNC_END(load_up_mmu)
-load_segment_registers:
+_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
- li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
+ lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */
+#else
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
#endif
li r4, 0
3: mtsrin r3, r4
@@ -881,15 +963,18 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
- mtspr SPRN_SPRG_PGDIR, r4
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
/*
* Do early platform-specific initialization,
* and set up the MMU.
@@ -902,11 +987,7 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
-#ifdef CONFIG_KASAN
-BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
/*
* Go back to running unmapped so we can load up new values
@@ -917,10 +998,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r4,r4,2f@l
tophys(r4,r4)
li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
- SYNC
- RFI
+ rfi
/* Load up the kernel context */
2: bl load_up_mmu
@@ -931,7 +1013,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
*/
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* This much match your Abatron config */
+ stw r5, 0xf0(0) /* This much match your Abatron config */
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
tophys(r5, r5)
@@ -944,54 +1026,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r3,r3,start_kernel@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
-
-/*
- * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
- *
- * Set up the segment registers for a new context.
- */
-_ENTRY(switch_mmu_context)
- lwz r3,MMCONTEXTID(r4)
- cmpwi cr0,r3,0
- blt- 4f
- mulli r3,r3,897 /* multiply context by skew factor */
- rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
-#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
-#endif
- li r0,NUM_USER_SEGMENTS
- mtctr r0
-
- lwz r4, MM_PGD(r4)
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
- tophys(r4, r4)
- mtspr SPRN_SPRG_PGDIR, r4
- li r4,0
- isync
-3:
- mtsrin r3,r4
- addi r3,r3,0x111 /* next VSID */
- rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
- sync
- isync
- blr
-4: trap
- EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
- blr
-EXPORT_SYMBOL(switch_mmu_context)
+ rfi
/*
* An undocumented "feature" of 604e requires that the v bit
@@ -1001,10 +1036,9 @@ EXPORT_SYMBOL(switch_mmu_context)
* this makes sure it's done.
* -- Cort
*/
-clear_bats:
+SYM_FUNC_START_LOCAL(clear_bats)
li r10,0
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1013,7 +1047,6 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1047,8 +1080,9 @@ BEGIN_MMU_FTR_SECTION
mtspr SPRN_IBAT7L,r10
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+SYM_FUNC_END(clear_bats)
-_ENTRY(update_bats)
+_GLOBAL(update_bats)
lis r4, 1f@h
ori r4, r4, 1f@l
tophys(r4, r4)
@@ -1058,10 +1092,11 @@ _ENTRY(update_bats)
rlwinm r0, r6, 0, ~MSR_RI
rlwinm r0, r0, 0, ~MSR_EE
mtmsr r0
+
+ .align 4
mtspr SPRN_SRR0, r4
mtspr SPRN_SRR1, r3
- SYNC
- RFI
+ rfi
1: bl clear_bats
lis r3, BATS@ha
addi r3, r3, BATS@l
@@ -1080,48 +1115,34 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
mtmsr r3
mtspr SPRN_SRR0, r7
mtspr SPRN_SRR1, r6
- SYNC
- RFI
+ rfi
-flush_tlbs:
+SYM_FUNC_START_LOCAL(flush_tlbs)
lis r10, 0x40
1: addic. r10, r10, -0x1000
tlbie r10
bgt 1b
sync
blr
+SYM_FUNC_END(flush_tlbs)
-mmu_off:
+SYM_FUNC_START_LOCAL(mmu_off)
addi r4, r3, __after_mmu_off - _start
mfmsr r3
andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
beqlr
andc r3,r3,r0
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
- RFI
+ rfi
+SYM_FUNC_END(mmu_off)
-/*
- * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
- * (we keep one for debugging) and on others, we use one 256M BAT.
- */
-initial_bats:
+/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
+SYM_FUNC_START_LOCAL(initial_bats)
lis r11,PAGE_OFFSET@h
-#ifdef CONFIG_PPC_BOOK3S_601
- ori r11,r11,4 /* set up BAT registers for 601 */
- li r8,0x7f /* valid, block length = 8MB */
- mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
- mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT1U,r11
- mtspr SPRN_IBAT1L,r8
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT2U,r11
- mtspr SPRN_IBAT2L,r8
-#else
tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
@@ -1130,16 +1151,16 @@ initial_bats:
#endif /* CONFIG_SMP */
ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
- mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx have valid */
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
-#endif
isync
blr
+SYM_FUNC_END(initial_bats)
#ifdef CONFIG_BOOTX_TEXT
-setup_disp_bat:
+SYM_FUNC_START_LOCAL(setup_disp_bat)
/*
* setup the display bat prepared for us in prom.c
*/
@@ -1152,18 +1173,14 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
-#else
- mtspr SPRN_IBAT3L,r8
- mtspr SPRN_IBAT3U,r11
-#endif
blr
+SYM_FUNC_END(setup_disp_bat)
#endif /* CONFIG_BOOTX_TEXT */
#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
-setup_cpm_bat:
+SYM_FUNC_START_LOCAL(setup_cpm_bat)
lis r8, 0xf000
ori r8, r8, 0x002a
mtspr SPRN_DBAT1L, r8
@@ -1173,10 +1190,11 @@ setup_cpm_bat:
mtspr SPRN_DBAT1U, r11
blr
+SYM_FUNC_END(setup_cpm_bat)
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
-setup_usbgecko_bat:
+SYM_FUNC_START_LOCAL(setup_usbgecko_bat)
/* prepare a BAT for early io */
#if defined(CONFIG_GAMECUBE)
lis r8, 0x0c00
@@ -1195,63 +1213,7 @@ setup_usbgecko_bat:
mtspr SPRN_DBAT1L, r8
mtspr SPRN_DBAT1U, r11
blr
+SYM_FUNC_END(setup_usbgecko_bat)
#endif
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
.data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..b6b5b01a173c 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -5,6 +5,7 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/thread_info.h> /* for THREAD_SHIFT */
#ifdef __ASSEMBLY__
@@ -34,7 +35,7 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
#define BOOKE_CLEAR_BTB(reg) \
START_BTB_FLUSH_SECTION \
BTB_FLUSH(reg) \
@@ -44,7 +45,7 @@ END_BTB_FLUSH_SECTION
#endif
-#define NORMAL_EXCEPTION_PROLOG(intno) \
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
stw r11, THREAD_NORMSAVE(0)(r10); \
@@ -53,6 +54,8 @@ END_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1; \
DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
andi. r11, r11, MSR_PR; /* check whether user or kernel */\
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \
+ mtmsr r11; \
mr r11, r1; \
beq 1f; \
BOOKE_CLEAR_BTB(r11) \
@@ -76,12 +79,38 @@ END_BTB_FLUSH_SECTION
stw r1, 0(r11); \
mr r1, r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
+ lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l
+ stw r10, STACK_INT_FRAME_MARKER(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_E500
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+777:
+#endif
+.endm
.macro SYSCALL_ENTRY trapno intno srr1
mfspr r10, SPRN_SPRG_THREAD
@@ -99,83 +128,21 @@ BEGIN_FTR_SECTION
mr r12, r13
lwz r13, THREAD_NORMSAVE(2)(r10)
FTR_SECTION_ELSE
-#endif
mfcr r12
-#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+ mfcr r12
#endif
+ mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
- lwz r11, TASK_STACK - THREAD(r10)
+ mr r11, r1
+ lwz r1, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
- ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
- stw r12, _CCR(r11) /* save various registers */
- mflr r12
- stw r12,_LINK(r11)
+ ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r1)
mfspr r12,SPRN_SRR0
- stw r1, GPR1(r11)
- mfspr r9,SPRN_SRR1
- stw r1, 0(r11)
- mr r1, r11
- stw r12,_NIP(r11)
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
- lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
- addi r12, r12, STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno + 1
- stw r12, 8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
-
- addi r11,r1,STACK_FRAME_OVERHEAD
- addi r2,r10,-THREAD
- stw r11,PT_REGS(r10)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r10)
- andis. r12,r12,DBCR0_IDM@h
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r10, TASK_CPU(r2)
- slwi r10, r10, 3
- add r11, r11, r10
-#endif
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-
-3:
- tovirt(r2, r2) /* set r2 to current */
- lis r11, transfer_to_syscall@h
- ori r11, r11, transfer_to_syscall@l
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * If MSR is changing we need to keep interrupts disabled at this point
- * otherwise we might risk taking an interrupt before we tell lockdep
- * they are enabled.
- */
- lis r10, MSR_KERNEL@h
- ori r10, r10, MSR_KERNEL@l
- rlwimi r10, r9, 0, MSR_EE
-#else
- lis r10, (MSR_KERNEL | MSR_EE)@h
- ori r10, r10, (MSR_KERNEL | MSR_EE)@l
-#endif
- mtspr SPRN_SRR1,r10
- mtspr SPRN_SRR0,r11
- SYNC
- RFI /* jump to handler, enable MMU */
+ stw r12,_NIP(r1)
+ b transfer_to_syscall /* jump to handler */
.endm
/* To handle the additional exception priority levels on 40x and Book-E
@@ -183,7 +150,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
*
* On 40x critical is the only additional level
* On 44x/e500 we have critical and machine check
- * On e200 we have critical and debug (machine check occurs via critical)
*
* Additionally we reserve a SPRG for each priority level so we can free up a
* GPR to use as the base for indirect access to the exception stacks. This
@@ -199,23 +165,21 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#define MC_STACK_BASE mcheckirq_ctx
#define CRIT_STACK_BASE critirq_ctx
-/* only on e500mc/e200 */
+/* only on e500mc */
#define DBG_STACK_BASE dbgirq_ctx
-#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
-
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
slwi r8,r8,2; \
addis r8,r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
lis r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#endif
/*
@@ -226,7 +190,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
stw r9,GPR9(r8); /* save various registers */\
@@ -238,9 +202,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \
+ mtmsr r11; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
stw r9,_CCR(r11); /* save CR */\
@@ -267,16 +233,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r1,0(r11); \
mr r1,r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-#define CRITICAL_EXCEPTION_PROLOG(intno) \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
-#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
-#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+#define SAVE_xSRR(xSRR) \
+ mfspr r0,SPRN_##xSRR##0; \
+ stw r0,_##xSRR##0(r1); \
+ mfspr r0,SPRN_##xSRR##1; \
+ stw r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_E500
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r1)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r1)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r1)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r1)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_E500 */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r1)
+#endif
+.endm
+
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
SPRN_MCSRR0, SPRN_MCSRR1)
/*
@@ -303,44 +297,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
.align 5; \
label:
-#define EXCEPTION(n, intno, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
+ NORMAL_EXCEPTION_PROLOG(n, intno); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ CRITICAL_EXCEPTION_PROLOG(n, intno); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_crit_exc
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
- MCHECK_EXCEPTION_PROLOG; \
+ MCHECK_EXCEPTION_PROLOG(n); \
mfspr r5,SPRN_ESR; \
stw r5,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- mcheck_transfer_to_handler, ret_from_mcheck_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
- ret_from_except)
+ SAVE_xSRR(DSRR); \
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_mcheck_exc
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -357,7 +341,7 @@ label:
*/
#define DEBUG_DEBUG_EXCEPTION \
START_EXCEPTION(DebugDebug); \
- DEBUG_EXCEPTION_PROLOG; \
+ DEBUG_EXCEPTION_PROLOG(2000); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -405,12 +389,17 @@ label:
\
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_debug_exc
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
- CRITICAL_EXCEPTION_PROLOG(DEBUG); \
+ CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -458,78 +447,81 @@ label:
\
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_crit_exc
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ stw r4, _DEAR(r11); \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
+/*
+ * Instruction TLB Error interrupt handlers may call InstructionStorage
+ * directly without clearing ESR, so the ESR at this point may be left over
+ * from a prior interrupt.
+ *
+ * In any case, do_page_fault for BOOK3E does not use ESR and always expects
+ * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault
+ * handling.
+ */
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
- mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \
+ li r5,0; /* Store 0 in regs->esr (dsisr) */ \
stw r5,_ESR(r11); \
- mr r4,r12; /* Pass SRR0 as arg2 */ \
- li r5,0; /* Pass zero as arg3 */ \
- EXC_XFER_LITE(0x0400, handle_page_fault)
+ stw r12, _DEAR(r11); /* Set regs->dear (dar) to SRR0 */ \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
+ NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x0600, alignment_exception)
+ prepare_transfer_to_handler; \
+ bl alignment_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG(PROGRAM); \
+ NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x0700, program_check_exception)
+ prepare_transfer_to_handler; \
+ bl program_check_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
+ NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_LITE(0x0900, timer_interrupt)
+ prepare_transfer_to_handler; \
+ bl timer_interrupt; \
+ b interrupt_return
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
+ NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \
beq 1f; \
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
-1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-
-#else /* __ASSEMBLY__ */
-struct exception_regs {
- unsigned long mas0;
- unsigned long mas1;
- unsigned long mas2;
- unsigned long mas3;
- unsigned long mas6;
- unsigned long mas7;
- unsigned long srr0;
- unsigned long srr1;
- unsigned long csrr0;
- unsigned long csrr1;
- unsigned long dsrr0;
- unsigned long dsrr1;
- unsigned long saved_ksp_limit;
-};
-
-/* ensure this structure is always sized to a multiple of the stack alignment */
-#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
+1: prepare_transfer_to_handler; \
+ bl kernel_fp_unavailable_exception; \
+ b interrupt_return
#endif /* __ASSEMBLY__ */
#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 58ce3d37c2a3..a1318ce18d0e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/init.h>
@@ -22,15 +23,15 @@
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
-#include <asm/debugfs.h>
#include <asm/hvcall.h>
+#include <asm/inst.h>
#include <linux/uaccess.h>
/*
* Stores the breakpoints currently in use on each breakpoint address
* register for every cpu
*/
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
/*
* Returns total number of data or instruction breakpoints available.
@@ -38,10 +39,11 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
int hw_breakpoint_slots(int type)
{
if (type == TYPE_DATA)
- return HBP_NUM;
+ return nr_wp_slots();
return 0; /* no instruction breakpoints available */
}
+
/*
* Install a perf counter breakpoint.
*
@@ -54,16 +56,26 @@ int hw_breakpoint_slots(int type)
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
- *slot = bp;
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+ return -EBUSY;
/*
* Do not install DABR values if the instruction must be single-stepped.
* If so, DABR will be populated in single_step_dabr_instruction().
*/
- if (current->thread.last_hit_ubp != bp)
- __set_breakpoint(info);
+ if (!info->perf_single_step)
+ __set_breakpoint(i, info);
return 0;
}
@@ -79,31 +91,27 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct arch_hw_breakpoint null_brk = {0};
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
- if (*slot != bp) {
- WARN_ONCE(1, "Can't find the breakpoint");
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
return;
- }
- *slot = NULL;
- hw_breakpoint_disable();
+ __set_breakpoint(i, &null_brk);
}
-/*
- * Perform cleanup of arch-specific counters during unregistration
- * of the perf-event
- */
-void arch_unregister_hw_breakpoint(struct perf_event *bp)
+static bool is_ptrace_bp(struct perf_event *bp)
{
- /*
- * If the breakpoint is unregistered between a hw_breakpoint_handler()
- * and the single_step_dabr_instruction(), then cleanup the breakpoint
- * restoration variables to prevent dangling pointers.
- * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
- */
- if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
- bp->ctx->task->thread.last_hit_ubp = NULL;
+ return bp->overflow_handler == ptrace_triggered;
}
/*
@@ -140,10 +148,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
* <---8 bytes--->
*
* In this case, we should configure hw as:
- * start_addr = address & ~HW_BREAKPOINT_ALIGN
+ * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
* len = 16 bytes
*
- * @start_addr and @end_addr are inclusive.
+ * @start_addr is inclusive but @end_addr is exclusive.
*/
static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
{
@@ -151,15 +159,19 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
u16 hw_len;
unsigned long start_addr, end_addr;
- start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
- end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
- hw_len = end_addr - start_addr + 1;
+ start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+ end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+ hw_len = end_addr - start_addr;
if (dawr_enabled()) {
max_len = DAWR_MAX_LEN;
- /* DAWR region can't cross 512 bytes boundary */
- if ((start_addr >> 9) != (end_addr >> 9))
+ /* DAWR region can't cross 512 bytes boundary on p10 predecessors */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
return -EINVAL;
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ /* 8xx can setup a range without limitation */
+ max_len = U16_MAX;
}
if (hw_len > max_len)
@@ -208,94 +220,173 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
* Restores the breakpoint on the debug registers.
* Invoke this function if it is known that the execution context is
* about to change to cause loss of MSR_SE settings.
+ *
+ * The perf watchpoint will simply re-trigger once the thread is started again,
+ * and the watchpoint handler will set up MSR_SE and perf_single_step as
+ * needed.
*/
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
struct arch_hw_breakpoint *info;
+ int i;
- if (likely(!tsk->thread.last_hit_ubp))
- return;
+ preempt_disable();
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
+
+ if (unlikely(bp && counter_arch_bp(bp)->perf_single_step))
+ goto reset;
+ }
+ goto out;
+
+reset:
+ regs_set_return_msr(regs, regs->msr & ~MSR_SE);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
+ __set_breakpoint(i, info);
+ info->perf_single_step = false;
+ }
- info = counter_arch_bp(tsk->thread.last_hit_ubp);
- regs->msr &= ~MSR_SE;
- __set_breakpoint(info);
- tsk->thread.last_hit_ubp = NULL;
+out:
+ preempt_enable();
}
-static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
+static bool is_larx_stcx_instr(int type)
{
- return ((info->address <= dar) && (dar - info->address < info->len));
+ return type == LARX || type == STCX;
}
-static bool
-dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+static bool is_octword_vsx_instr(int type, int size)
{
- return ((dar <= info->address + info->len - 1) &&
- (dar + size - 1 >= info->address));
+ return ((type == LOAD_VSX || type == STORE_VSX) && size == 32);
}
/*
- * Handle debug exception notifications.
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
*/
-static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
- struct arch_hw_breakpoint *info)
+static void handler_error(struct perf_event *bp)
{
- unsigned int instr = 0;
- int ret, type, size;
- struct instruction_op op;
- unsigned long addr = info->address;
-
- if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
- goto fail;
-
- ret = analyse_instr(&op, regs, instr);
- type = GETTYPE(op.type);
- size = GETSIZE(op.type);
-
- if (!ret && (type == LARX || type == STCX)) {
- printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
- " Breakpoint at 0x%lx will be disabled.\n", addr);
- goto disable;
- }
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
+ counter_arch_bp(bp)->address);
+ perf_event_disable_inatomic(bp);
+}
- /*
- * If it's extraneous event, we still need to emulate/single-
- * step the instruction, but we don't generate an event.
- */
- if (size && !dar_range_overlaps(regs->dar, size, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+static void larx_stcx_err(struct perf_event *bp)
+{
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
+ counter_arch_bp(bp)->address);
+ perf_event_disable_inatomic(bp);
+}
+
+static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
+ int *hit, ppc_inst_t instr)
+{
+ int i;
+ int stepped;
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+
+ counter_arch_bp(bp[i])->perf_single_step = true;
+ bp[i] = NULL;
+ }
+ regs_set_return_msr(regs, regs->msr | MSR_SE);
return false;
}
- if (!emulate_step(regs, instr))
- goto fail;
-
+ stepped = emulate_step(regs, instr);
+ if (!stepped) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ handler_error(bp[i]);
+ bp[i] = NULL;
+ }
+ return false;
+ }
return true;
+}
+
+static void handle_p10dd1_spurious_exception(struct perf_event **bp,
+ int *hit, unsigned long ea)
+{
+ int i;
+ unsigned long hw_end_addr;
-fail:
/*
- * We've failed in reliably handling the hw-breakpoint. Unregister
- * it and throw a warning message to let the user know about it.
+ * Handle spurious exception only when any bp_per_reg is set.
+ * Otherwise this might be created by xmon and not actually a
+ * spurious exception.
*/
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", addr);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct arch_hw_breakpoint *info;
-disable:
- perf_event_disable_inatomic(bp);
- return false;
+ if (!bp[i])
+ continue;
+
+ info = counter_arch_bp(bp[i]);
+
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ /*
+ * Ending address of DAWR range is less than starting
+ * address of op.
+ */
+ if ((hw_end_addr - 1) >= ea)
+ continue;
+
+ /*
+ * Those addresses need to be in the same or in two
+ * consecutive 512B blocks;
+ */
+ if (((hw_end_addr - 1) >> 10) != (ea >> 10))
+ continue;
+
+ /*
+ * 'op address + 64B' generates an address that has a
+ * carry into bit 52 (crosses 2K boundary).
+ */
+ if ((ea & 0x800) == ((ea + 64) & 0x800))
+ continue;
+
+ break;
+ }
+
+ if (i == nr_wp_slots())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (bp[i]) {
+ hit[i] = 1;
+ counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ }
+ }
}
+/*
+ * Handle a DABR or DAWR exception.
+ *
+ * Called in atomic context.
+ */
int hw_breakpoint_handler(struct die_args *args)
{
+ bool err = false;
int rc = NOTIFY_STOP;
- struct perf_event *bp;
+ struct perf_event *bp[HBP_NUM_MAX] = { NULL };
struct pt_regs *regs = args->regs;
- struct arch_hw_breakpoint *info;
+ int i;
+ int hit[HBP_NUM_MAX] = {0};
+ int nr_hit = 0;
+ bool ptrace_bp = false;
+ ppc_inst_t instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea = 0;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -308,12 +399,48 @@ int hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
- bp = __this_cpu_read(bp_per_reg);
- if (!bp) {
- rc = NOTIFY_DONE;
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct arch_hw_breakpoint *info;
+
+ bp[i] = __this_cpu_read(bp_per_reg[i]);
+ if (!bp[i])
+ continue;
+
+ info = counter_arch_bp(bp[i]);
+ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx) &&
+ ppc_inst_equal(instr, ppc_inst(0))) {
+ handler_error(bp[i]);
+ bp[i] = NULL;
+ err = 1;
+ continue;
+ }
+
+ if (is_ptrace_bp(bp[i]))
+ ptrace_bp = true;
+ hit[i] = 1;
+ nr_hit++;
+ }
+ }
+
+ if (err)
+ goto reset;
+
+ if (!nr_hit) {
+ /* Workaround for Power10 DD1 */
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
+ is_octword_vsx_instr(type, size)) {
+ handle_p10dd1_spurious_exception(bp, hit, ea);
+ } else {
+ rc = NOTIFY_DONE;
+ goto out;
+ }
}
- info = counter_arch_bp(bp);
/*
* Return early after invoking user-callback function without restoring
@@ -321,29 +448,50 @@ int hw_breakpoint_handler(struct die_args *args)
* one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
* generated in do_dabr().
*/
- if (bp->overflow_handler == ptrace_triggered) {
- perf_bp_event(bp, regs);
+ if (ptrace_bp) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i] || !is_ptrace_bp(bp[i]))
+ continue;
+ perf_bp_event(bp[i], regs);
+ bp[i] = NULL;
+ }
rc = NOTIFY_DONE;
- goto out;
+ goto reset;
}
- info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (IS_ENABLED(CONFIG_PPC_8xx)) {
- if (!dar_within_range(regs->dar, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
- } else {
- if (!stepping_handler(regs, bp, info))
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (is_larx_stcx_instr(type)) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ larx_stcx_err(bp[i]);
+ bp[i] = NULL;
+ }
+ goto reset;
+ }
+
+ if (!stepping_handler(regs, bp, hit, instr))
+ goto reset;
}
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp[i], regs);
+ }
+
+reset:
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!bp[i])
+ continue;
+ __set_breakpoint(i, counter_arch_bp(bp[i]));
+ }
- __set_breakpoint(info);
out:
rcu_read_unlock();
return rc;
@@ -352,38 +500,51 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler);
/*
* Handle single-step exceptions following a DABR hit.
+ *
+ * Called in atomic context.
*/
static int single_step_dabr_instruction(struct die_args *args)
{
struct pt_regs *regs = args->regs;
- struct perf_event *bp = NULL;
- struct arch_hw_breakpoint *info;
+ bool found = false;
- bp = current->thread.last_hit_ubp;
/*
* Check if we are single-stepping as a result of a
* previous HW Breakpoint exception
*/
- if (!bp)
- return NOTIFY_DONE;
+ for (int i = 0; i < nr_wp_slots(); i++) {
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
- info = counter_arch_bp(bp);
+ bp = __this_cpu_read(bp_per_reg[i]);
- /*
- * We shall invoke the user-defined callback function in the single
- * stepping handler to confirm to 'trigger-after-execute' semantics
- */
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ if (!bp)
+ continue;
+
+ info = counter_arch_bp(bp);
+
+ if (!info->perf_single_step)
+ continue;
- __set_breakpoint(info);
- current->thread.last_hit_ubp = NULL;
+ found = true;
+
+ /*
+ * We shall invoke the user-defined callback function in the
+ * single stepping handler to confirm to 'trigger-after-execute'
+ * semantics
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+
+ info->perf_single_step = false;
+ __set_breakpoint(i, counter_arch_bp(bp));
+ }
/*
* If the process was being single-stepped by ptrace, let the
* other single-step actions occur (e.g. generate SIGTRAP).
*/
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (!found || test_thread_flag(TIF_SINGLESTEP))
return NOTIFY_DONE;
return NOTIFY_STOP;
@@ -392,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction);
/*
* Handle debug exception notifications.
+ *
+ * Called in atomic context.
*/
int hw_breakpoint_exceptions_notify(
struct notifier_block *unused, unsigned long val, void *data)
@@ -416,13 +579,32 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
+ int i;
struct thread_struct *t = &tsk->thread;
- unregister_hw_breakpoint(t->ptrace_bps[0]);
- t->ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
}
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
new file mode 100644
index 000000000000..9e51801c4915
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/sstep.h>
+#include <asm/cache.h>
+
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ return ((ea < info->address + info->len) &&
+ (ea + size > info->address));
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((hw_start_addr <= dar) && (hw_end_addr > dar));
+}
+
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+ unsigned long align_size = HW_BREAKPOINT_SIZE;
+
+ /*
+ * On p10 predecessors, quadword is handle differently then
+ * other instructions.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16)
+ align_size = HW_BREAKPOINT_SIZE_QUADWORD;
+
+ hw_start_addr = ALIGN_DOWN(info->address, align_size);
+ hw_end_addr = ALIGN(info->address + info->len, align_size);
+
+ return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
+}
+
+/*
+ * If hw has multiple DAWR registers, we also need to check all
+ * dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
+ */
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+ struct arch_hw_breakpoint *info)
+{
+ if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
+ return false;
+
+ /*
+ * The Cache Management instructions other than dcbz never
+ * cause a match. i.e. if type is CACHEOP, the instruction
+ * is dcbz, and dcbz is treated as Store.
+ */
+ if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
+ return false;
+
+ if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
+ return false;
+
+ if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
+ return false;
+
+ return true;
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info)
+{
+ bool in_user_range = dar_in_user_range(regs->dar, info);
+ bool dawrx_constraints;
+
+ /*
+ * 8xx supports only one breakpoint and thus we can
+ * unconditionally return true.
+ */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!in_user_range)
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+
+ if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return true;
+ }
+
+ dawrx_constraints = check_dawrx_constraints(regs, type, info);
+
+ if (type == UNKNOWN) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return dawrx_constraints;
+ }
+
+ if (ea_user_range_overlaps(ea, size, info))
+ return dawrx_constraints;
+
+ if (ea_hw_range_overlaps(ea, size, info)) {
+ if (dawrx_constraints) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+ }
+ return false;
+}
+
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
+ int *type, int *size, unsigned long *ea)
+{
+ struct instruction_op op;
+ int err;
+
+ pagefault_disable();
+ err = __get_user_instr(*instr, (void __user *)regs->nip);
+ pagefault_enable();
+
+ if (err)
+ return;
+
+ analyse_instr(&op, regs, *instr);
+ *type = GETTYPE(op.type);
+ *ea = op.ea;
+
+ if (!(regs->msr & MSR_64BIT))
+ *ea &= 0xffffffffUL;
+
+
+ *size = GETSIZE(op.type);
+ if (*type == CACHEOP) {
+ *size = l1_dcache_bytes();
+ *ea &= ~(*size - 1);
+ } else if (*type == LOAD_VMX || *type == STORE_VMX) {
+ *ea &= ~(*size - 1);
+ }
+}
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index a36fd053c3db..30b56c67fa61 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -37,18 +37,10 @@ static int __init powersave_off(char *arg)
{
ppc_md.power_save = NULL;
cpuidle_disable = IDLE_POWERSAVE_OFF;
- return 0;
+ return 1;
}
__setup("powersave=off", powersave_off);
-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
- sched_preempt_enable_no_resched();
- cpu_die();
-}
-#endif
-
void arch_cpu_idle(void)
{
ppc64_runlatch_off();
@@ -59,10 +51,9 @@ void arch_cpu_idle(void)
* Some power_save functions return with
* interrupts enabled, some don't.
*/
- if (irqs_disabled())
- local_irq_enable();
+ if (!irqs_disabled())
+ raw_local_irq_disable();
} else {
- local_irq_enable();
/*
* Go into low thread priority and possibly
* low power mode.
@@ -77,6 +68,31 @@ void arch_cpu_idle(void)
int powersave_nap;
+#ifdef CONFIG_PPC_970_NAP
+void power4_idle(void)
+{
+ if (!cpu_has_feature(CPU_FTR_CAN_NAP))
+ return;
+
+ if (!powersave_nap)
+ return;
+
+ if (!prep_irq_for_idle())
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
+
+ power4_idle_nap();
+
+ /*
+ * power4_idle_nap returns with interrupts enabled (soft and hard).
+ * to our caller with interrupts enabled (soft and hard). Our caller
+ * can cope with either interrupts disabled or enabled upon return.
+ */
+}
+#endif
+
#ifdef CONFIG_SYSCTL
/*
* Register the sysctl to set/clear powersave_nap.
@@ -89,21 +105,12 @@ static struct ctl_table powersave_nap_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {}
-};
-static struct ctl_table powersave_nap_sysctl_root[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = powersave_nap_ctl_table,
- },
- {}
};
static int __init
register_powersave_nap_sysctl(void)
{
- register_sysctl_table(powersave_nap_sysctl_root);
+ register_sysctl("kernel", powersave_nap_ctl_table);
return 0;
}
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_64e.S
index cc008de58b05..0fc680e03dee 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_64e.S
@@ -2,7 +2,7 @@
/*
* Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
*
- * Generic idle routine for Book3E processors
+ * Generic idle routine for 64 bits e500 processors
*/
#include <linux/threads.h>
@@ -16,8 +16,6 @@
#include <asm/hw_irq.h>
/* 64-bit version only for now */
-#ifdef CONFIG_PPC64
-
.macro BOOK3E_IDLE name loop
_GLOBAL(\name)
/* Save LR for later */
@@ -77,7 +75,7 @@ _GLOBAL(\name)
.macro BOOK3E_IDLE_LOOP
1:
- PPC_WAIT(0)
+ PPC_WAIT_v203
b 1b
.endm
@@ -98,6 +96,4 @@ epapr_ev_idle_start:
BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP
-BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP
-
-#endif /* CONFIG_PPC64 */
+BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 0ffdd18b9f26..3c097356366b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
@@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * we are called with DR/IR still off and r2 containing physical
- * address of current. R11 points to the exception frame (physical
- * address). We have to preserve r10.
+ * R11 points to the exception frame. We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
@@ -166,7 +164,7 @@ BEGIN_FTR_SECTION
mfspr r9,SPRN_HID0
andis. r9,r9,HID0_NAP@h
beq 1f
- addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha
+ addis r9, r11, nap_save_msscr0@ha
lwz r9,nap_save_msscr0@l(r9)
mtspr SPRN_MSSCR0, r9
sync
@@ -174,11 +172,12 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
BEGIN_FTR_SECTION
- addis r9,r11,(nap_save_hid1-KERNELBASE)@ha
+ addis r9, r11, nap_save_hid1@ha
lwz r9,nap_save_hid1@l(r9)
mtspr SPRN_HID1, r9
END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
.data
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_85xx.S
index 308f499e146c..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_85xx.S
@@ -74,19 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * r2 containing physical address of current.
- * r11 points to the exception frame (physical address).
+ * r2 containing address of current.
+ * r11 points to the exception frame.
* We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in e500_idle */
stw r9,_NIP(r11) /* make it do a blr */
-
-#ifdef CONFIG_SMP
- lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
- slwi r11,r11,2
-#else
- li r11,0
-#endif
-
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d32751994a62..3d97fb833834 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -15,7 +15,9 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/thread_info.h> /* TLF_NAPPING */
+#ifdef CONFIG_PPC_P7_NAP
/*
* Desired PSSCR in r3
*
@@ -50,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
/* 168 bytes */
PPC_STOP
b . /* catch bugs */
@@ -87,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss)
*/
_GLOBAL(idle_return_gpr_loss)
ld r1,PACAR1(r13)
- ld r4,-8*19(r1)
- ld r5,-8*20(r1)
+ ld r4,-8*20(r1)
+ ld r5,-8*21(r1)
mtlr r4
mtcr r5
/*
@@ -96,38 +102,40 @@ _GLOBAL(idle_return_gpr_loss)
* from PACATOC. This could be avoided for that less common case
* if KVM saved its r2.
*/
- ld r2,-8*0(r1)
- ld r14,-8*1(r1)
- ld r15,-8*2(r1)
- ld r16,-8*3(r1)
- ld r17,-8*4(r1)
- ld r18,-8*5(r1)
- ld r19,-8*6(r1)
- ld r20,-8*7(r1)
- ld r21,-8*8(r1)
- ld r22,-8*9(r1)
- ld r23,-8*10(r1)
- ld r24,-8*11(r1)
- ld r25,-8*12(r1)
- ld r26,-8*13(r1)
- ld r27,-8*14(r1)
- ld r28,-8*15(r1)
- ld r29,-8*16(r1)
- ld r30,-8*17(r1)
- ld r31,-8*18(r1)
+ ld r2,-8*1(r1)
+ ld r14,-8*2(r1)
+ ld r15,-8*3(r1)
+ ld r16,-8*4(r1)
+ ld r17,-8*5(r1)
+ ld r18,-8*6(r1)
+ ld r19,-8*7(r1)
+ ld r20,-8*8(r1)
+ ld r21,-8*9(r1)
+ ld r22,-8*10(r1)
+ ld r23,-8*11(r1)
+ ld r24,-8*12(r1)
+ ld r25,-8*13(r1)
+ ld r26,-8*14(r1)
+ ld r27,-8*15(r1)
+ ld r28,-8*16(r1)
+ ld r29,-8*17(r1)
+ ld r30,-8*18(r1)
+ ld r31,-8*19(r1)
blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
- *
- * The 0(r1) slot is used to save r2 in isa206, so use that here.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r2,0(r1); \
+ std r2,-8(r1); \
ptesync; \
- ld r2,0(r1); \
+ ld r2,-8(r1); \
236: cmpd cr0,r2,r2; \
bne 236b; \
IDLE_INST; \
@@ -152,28 +160,32 @@ _GLOBAL(isa206_idle_insn_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
cmpwi r3,PNV_THREAD_NAP
bne 1f
IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
@@ -181,4 +193,26 @@ _GLOBAL(isa206_idle_insn_mayloss)
bne 2f
IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
+#endif
+#ifdef CONFIG_PPC_970_NAP
+_GLOBAL(power4_idle_nap)
+ LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW)
+ ld r9,PACA_THREAD_INFO(r13)
+ ld r8,TI_LOCAL_FLAGS(r9)
+ ori r8,r8,_TLF_NAPPING
+ std r8,TI_LOCAL_FLAGS(r9)
+ /*
+ * NAPPING bit is set, from this point onward power4_fixup_nap
+ * will cause exceptions to return to power4_idle_nap_return.
+ */
+1: sync
+ isync
+ mtmsrd r7
+ isync
+ b 1b
+
+ .globl power4_idle_nap_return
+power4_idle_nap_return:
+ blr
+#endif
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
deleted file mode 100644
index 33c625329078..000000000000
--- a/arch/powerpc/kernel/idle_power4.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains the power_save function for 970-family CPUs.
- */
-
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/irqflags.h>
-#include <asm/hw_irq.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
- .text
-
-_GLOBAL(power4_idle)
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
-
- /* This sequence is similar to prep_irq_for_idle() */
-
- /* Hard disable interrupts */
- mfmsr r7
- rldicl r0,r7,48,1
- rotldi r0,r0,16
- mtmsrd r0,1
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- 2f
-
- /*
- * Soft-enable interrupts. This will make power4_fixup_nap return
- * to our caller with interrupts enabled (soft and hard). The caller
- * can cope with either interrupts disabled or enabled upon return.
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Tell the tracer interrupts are on, because idle responds to them. */
- mflr r0
- std r0,16(r1)
- stdu r1,-128(r1)
- bl trace_hardirqs_on
- addi r1,r1,128
- ld r0,16(r1)
- mtlr r0
- mfmsr r7
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
-BEGIN_FTR_SECTION
- DSSALL
- sync
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- ld r9, PACA_THREAD_INFO(r13)
- ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
- ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
- ori r7,r7,MSR_EE
- oris r7,r7,MSR_POW@h
-1: sync
- isync
- mtmsrd r7
- isync
- b 1b
-
-2: /* Return if an interrupt had happened while soft disabled */
- /* Set the HARD_DIS flag because interrupts are now hard disabled */
- ori r0,r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
- blr
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
index e34116255ced..b7029beed847 100644
--- a/arch/powerpc/kernel/ima_arch.c
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -19,13 +19,13 @@ bool arch_ima_get_secureboot(void)
* to be stored as an xattr or as an appended signature.
*
* To avoid duplicate signature verification as much as possible, the IMA
- * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG
* is not enabled.
*/
static const char *const secure_rules[] = {
- "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
-#ifndef CONFIG_MODULE_SIG_FORCE
- "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
#endif
NULL
};
@@ -49,9 +49,9 @@ static const char *const trusted_rules[] = {
static const char *const secure_and_trusted_rules[] = {
"measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
"measure func=MODULE_CHECK template=ima-modsig",
- "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
-#ifndef CONFIG_MODULE_SIG_FORCE
- "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
#endif
NULL
};
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
new file mode 100644
index 000000000000..eca293794a1e
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt.c
@@ -0,0 +1,505 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/context_tracking.h>
+#include <linux/err.h>
+#include <linux/compat.h>
+#include <linux/rseq.h>
+#include <linux/sched/debug.h> /* for show_regs */
+
+#include <asm/kup.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+ return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ *
+ * restartable is true then EE/RI can be left on because interrupts are handled
+ * with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+ bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ if (must_hard_disable)
+ __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (must_hard_disable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
+
+static notrace void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+ if (likely(!(dbcr0 & DBCR0_IDM)))
+ return;
+
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ isync();
+ global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+ }
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+static notrace void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
+
+ if (trap_is_scv(regs))
+ return;
+
+ trap = TRAP(regs);
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+
+ switch (trap) {
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!READ_ONCE(*validp))
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!READ_ONCE(*validp))
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
+
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
+
+ /*
+ * A NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs are loaded. The interrupt then
+ * comes in and clobbers SRRs and clears srr_valid. Then we load
+ * the SRRs here and test them above and find they don't match.
+ *
+ * Test validity again after that, to catch such false positives.
+ *
+ * This test in general will have some window for false negatives
+ * and may not catch and fix all such cases if an NMI comes in
+ * later and clobbers SRRs without clearing srr_valid, but hopefully
+ * such things will get caught most of the time, statistically
+ * enough to be able to get a warning out.
+ */
+ if (!READ_ONCE(*validp))
+ return;
+
+ if (!data_race(warned)) {
+ data_race(warned = true);
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
+ }
+
+ WRITE_ONCE(*validp, 0); /* fixup */
+#endif
+}
+
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+
+again:
+ ti_flags = read_thread_flags();
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = read_thread_flags();
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ /*
+ * If userspace MSR has all available FP bits set,
+ * then they are live and no need to restore. If not,
+ * it means the regs were given up and restore_math
+ * may decide to restore them (to avoid taking an FP
+ * fault).
+ */
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ user_enter_irqoff();
+ if (!prep_irq_for_enabled_exit(true)) {
+ user_exit_irqoff();
+ local_irq_enable();
+ local_irq_disable();
+ goto again;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ booke_load_dbcr0();
+
+ account_cpu_user_exit();
+
+ /* Restore user access locks last */
+ kuap_user_restore(regs);
+
+ return ret;
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
+{
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ kuap_assert_locked();
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = read_thread_flags();
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+ local_irq_disable();
+ ret = interrupt_exit_user_prepare_main(ret, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+ return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+ unsigned long ret;
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(arch_irq_disabled_regs(regs));
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * AMR can only have been unlocked if we interrupted the kernel.
+ */
+ kuap_assert_locked();
+
+ local_irq_disable();
+
+ ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+{
+ unsigned long ret = 0;
+ unsigned long kuap;
+ bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
+
+ if (regs_is_unrecoverable(regs))
+ unrecoverable_exception(regs);
+ /*
+ * CT_WARN_ON comes here via program_check_exception, so avoid
+ * recursion.
+ *
+ * Skip the assertion on PMIs on 64e to work around a problem caused
+ * by NMI PMIs incorrectly taking this interrupt return path, it's
+ * possible for this to hit after interrupt exit to user switches
+ * context to user. See also the comment in the performance monitor
+ * handler in exceptions-64e.S
+ */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+ TRAP(regs) != INTERRUPT_PROGRAM &&
+ TRAP(regs) != INTERRUPT_PERFMON)
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ kuap = kuap_get_and_assert_locked();
+
+ local_irq_disable();
+
+ if (!arch_irq_disabled_regs(regs)) {
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+ if (IS_ENABLED(CONFIG_PREEMPT)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enabe(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ check_return_regs_valid(regs);
+
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+#endif /* CONFIG_PPC64 */
+ }
+
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+ * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+ * AMR value from the check above.
+ */
+ kuap_kernel_restore(regs, kuap);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644
index 000000000000..bd863702d812
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,772 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+
+ .align 7
+
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_SRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_HSRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .endif
+#endif
+.endm
+
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+ .globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+ SCV_INTERRUPT_TO_KERNEL
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+ LOAD_PACA_TOC()
+ mfcr r12
+ li r11,0
+ /* Save syscall parameters in r3-r8 */
+ SAVE_GPRS(3, 8, r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_LINK(r1)
+ std r11,_CTR(r1)
+
+ li r11,\trapnr
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+ * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+ * and interrupts may be masked and pending already.
+ * system_call_exception() will call trace_hardirqs_off() which means
+ * interrupts could already have been blocked before trace_hardirqs_off,
+ * but this is the best we can do.
+ */
+
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
+ bl CFUNC(system_call_exception)
+
+.Lsyscall_vectored_\name\()_exit:
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,1 /* scv */
+ bl CFUNC(syscall_exit_prepare)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Lsyscall_vectored_\name\()_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_vectored_\name\()_restart
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi r3,0
+ bne .Lsyscall_vectored_\name\()_restore_regs
+
+ /* rfscv returns with LR->NIA and CTR->MSR */
+ mtlr r4
+ mtctr r5
+
+ /* Could zero these as per ABI, but we may consider a stricter ABI
+ * which preserves these if libc implementations can benefit, so
+ * restore them for now until further measurement is done. */
+ REST_GPR(0, r1)
+ REST_GPRS(4, 8, r1)
+ /* Zero volatile regs that may contain sensitive kernel data */
+ ZEROIZE_GPRS(9, 12)
+ mtspr SPRN_XER,r0
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
+ RFSCV_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+
+ ld r3,_CTR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_XER(r1)
+
+ HANDLER_RESTORE_NVGPRS()
+ REST_GPR(0, r1)
+ mtcr r2
+ mtctr r3
+ mtlr r4
+ mtspr SPRN_XER,r5
+ REST_GPRS(2, 13, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+.Lsyscall_vectored_\name\()_rst_end:
+
+syscall_vectored_\name\()_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(syscall_exit_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_vectored_\name\()_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
+
+.endm
+
+system_call_vectored common 0x3000
+
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common_real
+system_call_common_real:
+_ASM_NOKPROBE_SYMBOL(system_call_common_real)
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ mtmsrd r10
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common
+system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+#ifdef CONFIG_PPC_E500
+START_BTB_FLUSH_SECTION
+ BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
+ LOAD_PACA_TOC()
+ mfcr r12
+ li r11,0
+ /* Save syscall parameters in r3-r8 */
+ SAVE_GPRS(3, 8, r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
+ mflr r10
+
+ /*
+ * This clears CR0.SO (bit 28), which is the error indication on
+ * return from this system call.
+ */
+ rldimi r12,r11,28,(63-28)
+ li r11,0xc00
+ std r10,_LINK(r1)
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,1
+ stb r11,PACASRR_VALID(r13)
+#endif
+
+ /*
+ * We always enter kernel from userspace with irq soft-mask enabled and
+ * nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
+ */
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_PPC_BOOK3S
+ li r12,-1 /* Set MSR_EE and MSR_RI */
+ mtmsrd r12,1
+#else
+ wrteei 1
+#endif
+
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
+ bl CFUNC(system_call_exception)
+
+.Lsyscall_exit:
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,0 /* !scv */
+ bl CFUNC(syscall_exit_prepare)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Lsyscall_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r6,_LINK(r1)
+ mtlr r6
+
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r4,PACASRR_VALID(r13)
+ cmpdi r4,0
+ bne 1f
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+1:
+ DEBUG_SRR_VALID srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi r3,0
+ bne .Lsyscall_restore_regs
+ /* Zero volatile regs that may contain sensitive kernel data */
+ ZEROIZE_GPR(0)
+ ZEROIZE_GPRS(4, 12)
+ mtctr r0
+ mtspr SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
+ HANDLER_RESTORE_NVGPRS()
+ mtctr r3
+ mtspr SPRN_XER,r4
+ REST_GPR(0, r1)
+ REST_GPRS(4, 12, r1)
+ b .Lsyscall_restore_regs_cont
+.Lsyscall_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+syscall_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(syscall_exit_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
+#endif
+
+ /*
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+ .globl fast_interrupt_return_srr
+fast_interrupt_return_srr:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
+ kuap_check_amr r3, r4
+ ld r5,_MSR(r1)
+ andi. r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
+ beq 1f
+ kuap_user_restore r3, r4
+ b .Lfast_user_interrupt_return_srr
+1: kuap_kernel_restore r3, r4
+ andi. r0,r5,MSR_RI
+ li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+ bne+ .Lfast_kernel_interrupt_return_srr
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unrecoverable_exception)
+ b . /* should not get here */
+#else
+ bne .Lfast_user_interrupt_return_srr
+ b .Lfast_kernel_interrupt_return_srr
+#endif
+
+.macro interrupt_return_macro srr
+ .balign IFETCH_ALIGN_BYTES
+ .globl interrupt_return_\srr
+interrupt_return_\srr\():
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(interrupt_exit_user_prepare)
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs_\srr
+.Lrestore_nvgprs_\srr\()_cont:
+#endif
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Linterrupt_return_\srr\()_user_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_user_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+.Lfast_user_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ lbz r4,PACAIRQSOFTMASK(r13)
+ tdnei r4,IRQS_ENABLED
+#endif
+
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
+
+ REST_GPRS(7, 13, r1)
+
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
+
+ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_USER
+ .else
+ HRFI_TO_USER
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_user_rst_end:
+
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+.Lrestore_nvgprs_\srr\():
+ REST_NVGPRS(r1)
+ b .Lrestore_nvgprs_\srr\()_cont
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_user_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ addi r3,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(interrupt_exit_user_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_user_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
+#endif
+
+ .balign IFETCH_ALIGN_BYTES
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(interrupt_exit_kernel_prepare)
+
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Linterrupt_return_\srr\()_kernel_rst_start:
+ ld r11,SOFTE(r1)
+ cmpwi r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ beq .Linterrupt_return_\srr\()_soft_enabled
+
+ /*
+ * Returning to soft-disabled context.
+ * Check if a MUST_HARD_MASK interrupt has become pending, in which
+ * case we need to disable MSR[EE] in the return context.
+ *
+ * The MSR[EE] check catches among other things the short incoherency
+ * in hard_irq_disable() between clearing MSR[EE] and setting
+ * PACA_IRQ_HARD_DIS.
+ */
+ ld r12,_MSR(r1)
+ andi. r10,r12,MSR_EE
+ beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
+ bne 1f // HARD_MASK is pending
+ // No HARD_MASK pending, clear possible HARD_DIS set by interrupt
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ stb r11,PACAIRQHAPPENED(r13)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
+
+1: /* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,0
+ /* Clear valid before changing _MSR */
+ .ifc \srr,srr
+ stb r10,PACASRR_VALID(r13)
+ .else
+ stb r10,PACAHSRR_VALID(r13)
+ .endif
+#endif
+ xori r12,r12,MSR_EE
+ std r12,_MSR(r1)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
+ /*
+ * In the soft-enabled case, need to double-check that we have no
+ * pending interrupts that might have come in before we reached the
+ * restart section of code, and restart the exit so those can be
+ * handled.
+ *
+ * If there are none, it is be possible that the interrupt still
+ * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the
+ * interrupted context. This clear will not clobber a new pending
+ * interrupt coming in, because we're in the restart section, so
+ * such would return to the restart location.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_kernel_restart
+#endif
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
+
+.Lfast_kernel_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi cr1,r3,0
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_LINK(r1)
+ ld r4,_CTR(r1)
+ ld r5,_XER(r1)
+ ld r6,_CCR(r1)
+ li r0,0
+
+ REST_GPRS(7, 12, r1)
+
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
+
+ /*
+ * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ std r0,STACK_INT_FRAME_MARKER(r1)
+
+ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * PACA_EXGEN as temporary storage to hold the store data, as
+ * interrupts are disabled here so it won't be clobbered.
+ */
+ mtcr r6
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ std r9,0(r1) /* perform store component of stdu */
+ ld r9,PACA_EXGEN+0(r13)
+
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_kernel_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_kernel_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ addi r3,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(interrupt_exit_kernel_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_kernel_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
+
+.endm
+
+interrupt_return_macro srr
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_macro hsrr
+
+ .globl __end_soft_masked
+__end_soft_masked:
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
+ li r3,0 /* fork() return value */
+ b .Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
+ li r3,0 /* fork() return value */
+ b .Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_user_thread)
+ bl CFUNC(schedule_tail)
+ mtctr r14
+ mr r3,r15
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14
+#endif
+ bctrl
+ li r3,0
+ /*
+ * It does not matter whether this returns via the scv or sc path
+ * because it returns as execve() and therefore has no calling ABI
+ * (i.e., it sets registers according to the exec()ed entry point).
+ */
+ b .Lsyscall_exit
+
+_GLOBAL(start_kernel_thread)
+ bl CFUNC(schedule_tail)
+ mtctr r14
+ mr r3,r15
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14
+#endif
+ bctrl
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 0276bc8c8969..c877f074d174 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -10,10 +10,10 @@
#include <linux/kernel.h>
#include <linux/sched/mm.h> /* for init_mm */
+#include <linux/pgtable.h>
#include <asm/io.h>
#include <asm/machdep.h>
-#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
#include <asm/pte-walk.h>
@@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
#ifdef CONFIG_PPC_INDIRECT_MMIO
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
{
- unsigned hugepage_shift;
struct iowa_bus *bus;
int token;
@@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
bus = &iowa_busses[token - 1];
else {
unsigned long vaddr, paddr;
- pte_t *ptep;
vaddr = (unsigned long)PCI_FIX_ADDR(addr);
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- /*
- * We won't find huge pages here (iomem). Also can't hit
- * a page table free due to init_mm
- */
- ptep = find_init_mm_pte(vaddr, &hugepage_shift);
- if (ptep == NULL)
- paddr = 0;
- else {
- WARN_ON(hugepage_shift);
- paddr = pte_pfn(*ptep) << PAGE_SHIFT;
- }
+
+ paddr = ppc_find_vmap_phys(vaddr);
+
bus = iowa_pci_find(vaddr, paddr);
if (bus == NULL)
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 2f29b7d432de..6af535905984 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -33,7 +33,7 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count)
return;
asm volatile("sync");
do {
- tmp = *port;
+ tmp = *(const volatile u8 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
@@ -49,7 +49,7 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count)
return;
asm volatile("sync");
do {
- *port = *tbuf++;
+ *(volatile u8 __force *)port = *tbuf++;
} while (--count != 0);
asm volatile("sync");
}
@@ -64,7 +64,7 @@ void _insw_ns(const volatile u16 __iomem *port, void *buf, long count)
return;
asm volatile("sync");
do {
- tmp = *port;
+ tmp = *(const volatile u16 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
@@ -80,7 +80,7 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
return;
asm volatile("sync");
do {
- *port = *tbuf++;
+ *(volatile u16 __force *)port = *tbuf++;
} while (--count != 0);
asm volatile("sync");
}
@@ -95,7 +95,7 @@ void _insl_ns(const volatile u32 __iomem *port, void *buf, long count)
return;
asm volatile("sync");
do {
- tmp = *port;
+ tmp = *(const volatile u32 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
@@ -111,7 +111,7 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
return;
asm volatile("sync");
do {
- *port = *tbuf++;
+ *(volatile u32 __force *)port = *tbuf++;
} while (--count != 0);
asm volatile("sync");
}
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 5ac84efc6ede..72862a4d3a5d 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -11,177 +11,11 @@
#include <asm/pci-bridge.h>
#include <asm/isa-bridge.h>
-/*
- * Here comes the ppc64 implementation of the IOMAP
- * interfaces.
- */
-unsigned int ioread8(void __iomem *addr)
-{
- return readb(addr);
-}
-unsigned int ioread16(void __iomem *addr)
-{
- return readw(addr);
-}
-unsigned int ioread16be(void __iomem *addr)
-{
- return readw_be(addr);
-}
-unsigned int ioread32(void __iomem *addr)
-{
- return readl(addr);
-}
-unsigned int ioread32be(void __iomem *addr)
-{
- return readl_be(addr);
-}
-EXPORT_SYMBOL(ioread8);
-EXPORT_SYMBOL(ioread16);
-EXPORT_SYMBOL(ioread16be);
-EXPORT_SYMBOL(ioread32);
-EXPORT_SYMBOL(ioread32be);
-#ifdef __powerpc64__
-u64 ioread64(void __iomem *addr)
-{
- return readq(addr);
-}
-u64 ioread64_lo_hi(void __iomem *addr)
-{
- return readq(addr);
-}
-u64 ioread64_hi_lo(void __iomem *addr)
-{
- return readq(addr);
-}
-u64 ioread64be(void __iomem *addr)
-{
- return readq_be(addr);
-}
-u64 ioread64be_lo_hi(void __iomem *addr)
-{
- return readq_be(addr);
-}
-u64 ioread64be_hi_lo(void __iomem *addr)
-{
- return readq_be(addr);
-}
-EXPORT_SYMBOL(ioread64);
-EXPORT_SYMBOL(ioread64_lo_hi);
-EXPORT_SYMBOL(ioread64_hi_lo);
-EXPORT_SYMBOL(ioread64be);
-EXPORT_SYMBOL(ioread64be_lo_hi);
-EXPORT_SYMBOL(ioread64be_hi_lo);
-#endif /* __powerpc64__ */
-
-void iowrite8(u8 val, void __iomem *addr)
-{
- writeb(val, addr);
-}
-void iowrite16(u16 val, void __iomem *addr)
-{
- writew(val, addr);
-}
-void iowrite16be(u16 val, void __iomem *addr)
-{
- writew_be(val, addr);
-}
-void iowrite32(u32 val, void __iomem *addr)
-{
- writel(val, addr);
-}
-void iowrite32be(u32 val, void __iomem *addr)
-{
- writel_be(val, addr);
-}
-EXPORT_SYMBOL(iowrite8);
-EXPORT_SYMBOL(iowrite16);
-EXPORT_SYMBOL(iowrite16be);
-EXPORT_SYMBOL(iowrite32);
-EXPORT_SYMBOL(iowrite32be);
-#ifdef __powerpc64__
-void iowrite64(u64 val, void __iomem *addr)
-{
- writeq(val, addr);
-}
-void iowrite64_lo_hi(u64 val, void __iomem *addr)
-{
- writeq(val, addr);
-}
-void iowrite64_hi_lo(u64 val, void __iomem *addr)
-{
- writeq(val, addr);
-}
-void iowrite64be(u64 val, void __iomem *addr)
-{
- writeq_be(val, addr);
-}
-void iowrite64be_lo_hi(u64 val, void __iomem *addr)
-{
- writeq_be(val, addr);
-}
-void iowrite64be_hi_lo(u64 val, void __iomem *addr)
-{
- writeq_be(val, addr);
-}
-EXPORT_SYMBOL(iowrite64);
-EXPORT_SYMBOL(iowrite64_lo_hi);
-EXPORT_SYMBOL(iowrite64_hi_lo);
-EXPORT_SYMBOL(iowrite64be);
-EXPORT_SYMBOL(iowrite64be_lo_hi);
-EXPORT_SYMBOL(iowrite64be_hi_lo);
-#endif /* __powerpc64__ */
-
-/*
- * These are the "repeat read/write" functions. Note the
- * non-CPU byte order. We do things in "IO byteorder"
- * here.
- *
- * FIXME! We could make these do EEH handling if we really
- * wanted. Not clear if we do.
- */
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsb(addr, dst, count);
-}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsw(addr, dst, count);
-}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsl(addr, dst, count);
-}
-EXPORT_SYMBOL(ioread8_rep);
-EXPORT_SYMBOL(ioread16_rep);
-EXPORT_SYMBOL(ioread32_rep);
-
-void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesb(addr, src, count);
-}
-void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesw(addr, src, count);
-}
-void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesl(addr, src, count);
-}
-EXPORT_SYMBOL(iowrite8_rep);
-EXPORT_SYMBOL(iowrite16_rep);
-EXPORT_SYMBOL(iowrite32_rep);
-
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);
}
-
-void ioport_unmap(void __iomem *addr)
-{
- /* Nothing to do */
-}
EXPORT_SYMBOL(ioport_map);
-EXPORT_SYMBOL(ioport_unmap);
#ifdef CONFIG_PCI
void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9704f3f76e63..ebe259bdd462 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -25,8 +25,8 @@
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/sched.h>
+#include <linux/debugfs.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
@@ -35,9 +35,48 @@
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
#define DBG(...)
+#ifdef CONFIG_IOMMU_DEBUGFS
+static int iommu_debugfs_weight_get(void *data, u64 *val)
+{
+ struct iommu_table *tbl = data;
+ *val = bitmap_weight(tbl->it_map, tbl->it_size);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n");
+
+static void iommu_debugfs_add(struct iommu_table *tbl)
+{
+ char name[10];
+ struct dentry *liobn_entry;
+
+ sprintf(name, "%08lx", tbl->it_index);
+ liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir);
+
+ debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight);
+ debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size);
+ debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift);
+ debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start);
+ debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end);
+ debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels);
+ debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size);
+}
+
+static void iommu_debugfs_del(struct iommu_table *tbl)
+{
+ char name[10];
+
+ sprintf(name, "%08lx", tbl->it_index);
+ debugfs_lookup_and_remove(name, iommu_debugfs_dir);
+}
+#else
+static void iommu_debugfs_add(struct iommu_table *tbl){}
+static void iommu_debugfs_del(struct iommu_table *tbl){}
+#endif
+
static int novmerge;
static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
@@ -133,17 +172,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
return 0;
}
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes. Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
.notifier_call = fail_iommu_bus_notify
};
+#endif
static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
- bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
- bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
#endif
return 0;
@@ -172,7 +222,6 @@ static unsigned long iommu_range_alloc(struct device *dev,
int largealloc = npages > 15;
int pass = 0;
unsigned long align_mask;
- unsigned long boundary_size;
unsigned long flags;
unsigned int pool_nr;
struct iommu_pool *pool;
@@ -236,15 +285,9 @@ again:
}
}
- if (dev)
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- 1 << tbl->it_page_shift);
- else
- boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
- /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
-
n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
- boundary_size >> tbl->it_page_shift, align_mask);
+ dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
+ align_mask);
if (n == -1) {
if (likely(pass == 0)) {
/* First try the pool from the start */
@@ -262,6 +305,15 @@ again:
pass++;
goto again;
+ } else if (pass == tbl->nr_pools + 1) {
+ /* Last resort: try largepool */
+ spin_unlock(&pool->lock);
+ pool = &tbl->large_pool;
+ spin_lock(&pool->lock);
+ pool->hint = pool->start;
+ pass++;
+ goto again;
+
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
@@ -430,7 +482,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
BUG_ON(direction == DMA_NONE);
if ((nelems == 0) || !tbl)
- return 0;
+ return -EINVAL;
outs = s = segstart = &sglist[0];
outcount = 1;
@@ -477,7 +529,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
dma_addr = entry << tbl->it_page_shift;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
+ dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
@@ -532,7 +584,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -550,13 +601,12 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
break;
}
- return 0;
+ return -EIO;
}
@@ -647,32 +697,24 @@ static void iommu_table_reserve_pages(struct iommu_table *tbl,
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- tbl->it_reserved_start = res_start;
- tbl->it_reserved_end = res_end;
+ if (res_start < tbl->it_offset)
+ res_start = tbl->it_offset;
- /* Check if res_start..res_end isn't empty and overlaps the table */
- if (res_start && res_end &&
- (tbl->it_offset + tbl->it_size < res_start ||
- res_end < tbl->it_offset))
- return;
+ if (res_end > (tbl->it_offset + tbl->it_size))
+ res_end = tbl->it_offset + tbl->it_size;
- for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
- set_bit(i - tbl->it_offset, tbl->it_map);
-}
-
-static void iommu_table_release_pages(struct iommu_table *tbl)
-{
- int i;
+ /* Check if res_start..res_end is a valid range in the table */
+ if (res_start >= res_end) {
+ tbl->it_reserved_start = tbl->it_offset;
+ tbl->it_reserved_end = tbl->it_offset;
+ return;
+ }
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
- clear_bit(i - tbl->it_offset, tbl->it_map);
+ set_bit(i - tbl->it_offset, tbl->it_map);
}
/*
@@ -684,7 +726,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
{
unsigned long sz;
static int welcomed = 0;
- struct page *page;
unsigned int i;
struct iommu_pool *p;
@@ -693,11 +734,11 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
- page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
- if (!page)
- panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
- tbl->it_map = page_address(page);
- memset(tbl->it_map, 0, sz);
+ tbl->it_map = vzalloc_node(sz, nid);
+ if (!tbl->it_map) {
+ pr_err("%s: Can't allocate %ld bytes\n", __func__, sz);
+ return NULL;
+ }
iommu_table_reserve_pages(tbl, res_start, res_end);
@@ -732,13 +773,34 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
welcomed = 1;
}
+ iommu_debugfs_add(tbl);
+
return tbl;
}
+bool iommu_table_in_use(struct iommu_table *tbl)
+{
+ unsigned long start = 0, end;
+
+ /* ignore reserved bit0 */
+ if (tbl->it_offset == 0)
+ start = 1;
+
+ /* Simple case with no reserved MMIO32 region */
+ if (!tbl->it_reserved_start && !tbl->it_reserved_end)
+ return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;
+
+ end = tbl->it_reserved_start - tbl->it_offset;
+ if (find_next_bit(tbl->it_map, end, start) != end)
+ return true;
+
+ start = tbl->it_reserved_end - tbl->it_offset;
+ end = tbl->it_size;
+ return find_next_bit(tbl->it_map, end, start) != end;
+}
+
static void iommu_table_free(struct kref *kref)
{
- unsigned long bitmap_sz;
- unsigned int order;
struct iommu_table *tbl;
tbl = container_of(kref, struct iommu_table, it_kref);
@@ -751,18 +813,14 @@ static void iommu_table_free(struct kref *kref)
return;
}
- iommu_table_release_pages(tbl);
+ iommu_debugfs_del(tbl);
/* verify that table contains no entries */
- if (!bitmap_empty(tbl->it_map, tbl->it_size))
+ if (iommu_table_in_use(tbl))
pr_warn("%s: Unexpected TCEs\n", __func__);
- /* calculate bitmap size in bytes */
- bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
-
/* free bitmap */
- order = get_order(bitmap_sz);
- free_pages((unsigned long) tbl->it_map, order);
+ vfree(tbl->it_map);
/* free table */
kfree(tbl);
@@ -858,6 +916,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
+ int tcesize = (1 << tbl->it_page_shift);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -884,7 +943,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
@@ -892,7 +952,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
free_pages((unsigned long)ret, order);
return NULL;
}
- *dma_handle = mapping;
+
+ *dma_handle = mapping | ((u64)ret & (tcesize - 1));
return ret;
}
@@ -903,7 +964,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
@@ -1013,15 +1074,15 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
- struct iommu_table *tbl,
- unsigned long entry, unsigned long *hpa,
- enum dma_data_direction *direction)
+long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
@@ -1036,11 +1097,12 @@ void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
- tbl->it_ops->tce_kill(tbl, entry, pages, false);
+ tbl->it_ops->tce_kill(tbl, entry, pages);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
-int iommu_take_ownership(struct iommu_table *tbl)
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+static int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
int ret = 0;
@@ -1057,16 +1119,11 @@ int iommu_take_ownership(struct iommu_table *tbl)
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
-
- iommu_table_release_pages(tbl);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
- if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+ if (iommu_table_in_use(tbl)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
- iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
- tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1077,15 +1134,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_take_ownership);
-void iommu_release_ownership(struct iommu_table *tbl)
+static void iommu_release_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
memset(tbl->it_map, 0, sz);
@@ -1096,7 +1152,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
spin_unlock(&tbl->pools[i].lock);
spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
-EXPORT_SYMBOL_GPL(iommu_release_ownership);
+#endif
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
@@ -1117,25 +1173,240 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev), iommu_group_id(table_group->group));
-
- return iommu_group_add_device(table_group->group, dev);
+ /*
+ * This is still not adding devices via the IOMMU bus notifier because
+ * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+ * pcibios_scan_phb() first (and this guy adds devices and triggers
+ * the notifier) and only then it calls pci_bus_add_devices() which
+ * configures DMA for buses which also creates PEs and IOMMU groups.
+ */
+ return iommu_probe_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
-void iommu_del_device(struct device *dev)
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * A simple iommu_table_group_ops which only allows reusing the existing
+ * iommu_table. This handles VFIO for POWER7 or the nested KVM.
+ * The ops does not allow creating windows and only allows reusing the existing
+ * one if it matches table_group->tce32_start/tce32_size/page_shift.
+ */
+static unsigned long spapr_tce_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels)
+{
+ unsigned long size;
+
+ if (levels > 1)
+ return ~0U;
+ size = window_size >> (page_shift - 3);
+ return size;
+}
+
+static long spapr_tce_create_table(struct iommu_table_group *table_group, int num,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
{
+ struct iommu_table *tbl = table_group->tables[0];
+
+ if (num > 0)
+ return -EPERM;
+
+ if (tbl->it_page_shift != page_shift ||
+ tbl->it_size != (window_size >> page_shift) ||
+ tbl->it_indirect_levels != levels - 1)
+ return -EINVAL;
+
+ *ptbl = iommu_tce_table_get(tbl);
+ return 0;
+}
+
+static long spapr_tce_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ return tbl == table_group->tables[num] ? 0 : -EPERM;
+}
+
+static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num)
+{
+ return 0;
+}
+
+static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
+{
+ int i, j, rc = 0;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbl = table_group->tables[i];
+
+ if (!tbl || !tbl->it_map)
+ continue;
+
+ rc = iommu_take_ownership(tbl);
+ if (!rc)
+ continue;
+
+ for (j = 0; j < i; ++j)
+ iommu_release_ownership(table_group->tables[j]);
+ return rc;
+ }
+ return 0;
+}
+
+static void spapr_tce_release_ownership(struct iommu_table_group *table_group)
+{
+ int i;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbl = table_group->tables[i];
+
+ if (!tbl)
+ continue;
+
+ iommu_table_clear(tbl);
+ if (tbl->it_map)
+ iommu_release_ownership(tbl);
+ }
+}
+
+struct iommu_table_group_ops spapr_tce_table_group_ops = {
+ .get_table_size = spapr_tce_get_table_size,
+ .create_table = spapr_tce_create_table,
+ .set_window = spapr_tce_set_window,
+ .unset_window = spapr_tce_unset_window,
+ .take_ownership = spapr_tce_take_ownership,
+ .release_ownership = spapr_tce_release_ownership,
+};
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static int
+spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+ int ret = -EINVAL;
+
+ /* At first attach the ownership is already set */
+ if (!domain)
+ return 0;
+
+ if (!grp)
+ return -ENODEV;
+
+ table_group = iommu_group_get_iommudata(grp);
+ ret = table_group->ops->take_ownership(table_group);
+ iommu_group_put(grp);
+
+ return ret;
+}
+
+static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
+ .attach_dev = spapr_tce_platform_iommu_attach_dev,
+};
+
+static struct iommu_domain spapr_tce_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &spapr_tce_platform_domain_ops,
+};
+
+static struct iommu_domain spapr_tce_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
/*
- * Some devices might not have IOMMU table and group
- * and we needn't detach them from the associated
- * IOMMU groups
+ * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
+ * also sets the dma_api ops
*/
- if (!device_iommu_mapped(dev)) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
- return;
+ .ops = &spapr_tce_platform_domain_ops,
+};
+
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ default:
+ break;
}
- iommu_group_remove_device(dev);
+ return false;
}
-EXPORT_SYMBOL_GPL(iommu_del_device);
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+ struct pci_dev *pdev;
+ struct pci_controller *hose;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-EPERM);
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ return &hose->iommu;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+ struct pci_controller *hose;
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ if (!hose->controller_ops.device_group)
+ return ERR_PTR(-ENOENT);
+
+ return hose->controller_ops.device_group(hose, pdev);
+}
+
+static const struct iommu_ops spapr_tce_iommu_ops = {
+ .default_domain = &spapr_tce_platform_domain,
+ .blocked_domain = &spapr_tce_blocked_domain,
+ .capable = spapr_tce_iommu_capable,
+ .probe_device = spapr_tce_iommu_probe_device,
+ .release_device = spapr_tce_iommu_release_device,
+ .device_group = spapr_tce_iommu_device_group,
+};
+
+static struct attribute *spapr_tce_iommu_attrs[] = {
+ NULL,
+};
+
+static struct attribute_group spapr_tce_iommu_group = {
+ .name = "spapr-tce-iommu",
+ .attrs = spapr_tce_iommu_attrs,
+};
+
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+ &spapr_tce_iommu_group,
+ NULL,
+};
+
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ iommu_device_sysfs_add(&hose->iommu, hose->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ hose->global_number);
+ iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
+ hose->parent);
+ }
+ return 0;
+}
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+#endif
+
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..6f7d4edaa0bc 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -50,26 +50,23 @@
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/cache.h>
-#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
-#include <asm/livepatch.h>
-#include <asm/asm-prototypes.h>
#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#include <asm/firmware.h>
-#include <asm/lv1call.h>
-#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
#include <asm/cpu_has_feature.h>
@@ -86,417 +83,6 @@ u32 tau_interrupts(unsigned long cpu);
#endif
#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-
-int distribute_irqs = 1;
-
-static inline notrace unsigned long get_irq_happened(void)
-{
- unsigned long happened;
-
- __asm__ __volatile__("lbz %0,%1(13)"
- : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
-
- return happened;
-}
-
-static inline notrace int decrementer_check_overflow(void)
-{
- u64 now = get_tb_or_rtc();
- u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-
- return now >= *next_tb;
-}
-
-/* This is called whenever we are re-enabling interrupts
- * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
- * there's an EE, DEC or DBELL to generate.
- *
- * This is called in two contexts: From arch_local_irq_restore()
- * before soft-enabling interrupts, and from the exception exit
- * path when returning from an interrupt from a soft-disabled to
- * a soft enabled context. In both case we have interrupts hard
- * disabled.
- *
- * We take care of only clearing the bits we handled in the
- * PACA irq_happened field since we can only re-emit one at a
- * time and we don't want to "lose" one.
- */
-notrace unsigned int __check_irq_replay(void)
-{
- /*
- * We use local_paca rather than get_paca() to avoid all
- * the debug_smp_processor_id() business in this low level
- * function
- */
- unsigned char happened = local_paca->irq_happened;
-
- /*
- * We are responding to the next interrupt, so interrupt-off
- * latencies should be reset here.
- */
- trace_hardirqs_on();
- trace_hardirqs_off();
-
- /*
- * We are always hard disabled here, but PACA_IRQ_HARD_DIS may
- * not be set, which means interrupts have only just been hard
- * disabled as part of the local_irq_restore or interrupt return
- * code. In that case, skip the decrementr check becaus it's
- * expensive to read the TB.
- *
- * HARD_DIS then gets cleared here, but it's reconciled later.
- * Either local_irq_disable will replay the interrupt and that
- * will reconcile state like other hard interrupts. Or interrupt
- * retur will replay the interrupt and in that case it sets
- * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S).
- */
- if (happened & PACA_IRQ_HARD_DIS) {
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-
- /*
- * We may have missed a decrementer interrupt if hard disabled.
- * Check the decrementer register in case we had a rollover
- * while hard disabled.
- */
- if (!(happened & PACA_IRQ_DEC)) {
- if (decrementer_check_overflow()) {
- local_paca->irq_happened |= PACA_IRQ_DEC;
- happened |= PACA_IRQ_DEC;
- }
- }
- }
-
- /*
- * Force the delivery of pending soft-disabled interrupts on PS3.
- * Any HV call will have this side effect.
- */
- if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
- u64 tmp, tmp2;
- lv1_get_version_info(&tmp, &tmp2);
- }
-
- /*
- * Check if an hypervisor Maintenance interrupt happened.
- * This is a higher priority interrupt than the others, so
- * replay it first.
- */
- if (happened & PACA_IRQ_HMI) {
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- return 0xe60;
- }
-
- if (happened & PACA_IRQ_DEC) {
- local_paca->irq_happened &= ~PACA_IRQ_DEC;
- return 0x900;
- }
-
- if (happened & PACA_IRQ_PMI) {
- local_paca->irq_happened &= ~PACA_IRQ_PMI;
- return 0xf00;
- }
-
- if (happened & PACA_IRQ_EE) {
- local_paca->irq_happened &= ~PACA_IRQ_EE;
- return 0x500;
- }
-
-#ifdef CONFIG_PPC_BOOK3E
- /*
- * Check if an EPR external interrupt happened this bit is typically
- * set if we need to handle another "edge" interrupt from within the
- * MPIC "EPR" handler.
- */
- if (happened & PACA_IRQ_EE_EDGE) {
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- return 0x500;
- }
-
- if (happened & PACA_IRQ_DBELL) {
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0x280;
- }
-#else
- if (happened & PACA_IRQ_DBELL) {
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0xa00;
- }
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* There should be nothing left ! */
- BUG_ON(local_paca->irq_happened != 0);
-
- return 0;
-}
-
-notrace void arch_local_irq_restore(unsigned long mask)
-{
- unsigned char irq_happened;
- unsigned int replay;
-
- /* Write the new soft-enabled value */
- irq_soft_mask_set(mask);
- if (mask)
- return;
-
- /*
- * From this point onward, we can take interrupts, preempt,
- * etc... unless we got hard-disabled. We check if an event
- * happened. If none happened, we know we can just return.
- *
- * We may have preempted before the check below, in which case
- * we are checking the "new" CPU instead of the old one. This
- * is only a problem if an event happened on the "old" CPU.
- *
- * External interrupt events will have caused interrupts to
- * be hard-disabled, so there is no problem, we
- * cannot have preempted.
- */
- irq_happened = get_irq_happened();
- if (!irq_happened) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
- return;
- }
-
- /*
- * We need to hard disable to get a trusted value from
- * __check_irq_replay(). We also need to soft-disable
- * again to avoid warnings in there due to the use of
- * per-cpu variables.
- */
- if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
- __hard_irq_disable();
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- } else {
- /*
- * We should already be hard disabled here. We had bugs
- * where that wasn't the case so let's dbl check it and
- * warn if we are wrong. Only do that when IRQ tracing
- * is enabled as mfmsr() can be costly.
- */
- if (WARN_ON_ONCE(mfmsr() & MSR_EE))
- __hard_irq_disable();
-#endif
- }
-
- irq_soft_mask_set(IRQS_ALL_DISABLED);
- trace_hardirqs_off();
-
- /*
- * Check if anything needs to be re-emitted. We haven't
- * soft-enabled yet to avoid warnings in decrementer_check_overflow
- * accessing per-cpu variables
- */
- replay = __check_irq_replay();
-
- /* We can soft-enable now */
- trace_hardirqs_on();
- irq_soft_mask_set(IRQS_ENABLED);
-
- /*
- * And replay if we have to. This will return with interrupts
- * hard-enabled.
- */
- if (replay) {
- __replay_interrupt(replay);
- return;
- }
-
- /* Finally, let's ensure we are hard enabled */
- __hard_irq_enable();
-}
-EXPORT_SYMBOL(arch_local_irq_restore);
-
-/*
- * This is specifically called by assembly code to re-enable interrupts
- * if they are currently disabled. This is typically called before
- * schedule() or do_signal() when returning to userspace. We do it
- * in C to avoid the burden of dealing with lockdep etc...
- *
- * NOTE: This is called with interrupts hard disabled but not marked
- * as such in paca->irq_happened, so we need to resync this.
- */
-void notrace restore_interrupts(void)
-{
- if (irqs_disabled()) {
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
- local_irq_enable();
- } else
- __hard_irq_enable();
-}
-
-/*
- * This is a helper to use when about to go into idle low-power
- * when the latter has the side effect of re-enabling interrupts
- * (such as calling H_CEDE under pHyp).
- *
- * You call this function with interrupts soft-disabled (this is
- * already the case when ppc_md.power_save is called). The function
- * will return whether to enter power save or just return.
- *
- * In the former case, it will have notified lockdep of interrupts
- * being re-enabled and generally sanitized the lazy irq state,
- * and in the latter case it will leave with interrupts hard
- * disabled and marked as such, so the local_irq_enable() call
- * in arch_cpu_idle() will properly re-enable everything.
- */
-bool prep_irq_for_idle(void)
-{
- /*
- * First we need to hard disable to ensure no interrupt
- * occurs before we effectively enter the low power state
- */
- __hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- /*
- * If anything happened while we were soft-disabled,
- * we return now and do not enter the low power state.
- */
- if (lazy_irq_pending())
- return false;
-
- /* Tell lockdep we are about to re-enable */
- trace_hardirqs_on();
-
- /*
- * Mark interrupts as soft-enabled and clear the
- * PACA_IRQ_HARD_DIS from the pending mask since we
- * are about to hard enable as well as a side effect
- * of entering the low power state.
- */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- irq_soft_mask_set(IRQS_ENABLED);
-
- /* Tell the caller to enter the low power state */
- return true;
-}
-
-#ifdef CONFIG_PPC_BOOK3S
-/*
- * This is for idle sequences that return with IRQs off, but the
- * idle state itself wakes on interrupt. Tell the irq tracer that
- * IRQs are enabled for the duration of idle so it does not get long
- * off times. Must be paired with fini_irq_for_idle_irqsoff.
- */
-bool prep_irq_for_idle_irqsoff(void)
-{
- WARN_ON(!irqs_disabled());
-
- /*
- * First we need to hard disable to ensure no interrupt
- * occurs before we effectively enter the low power state
- */
- __hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- /*
- * If anything happened while we were soft-disabled,
- * we return now and do not enter the low power state.
- */
- if (lazy_irq_pending())
- return false;
-
- /* Tell lockdep we are about to re-enable */
- trace_hardirqs_on();
-
- return true;
-}
-
-/*
- * Take the SRR1 wakeup reason, index into this table to find the
- * appropriate irq_happened bit.
- *
- * Sytem reset exceptions taken in idle state also come through here,
- * but they are NMI interrupts so do not need to wait for IRQs to be
- * restored, and should be taken as early as practical. These are marked
- * with 0xff in the table. The Power ISA specifies 0100b as the system
- * reset interrupt reason.
- */
-#define IRQ_SYSTEM_RESET 0xff
-
-static const u8 srr1_to_lazyirq[0x10] = {
- 0, 0, 0,
- PACA_IRQ_DBELL,
- IRQ_SYSTEM_RESET,
- PACA_IRQ_DBELL,
- PACA_IRQ_DEC,
- 0,
- PACA_IRQ_EE,
- PACA_IRQ_EE,
- PACA_IRQ_HMI,
- 0, 0, 0, 0, 0 };
-
-void replay_system_reset(void)
-{
- struct pt_regs regs;
-
- ppc_save_regs(&regs);
- regs.trap = 0x100;
- get_paca()->in_nmi = 1;
- system_reset_exception(&regs);
- get_paca()->in_nmi = 0;
-}
-EXPORT_SYMBOL_GPL(replay_system_reset);
-
-void irq_set_pending_from_srr1(unsigned long srr1)
-{
- unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
- u8 reason = srr1_to_lazyirq[idx];
-
- /*
- * Take the system reset now, which is immediately after registers
- * are restored from idle. It's an NMI, so interrupts need not be
- * re-enabled before it is taken.
- */
- if (unlikely(reason == IRQ_SYSTEM_RESET)) {
- replay_system_reset();
- return;
- }
-
- /*
- * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
- * so this can be called unconditionally with the SRR1 wake
- * reason as returned by the idle code, which uses 0 to mean no
- * interrupt.
- *
- * If a future CPU was to designate this as an interrupt reason,
- * then a new index for no interrupt must be assigned.
- */
- local_paca->irq_happened |= reason;
-}
-#endif /* CONFIG_PPC_BOOK3S */
-
-/*
- * Force a replay of the external interrupt handler on this CPU.
- */
-void force_external_irq_replay(void)
-{
- /*
- * This must only be called with interrupts soft-disabled,
- * the replay will happen when re-enabling.
- */
- WARN_ON(!arch_irqs_disabled());
-
- /*
- * Interrupts must always be hard disabled before irq_happened is
- * modified (to prevent lost update in case of interrupt between
- * load and store).
- */
- __hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- /* Indicate in the PACA that we have an interrupt to replay */
- local_paca->irq_happened |= PACA_IRQ_EE;
-}
-
-#endif /* CONFIG_PPC64 */
-
int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
@@ -540,13 +126,14 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
seq_printf(p, " Machine check exceptions\n");
+#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_HVMODE)) {
seq_printf(p, "%*s: ", prec, "HMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat, j).hmi_exceptions);
+ seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs);
seq_printf(p, " Hypervisor Maintenance Interrupts\n");
}
+#endif
seq_printf(p, "%*s: ", prec, "NMI");
for_each_online_cpu(j)
@@ -584,7 +171,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += per_cpu(irq_stat, cpu).mce_exceptions;
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
- sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+#ifdef CONFIG_PPC_BOOK3S_64
+ sum += paca_ptrs[cpu]->hmi_irqs;
+#endif
sum += per_cpu(irq_stat, cpu).sreset_irqs;
#ifdef CONFIG_PPC_WATCHDOG
sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
@@ -596,40 +185,65 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
}
-static inline void check_stack_overflow(void)
+static inline void check_stack_overflow(unsigned long sp)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- long sp;
+ if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+ return;
- sp = current_stack_pointer() & (THREAD_SIZE-1);
+ sp &= THREAD_SIZE - 1;
- /* check for stack overflow: is there less than 2KB free? */
- if (unlikely(sp < 2048)) {
+ /* check for stack overflow: is there less than 1/4th free? */
+ if (unlikely(sp < THREAD_SIZE / 4)) {
pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
+}
+
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+static __always_inline void call_do_softirq(const void *sp)
+{
+ /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+ asm volatile (
+ PPC_STLU " %%r1, %[offset](%[sp]) ;"
+ "mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
+ "bl %[callee] ;"
#endif
+ PPC_LL " %%r1, 0(%%r1) ;"
+ : // Outputs
+ : // Inputs
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+ [callee] "i" (__do_softirq)
+ : // Clobbers
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+ "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12"
+ );
}
+#endif
-void __do_irq(struct pt_regs *regs)
+DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
+
+static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
{
unsigned int irq;
- irq_enter();
-
trace_irq_entry(regs);
- check_stack_overflow();
+ check_stack_overflow(oldsp);
/*
* Query the platform PIC for the interrupt & ack it.
*
* This will typically lower the interrupt line to the CPU
*/
- irq = ppc_md.get_irq();
+ irq = static_call(ppc_get_irq)();
/* We can hard enable interrupts now to allow perf interrupts */
- may_hard_irq_enable();
+ if (should_hard_irq_enable(regs))
+ do_hard_irq_enable();
/* And finally process it */
if (unlikely(!irq))
@@ -638,39 +252,89 @@ void __do_irq(struct pt_regs *regs)
generic_handle_irq(irq);
trace_irq_exit(regs);
+}
- irq_exit();
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+ register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+ /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+ asm volatile (
+ PPC_STLU " %%r1, %[offset](%[sp]) ;"
+ "mr %%r4, %%r1 ;"
+ "mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
+ "bl %[callee] ;"
+#endif
+ PPC_LL " %%r1, 0(%%r1) ;"
+ : // Outputs
+ "+r" (r3)
+ : // Inputs
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+ [callee] "i" (__do_irq)
+ : // Clobbers
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+ "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12"
+ );
}
-void do_IRQ(struct pt_regs *regs)
+void __do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
- /* Already there ? */
- if (unlikely(cursp == irqsp || cursp == sirqsp)) {
- __do_irq(regs);
- set_irq_regs(old_regs);
- return;
- }
- /* Switch stack and call */
- call_do_irq(regs, irqsp);
+ /* Already there ? If not switch stack and call */
+ if (unlikely(cursp == irqsp || cursp == sirqsp))
+ __do_irq(regs, current_stack_pointer);
+ else
+ call_do_irq(regs, irqsp);
set_irq_regs(old_regs);
}
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
+{
+ __do_IRQ(regs);
+}
+
+static void *__init alloc_vm_stack(void)
+{
+ return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
+ NUMA_NO_NODE, (void *)_RET_IP_);
+}
+
+static void __init vmap_irqstack_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = alloc_vm_stack();
+ hardirq_ctx[i] = alloc_vm_stack();
+ }
+}
+
+
void __init init_IRQ(void)
{
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ vmap_irqstack_init();
+
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
+
+ if (!WARN_ON(!ppc_md.get_irq))
+ static_call_update(ppc_get_irq, ppc_md.get_irq);
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void *critirq_ctx[NR_CPUS] __read_mostly;
void *dbgirq_ctx[NR_CPUS] __read_mostly;
void *mcheckirq_ctx[NR_CPUS] __read_mostly;
@@ -679,10 +343,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
void *softirq_ctx[NR_CPUS] __read_mostly;
void *hardirq_ctx[NR_CPUS] __read_mostly;
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
void do_softirq_own_stack(void)
{
call_do_softirq(softirq_ctx[smp_processor_id()]);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
@@ -726,13 +392,3 @@ int irq_choose_cpu(const struct cpumask *mask)
return hard_smp_processor_id();
}
#endif
-
-#ifdef CONFIG_PPC64
-static int __init setup_noirqdistrib(char *str)
-{
- distribute_irqs = 0;
- return 1;
-}
-
-__setup("noirqdistrib", setup_noirqdistrib);
-#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
new file mode 100644
index 000000000000..938e66829eae
--- /dev/null
+++ b/arch/powerpc/kernel/irq_64.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
+
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
+
+#include <asm/paca.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/dbell.h>
+#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
+
+int distribute_irqs = 1;
+
+static inline void next_interrupt(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+ WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ }
+
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ lockdep_hardirq_exit();
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+ lockdep_hardirq_enter();
+}
+
+static inline bool irq_happened_test_and_clear(u8 irq)
+{
+ if (local_paca->irq_happened & irq) {
+ local_paca->irq_happened &= ~irq;
+ return true;
+ }
+ return false;
+}
+
+static __no_kcsan void __replay_soft_interrupts(void)
+{
+ struct pt_regs regs;
+
+ /*
+ * We use local_paca rather than get_paca() to avoid all the
+ * debug_smp_processor_id() business in this low level function.
+ */
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+ WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+ WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+ }
+
+ /*
+ * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling
+ * MSR[EE] to get PMIs, which can result in more IRQs becoming
+ * pending.
+ */
+ local_paca->irq_happened |= PACA_IRQ_REPLAYING;
+
+ ppc_save_regs(&regs);
+ regs.softe = IRQS_ENABLED;
+ regs.msr |= MSR_EE;
+
+ /*
+ * Force the delivery of pending soft-disabled interrupts on PS3.
+ * Any HV call will have this side effect.
+ */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+ u64 tmp, tmp2;
+ lv1_get_version_info(&tmp, &tmp2);
+ }
+
+ /*
+ * Check if an hypervisor Maintenance interrupt happened.
+ * This is a higher priority interrupt than the others, so
+ * replay it first.
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+ irq_happened_test_and_clear(PACA_IRQ_HMI)) {
+ regs.trap = INTERRUPT_HMI;
+ handle_hmi_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (irq_happened_test_and_clear(PACA_IRQ_DEC)) {
+ regs.trap = INTERRUPT_DECREMENTER;
+ timer_interrupt(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (irq_happened_test_and_clear(PACA_IRQ_EE)) {
+ regs.trap = INTERRUPT_EXTERNAL;
+ do_IRQ(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_DOORBELL) &&
+ irq_happened_test_and_clear(PACA_IRQ_DBELL)) {
+ regs.trap = INTERRUPT_DOORBELL;
+ doorbell_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ /* Book3E does not support soft-masking PMI interrupts */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+ irq_happened_test_and_clear(PACA_IRQ_PMI)) {
+ regs.trap = INTERRUPT_PERFMON;
+ performance_monitor_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ local_paca->irq_happened &= ~PACA_IRQ_REPLAYING;
+}
+
+__no_kcsan void replay_soft_interrupts(void)
+{
+ irq_enter(); /* See comment in arch_local_irq_restore */
+ __replay_soft_interrupts();
+ irq_exit();
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
+static inline __no_kcsan void replay_soft_interrupts_irqrestore(void)
+{
+ unsigned long kuap_state = get_kuap();
+
+ /*
+ * Check if anything calls local_irq_enable/restore() when KUAP is
+ * disabled (user access enabled). We handle that case here by saving
+ * and re-locking AMR but we shouldn't get here in the first place,
+ * hence the warning.
+ */
+ kuap_assert_locked();
+
+ if (kuap_state != AMR_KUAP_BLOCKED)
+ set_kuap(AMR_KUAP_BLOCKED);
+
+ __replay_soft_interrupts();
+
+ if (kuap_state != AMR_KUAP_BLOCKED)
+ set_kuap(kuap_state);
+}
+#else
+#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts()
+#endif
+
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
+{
+ unsigned char irq_happened;
+
+ /* Write the new soft-enabled value if it is a disable */
+ if (mask) {
+ irq_soft_mask_set(mask);
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON_ONCE(in_nmi());
+ WARN_ON_ONCE(in_hardirq());
+ WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+ }
+
+again:
+ /*
+ * After the stb, interrupts are unmasked and there are no interrupts
+ * pending replay. The restart sequence makes this atomic with
+ * respect to soft-masked interrupts. If this was just a simple code
+ * sequence, a soft-masked interrupt could become pending right after
+ * the comparison and before the stb.
+ *
+ * This allows interrupts to be unmasked without hard disabling, and
+ * also without new hard interrupts coming in ahead of pending ones.
+ */
+ asm_volatile_goto(
+"1: \n"
+" lbz 9,%0(13) \n"
+" cmpwi 9,0 \n"
+" bne %l[happened] \n"
+" stb 9,%1(13) \n"
+"2: \n"
+ RESTART_TABLE(1b, 2b, 1b)
+ : : "i" (offsetof(struct paca_struct, irq_happened)),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "cr0", "r9"
+ : happened);
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+
+ /*
+ * If we came here from the replay below, we might have a preempt
+ * pending (due to preempt_enable_no_resched()). Have to check now.
+ */
+ preempt_check_resched();
+
+ return;
+
+happened:
+ irq_happened = READ_ONCE(local_paca->irq_happened);
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!irq_happened);
+
+ if (irq_happened == PACA_IRQ_HARD_DIS) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+ irq_soft_mask_set(IRQS_ENABLED);
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ preempt_check_resched();
+ return;
+ }
+
+ /* Have interrupts to replay, need to hard disable first */
+ if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (!(mfmsr() & MSR_EE)) {
+ /*
+ * An interrupt could have come in and cleared
+ * MSR[EE] and set IRQ_HARD_DIS, so check
+ * IRQ_HARD_DIS again and warn if it is still
+ * clear.
+ */
+ irq_happened = READ_ONCE(local_paca->irq_happened);
+ WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS));
+ }
+ }
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ } else {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+ }
+
+ /*
+ * Disable preempt here, so that the below preempt_enable will
+ * perform resched if required (a replayed interrupt may set
+ * need_resched).
+ */
+ preempt_disable();
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ trace_hardirqs_off();
+
+ /*
+ * Now enter interrupt context. The interrupt handlers themselves
+ * also call irq_enter/exit (which is okay, they can nest). But call
+ * it here now to hold off softirqs until the below irq_exit(). If
+ * we allowed replayed handlers to run softirqs, that enables irqs,
+ * which must replay interrupts, which recurses in here and makes
+ * things more complicated. The recursion is limited to 2, and it can
+ * be made to work, but it's complicated.
+ *
+ * local_bh_disable can not be used here because interrupts taken in
+ * idle are not in the right context (RCU, tick, etc) to run softirqs
+ * so irq_enter must be called.
+ */
+ irq_enter();
+
+ replay_soft_interrupts_irqrestore();
+
+ irq_exit();
+
+ if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) {
+ /*
+ * The softirq processing in irq_exit() may enable interrupts
+ * temporarily, which can result in MSR[EE] being enabled and
+ * more irqs becoming pending. Go around again if that happens.
+ */
+ trace_hardirqs_on();
+ preempt_enable_no_resched();
+ goto again;
+ }
+
+ trace_hardirqs_on();
+ irq_soft_mask_set(IRQS_ENABLED);
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ preempt_enable();
+}
+EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). The function
+ * will return whether to enter power save or just return.
+ *
+ * In the former case, it will have generally sanitized the lazy irq
+ * state, and in the latter case it will leave with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in arch_cpu_idle() will properly re-enable everything.
+ */
+__cpuidle bool prep_irq_for_idle(void)
+{
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /*
+ * Mark interrupts as soft-enabled and clear the
+ * PACA_IRQ_HARD_DIS from the pending mask since we
+ * are about to hard enable as well as a side effect
+ * of entering the low power state.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ /* Tell the caller to enter the low power state */
+ return true;
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * This is for idle sequences that return with IRQs off, but the
+ * idle state itself wakes on interrupt. Tell the irq tracer that
+ * IRQs are enabled for the duration of idle so it does not get long
+ * off times. Must be paired with fini_irq_for_idle_irqsoff.
+ */
+bool prep_irq_for_idle_irqsoff(void)
+{
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ return true;
+}
+
+/*
+ * Take the SRR1 wakeup reason, index into this table to find the
+ * appropriate irq_happened bit.
+ *
+ * Sytem reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
+ */
+#define IRQ_SYSTEM_RESET 0xff
+
+static const u8 srr1_to_lazyirq[0x10] = {
+ 0, 0, 0,
+ PACA_IRQ_DBELL,
+ IRQ_SYSTEM_RESET,
+ PACA_IRQ_DBELL,
+ PACA_IRQ_DEC,
+ 0,
+ PACA_IRQ_EE,
+ PACA_IRQ_EE,
+ PACA_IRQ_HMI,
+ 0, 0, 0, 0, 0 };
+
+void replay_system_reset(void)
+{
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.trap = 0x100;
+ get_paca()->in_nmi = 1;
+ system_reset_exception(&regs);
+ get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
+void irq_set_pending_from_srr1(unsigned long srr1)
+{
+ unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+ u8 reason = srr1_to_lazyirq[idx];
+
+ /*
+ * Take the system reset now, which is immediately after registers
+ * are restored from idle. It's an NMI, so interrupts need not be
+ * re-enabled before it is taken.
+ */
+ if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+ replay_system_reset();
+ return;
+ }
+
+ if (reason == PACA_IRQ_DBELL) {
+ /*
+ * When doorbell triggers a system reset wakeup, the message
+ * is not cleared, so if the doorbell interrupt is replayed
+ * and the IPI handled, the doorbell interrupt would still
+ * fire when EE is enabled.
+ *
+ * To avoid taking the superfluous doorbell interrupt,
+ * execute a msgclr here before the interrupt is replayed.
+ */
+ ppc_msgclr(PPC_DBELL_MSGTYPE);
+ }
+
+ /*
+ * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
+ * so this can be called unconditionally with the SRR1 wake
+ * reason as returned by the idle code, which uses 0 to mean no
+ * interrupt.
+ *
+ * If a future CPU was to designate this as an interrupt reason,
+ * then a new index for no interrupt must be assigned.
+ */
+ local_paca->irq_happened |= reason;
+}
+#endif /* CONFIG_PPC_BOOK3S */
+
+/*
+ * Force a replay of the external interrupt handler on this CPU.
+ */
+void force_external_irq_replay(void)
+{
+ /*
+ * This must only be called with interrupts soft-disabled,
+ * the replay will happen when re-enabling.
+ */
+ WARN_ON(!arch_irqs_disabled());
+
+ /*
+ * Interrupts must always be hard disabled before irq_happened is
+ * modified (to prevent lost update in case of interrupt between
+ * load and store).
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /* Indicate in the PACA that we have an interrupt to replay */
+ local_paca->irq_happened |= PACA_IRQ_EE;
+}
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 773671b512df..48e0eaf1ad61 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -18,10 +18,11 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/of_address.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
@@ -38,82 +39,66 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
#define ISA_SPACE_MASK 0x1
#define ISA_SPACE_IO 0x1
-static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
- unsigned long phb_io_base_phys)
+static void remap_isa_base(phys_addr_t pa, unsigned long size)
+{
+ WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK);
+ WARN_ON_ONCE(pa & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
+ vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
+ } else {
+ early_ioremap_range(ISA_IO_BASE, pa, size,
+ pgprot_noncached(PAGE_KERNEL));
+ }
+}
+
+static int process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
{
- /* We should get some saner parsing here and remove these structs */
- struct pci_address {
- u32 a_hi;
- u32 a_mid;
- u32 a_lo;
- };
-
- struct isa_address {
- u32 a_hi;
- u32 a_lo;
- };
-
- struct isa_range {
- struct isa_address isa_addr;
- struct pci_address pci_addr;
- unsigned int size;
- };
-
- const struct isa_range *range;
- unsigned long pci_addr;
- unsigned int isa_addr;
unsigned int size;
- int rlen = 0;
+ struct of_range_parser parser;
+ struct of_range range;
- range = of_get_property(isa_node, "ranges", &rlen);
- if (range == NULL || (rlen < sizeof(struct isa_range)))
+ if (of_range_parser_init(&parser, isa_node))
goto inval_range;
- /* From "ISA Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 1: an ISA address
- * cells 2 - 4: a PCI address
- * (size depending on dev->n_addr_cells)
- * cell 5: the size of the range
- */
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
- range++;
- rlen -= sizeof(struct isa_range);
- if (rlen < sizeof(struct isa_range))
- goto inval_range;
- }
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
- goto inval_range;
+ for_each_of_range(&parser, &range) {
+ if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ continue;
- isa_addr = range->isa_addr.a_lo;
- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
- range->pci_addr.a_lo;
+ if (range.cpu_addr == OF_BAD_ADDR) {
+ pr_err("ISA: Bad CPU mapping: %s\n", __func__);
+ return -EINVAL;
+ }
- /* Assume these are both zero. Note: We could fix that and
- * do a proper parsing instead ... oh well, that will do for
- * now as nobody uses fancy mappings for ISA bridges
- */
- if ((pci_addr != 0) || (isa_addr != 0)) {
- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
- __func__);
- return;
- }
+ /* We need page alignment */
+ if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) {
+ pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node);
+ return -EINVAL;
+ }
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(range->size);
- if (size > 0x10000)
- size = 0x10000;
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range.size);
+ if (size > 0x10000)
+ size = 0x10000;
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
- return;
+ if (!phb_io_base_phys)
+ phb_io_base_phys = range.cpu_addr;
+
+ remap_isa_base(phb_io_base_phys, size);
+ return 0;
+ }
inval_range:
- printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
- "mapping 64k\n");
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_noncached(PAGE_KERNEL));
+ if (phb_io_base_phys) {
+ pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
+ remap_isa_base(phb_io_base_phys, 0x10000);
+ return 0;
+ }
+ return -EINVAL;
}
@@ -155,7 +140,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
isa_bridge_devnode = np;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+ process_ISA_OF_ranges(np, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
@@ -171,75 +156,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
*/
void __init isa_bridge_init_non_pci(struct device_node *np)
{
- const __be32 *ranges, *pbasep = NULL;
- int rlen, i, rs;
- u32 na, ns, pna;
- u64 cbase, pbase, size = 0;
+ int ret;
/* If we already have an ISA bridge, bail off */
if (isa_bridge_devnode != NULL)
return;
- pna = of_n_addr_cells(np);
- if (of_property_read_u32(np, "#address-cells", &na) ||
- of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
- np);
- return;
- }
-
- /* Check it's a supported address format */
- if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
- np);
- return;
- }
- rs = na + ns + pna;
-
- /* Grab the ranges property */
- ranges = of_get_property(np, "ranges", &rlen);
- if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
- np);
+ ret = process_ISA_OF_ranges(np, 0);
+ if (ret)
return;
- }
-
- /* Parse it. We are only looking for IO space */
- for (i = 0; (i + rs - 1) < rlen; i += rs) {
- if (be32_to_cpup(ranges + i) != 1)
- continue;
- cbase = be32_to_cpup(ranges + i + 1);
- size = of_read_number(ranges + i + na + pna, ns);
- pbasep = ranges + i + na;
- break;
- }
-
- /* Got something ? */
- if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
- np);
- return;
- }
-
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(size);
- if (size > 0x10000)
- size = 0x10000;
-
- /* Map pbase */
- pbase = of_translate_address(np, pbasep);
- if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
- np);
- return;
- }
-
- /* We need page alignment */
- if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
- np);
- return;
- }
/* Got it */
isa_bridge_devnode = np;
@@ -248,8 +173,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
* and map it
*/
isa_io_base = ISA_IO_BASE;
- __ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -268,7 +191,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
isa_bridge_pcidev = pdev;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+ process_ISA_OF_ranges(devnode, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
@@ -297,7 +220,7 @@ static void isa_bridge_remove(void)
isa_bridge_pcidev = NULL;
/* Unmap the ISA area */
- __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+ vunmap_range(ISA_IO_BASE, ISA_IO_BASE + 0x10000);
}
/**
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ca37702bde97..5277cf582c16 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -6,14 +6,15 @@
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/code-patching.h>
+#include <asm/inst.h>
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- u32 *addr = (u32 *)(unsigned long)entry->code;
+ u32 *addr = (u32 *)jump_entry_code(entry);
if (type == JUMP_LABEL_JMP)
- patch_branch(addr, entry->target, 0);
+ patch_branch(addr, jump_entry_target(entry), 0);
else
- patch_instruction(addr, PPC_INST_NOP);
+ patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
}
diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c
new file mode 100644
index 000000000000..36d3124d5a8b
--- /dev/null
+++ b/arch/powerpc/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+ arch_debugfs_dir = debugfs_create_dir("powerpc", NULL);
+ return 0;
+}
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 7dd55eb1259d..ebe4d1645ca1 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerPC backend to the KGDB stub.
*
@@ -8,10 +9,6 @@
* PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
* Sergei Shtylyov <sshtylyov@ru.mvista.com>
* Copyright (C) 2007-2008 Wind River Systems, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program as licensed "as is" without any warranty of any
- * kind, whether express or implied.
*/
#include <linux/kernel.h>
@@ -26,6 +23,7 @@
#include <asm/debug.h>
#include <asm/code-patching.h>
#include <linux/slab.h>
+#include <asm/inst.h>
/*
* This table contains the mapping between PowerPC hardware trap types, and
@@ -47,9 +45,9 @@ static struct hard_trap_info
{ 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
{ 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE_OR_40x
{ 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
-#if defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_PPC_85xx)
{ 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
{ 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
@@ -59,14 +57,14 @@ static struct hard_trap_info
{ 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
{ 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
{ 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
-#else /* ! CONFIG_FSL_BOOKE */
+#else /* ! CONFIG_PPC_85xx */
{ 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */
{ 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
{ 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
{ 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
#endif
-#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+#else /* !CONFIG_BOOKE_OR_40x */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
@@ -146,7 +144,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 0;
if (*(u32 *)regs->nip == BREAK_INSTR)
- regs->nip += BREAK_INSTR_SIZE;
+ regs_add_return_ip(regs, BREAK_INSTR_SIZE);
return 1;
}
@@ -193,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs)
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
- STACK_FRAME_OVERHEAD);
+ STACK_INT_FRAME_REGS);
unsigned long *ptr = gdb_regs;
int reg;
@@ -210,7 +208,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
for (reg = 14; reg < 32; reg++)
PACK64(ptr, regs->gpr[reg]);
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
#ifdef CONFIG_SPE
for (reg = 0; reg < 32; reg++)
PACK64(ptr, p->thread.evr[reg]);
@@ -236,7 +234,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
#define GDB_SIZEOF_REG sizeof(unsigned long)
#define GDB_SIZEOF_REG_U32 sizeof(u32)
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
#else
#define GDB_SIZEOF_FLOAT_REG sizeof(u64)
@@ -331,7 +329,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
if (regno >= 32 && regno < 64) {
/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
if (current)
memcpy(mem, &current->thread.evr[regno-32],
dbg_reg_def[regno].size);
@@ -357,7 +355,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
if (regno >= 32 && regno < 64) {
/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
memcpy(&current->thread.evr[regno-32], mem,
dbg_reg_def[regno].size);
#else
@@ -371,11 +369,11 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
{
- regs->nip = pc;
+ regs_set_return_ip(regs, pc);
}
/*
- * This function does PowerPC specific procesing for interfacing to gdb.
+ * This function does PowerPC specific processing for interfacing to gdb.
*/
int kgdb_arch_handle_exception(int vector, int signo, int err_code,
char *remcom_in_buffer, char *remcom_out_buffer,
@@ -393,7 +391,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
case 'c':
/* handle the optional parameter */
if (kgdb_hex2long(&ptr, &addr))
- linux_regs->nip = addr;
+ regs_set_return_ip(linux_regs, addr);
atomic_set(&kgdb_cpu_doing_single_step, -1);
/* set the trace bit if we're stepping */
@@ -401,9 +399,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
mtspr(SPRN_DBCR0,
mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
- linux_regs->msr |= MSR_DE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE);
#else
- linux_regs->msr |= MSR_SE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE);
#endif
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
@@ -416,19 +414,18 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
+ u32 instr, *addr = (u32 *)bpt->bpt_addr;
int err;
- unsigned int instr;
- unsigned int *addr = (unsigned int *)bpt->bpt_addr;
- err = probe_kernel_address(addr, instr);
+ err = get_kernel_nofault(instr, addr);
if (err)
return err;
- err = patch_instruction(addr, BREAK_INSTR);
+ err = patch_instruction(addr, ppc_inst(BREAK_INSTR));
if (err)
return -EFAULT;
- *(unsigned int *)bpt->saved_instr = instr;
+ *(u32 *)bpt->saved_instr = instr;
return 0;
}
@@ -437,9 +434,9 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
unsigned int instr = *(unsigned int *)bpt->saved_instr;
- unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+ u32 *addr = (u32 *)bpt->bpt_addr;
- err = patch_instruction(addr, instr);
+ err = patch_instruction(addr, ppc_inst(instr));
if (err)
return -EFAULT;
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..072ebe7f290b 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -14,14 +14,21 @@
/* Ftrace callback handler for kprobes */
void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
+ struct pt_regs *regs;
+ int bit;
+ bit = ftrace_test_recursion_trylock(nip, parent_nip);
+ if (bit < 0)
+ return;
+
+ regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
- return;
+ goto out;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
@@ -31,7 +38,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
* On powerpc, NIP is *before* this instruction for the
* pre handler
*/
- regs->nip -= MCOUNT_INSN_SIZE;
+ regs_add_return_ip(regs, -MCOUNT_INSN_SIZE);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -40,7 +47,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
* Emulate singlestep (and also recover regs->nip)
* as if there is a nop
*/
- regs->nip += MCOUNT_INSN_SIZE;
+ regs_add_return_ip(regs, MCOUNT_INSN_SIZE);
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
@@ -52,6 +59,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
*/
__this_cpu_write(current_kprobe, NULL);
}
+out:
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 2d27ec4feee4..b20ee72e873a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,10 +19,13 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
+#include <linux/moduleloader.h>
+#include <linux/set_memory.h>
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/sections.h>
+#include <asm/inst.h>
#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -42,7 +45,7 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
{
kprobe_opcode_t *addr = NULL;
-#ifdef PPC64_ELF_ABI_v2
+#ifdef CONFIG_PPC64_ELF_ABI_V2
/* PPC64 ABIv2 needs local entry point */
addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
if (addr && !offset) {
@@ -60,7 +63,7 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
#endif
addr = (kprobe_opcode_t *)ppc_function_entry(addr);
}
-#elif defined(PPC64_ELF_ABI_v1)
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
/*
* 64bit powerpc ABIv1 uses function descriptors:
* - Check for the dot variant of the symbol first.
@@ -102,16 +105,67 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
return addr;
}
+static bool arch_kprobe_on_func_entry(unsigned long offset)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ return offset <= 16;
+#else
+ return offset <= 8;
+#endif
+#else
+ return !offset;
+#endif
+}
+
+/* XXX try and fold the magic of kprobe_lookup_name() in this */
+kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
+ bool *on_func_entry)
+{
+ *on_func_entry = arch_kprobe_on_func_entry(offset);
+ return (kprobe_opcode_t *)(addr + offset);
+}
+
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (!page)
+ return NULL;
+
+ if (strict_module_rwx_enabled())
+ set_memory_rox((unsigned long)page, 1);
+
+ return page;
+}
+
int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
- kprobe_opcode_t insn = *p->addr;
+ struct kprobe *prev;
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
ret = -EINVAL;
- } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
- printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
+ } else if (!can_single_step(ppc_inst_val(insn))) {
+ printk("Cannot register a kprobe on instructions that can't be single stepped\n");
+ ret = -EINVAL;
+ } else if ((unsigned long)p->addr & ~PAGE_MASK &&
+ ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) {
+ printk("Cannot register a kprobe on the second word of prefixed instruction\n");
+ ret = -EINVAL;
+ }
+ prev = get_kprobe(p->addr - 1);
+
+ /*
+ * When prev is a ftrace-based kprobe, we don't have an insn, and it
+ * doesn't probe for prefixed instruction.
+ */
+ if (prev && !kprobe_ftrace(prev) &&
+ ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
+ printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
@@ -124,11 +178,8 @@ int arch_prepare_kprobe(struct kprobe *p)
}
if (!ret) {
- memcpy(p->ainsn.insn, p->addr,
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- p->opcode = *p->addr;
- flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ patch_instruction(p->ainsn.insn, insn);
+ p->opcode = ppc_inst_val(insn);
}
p->ainsn.boostable = 0;
@@ -138,13 +189,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
void arch_arm_kprobe(struct kprobe *p)
{
- patch_instruction(p->addr, BREAKPOINT_INSTRUCTION);
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)));
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
void arch_disarm_kprobe(struct kprobe *p)
{
- patch_instruction(p->addr, p->opcode);
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode)));
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -167,7 +218,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs
* variant as values in regs could play a part in
* if the trap is taken or not
*/
- regs->nip = (unsigned long)p->ainsn.insn;
+ regs_set_return_ip(regs, (unsigned long)p->ainsn.insn);
}
static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
@@ -191,32 +242,20 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs
kcb->kprobe_saved_msr = regs->msr;
}
-bool arch_kprobe_on_func_entry(unsigned long offset)
-{
-#ifdef PPC64_ELF_ABI_v2
-#ifdef CONFIG_KPROBES_ON_FTRACE
- return offset <= 16;
-#else
- return offset <= 8;
-#endif
-#else
- return !offset;
-#endif
-}
-
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->link;
+ ri->fp = NULL;
/* Replace the return addr with trampoline addr */
- regs->link = (unsigned long)kretprobe_trampoline;
+ regs->link = (unsigned long)__kretprobe_trampoline;
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- unsigned int insn = *p->ainsn.insn;
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
@@ -233,7 +272,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
* So, we should never get here... but, its still
* good to catch them, just in case...
*/
- printk("Can't step on instruction %x\n", insn);
+ printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn));
BUG();
} else {
/*
@@ -264,6 +303,10 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
+ if (!IS_ENABLED(CONFIG_BOOKE) &&
+ (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -271,64 +314,20 @@ int kprobe_handler(struct pt_regs *regs)
preempt_disable();
kcb = get_kprobe_ctlblk();
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- kprobe_opcode_t insn = *p->ainsn.insn;
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- is_trap(insn)) {
- /* Turn off 'trace' bits */
- regs->msr &= ~MSR_SINGLESTEP;
- regs->msr |= kcb->kprobe_saved_msr;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- kcb->kprobe_status = KPROBE_REENTER;
- if (p->ainsn.boostable >= 0) {
- ret = try_to_emulate(p, regs);
-
- if (ret > 0) {
- restore_previous_kprobe(kcb);
- preempt_enable_no_resched();
- return 1;
- }
- }
- prepare_singlestep(p, regs);
- return 1;
- } else if (*addr != BREAKPOINT_INSTRUCTION) {
- /* If trap variant, then it belongs not to us */
- kprobe_opcode_t cur_insn = *addr;
-
- if (is_trap(cur_insn))
- goto no_kprobe;
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- ret = 1;
- }
- goto no_kprobe;
- }
-
p = get_kprobe(addr);
if (!p) {
- if (*addr != BREAKPOINT_INSTRUCTION) {
+ unsigned int instr;
+
+ if (get_kernel_nofault(instr, addr))
+ goto no_kprobe;
+
+ if (instr != BREAKPOINT_INSTRUCTION) {
/*
* PowerPC has multiple variants of the "trap"
* instruction. If the current instruction is a
* trap variant, it could belong to someone else
*/
- kprobe_opcode_t cur_insn = *addr;
- if (is_trap(cur_insn))
+ if (is_trap(instr))
goto no_kprobe;
/*
* The breakpoint instruction was removed right
@@ -343,12 +342,46 @@ int kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
+ /* Turn off 'trace' bits */
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
+ goto no_kprobe;
+ }
+
+ /*
+ * We have reentered the kprobe_handler(), since another probe
+ * was hit while within the handler. We here save the original
+ * kprobes variables and just single step on the instruction of
+ * the new probe without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ preempt_enable();
+ return 1;
+ }
+ }
+ prepare_singlestep(p, regs);
+ return 1;
+ }
+
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
set_current_kprobe(p, regs, kcb);
if (p->pre_handler && p->pre_handler(p, regs)) {
/* handler changed execution path, so skip ss setup */
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
@@ -361,7 +394,7 @@ int kprobe_handler(struct pt_regs *regs)
kcb->kprobe_status = KPROBE_HIT_SSDONE;
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
}
@@ -370,7 +403,7 @@ int kprobe_handler(struct pt_regs *regs)
return 1;
no_kprobe:
- preempt_enable_no_resched();
+ preempt_enable();
return ret;
}
NOKPROBE_SYMBOL(kprobe_handler);
@@ -381,62 +414,21 @@ NOKPROBE_SYMBOL(kprobe_handler);
* - When the probed function returns, this probe
* causes the handlers to fire
*/
-asm(".global kretprobe_trampoline\n"
- ".type kretprobe_trampoline, @function\n"
- "kretprobe_trampoline:\n"
+asm(".global __kretprobe_trampoline\n"
+ ".type __kretprobe_trampoline, @function\n"
+ "__kretprobe_trampoline:\n"
"nop\n"
"blr\n"
- ".size kretprobe_trampoline, .-kretprobe_trampoline\n");
+ ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n");
/*
* Called when the probe at kretprobe trampoline is hit
*/
static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more than one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ unsigned long orig_ret_address;
+ orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
/*
* We get here through one of two paths:
* 1. by taking a trap -> kprobe_handler() -> here
@@ -446,22 +438,15 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* as it is used to determine the return address from the trap.
* For (2), since nip is not honoured with optprobes, we instead setup
* the link register properly so that the subsequent 'blr' in
- * kretprobe_trampoline jumps back to the right instruction.
+ * __kretprobe_trampoline jumps back to the right instruction.
*
* For nip, we should set the address to the previous instruction since
* we end up emulating it in kprobe_handler(), which increments the nip
* again.
*/
- regs->nip = orig_ret_address - 4;
+ regs_set_return_ip(regs, orig_ret_address - 4);
regs->link = orig_ret_address;
- kretprobe_hash_unlock(current, &flags);
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
-
return 0;
}
NOKPROBE_SYMBOL(trampoline_probe_handler);
@@ -476,14 +461,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
*/
int kprobe_post_handler(struct pt_regs *regs)
{
+ int len;
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (!cur || user_mode(regs))
return 0;
+ len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn));
/* make sure we got here for instruction we have a kprobe on */
- if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+ if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
return 0;
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
@@ -492,8 +479,8 @@ int kprobe_post_handler(struct pt_regs *regs)
}
/* Adjust nip to after the single-stepped instruction */
- regs->nip = (unsigned long)cur->addr + 4;
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr + len);
+ regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr);
/*Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -502,7 +489,7 @@ int kprobe_post_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
+ preempt_enable();
/*
* if somebody else is singlestepping across a probe point, msr
@@ -532,40 +519,25 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* and allow the page fault handler to continue as a
* normal page fault.
*/
- regs->nip = (unsigned long)cur->addr;
- regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr);
+ /* Turn off 'trace' bits */
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = extable_fixup(entry);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
@@ -581,19 +553,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
NOKPROBE_SYMBOL(kprobe_fault_handler);
-unsigned long arch_deref_entry_point(void *entry)
-{
-#ifdef PPC64_ELF_ABI_v1
- if (!kernel_text_address((unsigned long)entry))
- return ppc_global_function_entry(entry);
- else
-#endif
- return (unsigned long)entry;
-}
-NOKPROBE_SYMBOL(arch_deref_entry_point);
-
static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
.pre_handler = trampoline_probe_handler
};
@@ -604,7 +565,7 @@ int __init arch_init_kprobes(void)
int arch_trampoline_kprobe(struct kprobe *p)
{
- if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
return 1;
return 0;
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 617eba82531c..5b3c093611ba 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -455,7 +455,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
break;
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
case KVM_INST_MFSPR(SPRN_MAS0):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
@@ -484,7 +484,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
case KVM_INST_MFSPR(SPRN_SPRG4):
#ifdef CONFIG_BOOKE
@@ -557,7 +557,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
case KVM_INST_MTSPR(SPRN_DSISR):
kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
break;
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
case KVM_INST_MTSPR(SPRN_MAS0):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
@@ -586,7 +586,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
case KVM_INST_MTSPR(SPRN_SPRG4):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
@@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
on_each_cpu(kvm_map_magic_page, &features, 1);
/* Quick self-test to see if the mapping works */
- if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
+ if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
+ sizeof(u32))) {
kvm_patching_worked = false;
return;
}
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 2020d255585f..f2e03ed423d0 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -256,7 +256,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
sync
/* Restore MSR (restores EE and DR bits to original state) */
- SYNC
mtmsr r7
isync
@@ -293,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -377,7 +376,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
1: bdnz 1b
/* Restore MSR (restores EE and DR bits to original state) */
-4: SYNC
+4:
mtmsr r7
isync
blr
@@ -402,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
@@ -455,5 +454,6 @@ _GLOBAL(__inval_enable_L1)
sync
blr
+_ASM_NOKPROBE_SYMBOL(__inval_enable_L1)
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7cea5978f21f..1da2f6e7d2a1 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -5,16 +5,17 @@
#include <linux/serial_core.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <linux/serial_reg.h>
#include <asm/io.h>
#include <asm/mmu.h>
-#include <asm/prom.h>
#include <asm/serial.h>
#include <asm/udbg.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
+#include <asm/early_ioremap.h>
#undef DEBUG
@@ -34,6 +35,7 @@ static struct legacy_serial_info {
unsigned int clock;
int irq_check_parent;
phys_addr_t taddr;
+ void __iomem *early_addr;
} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
static const struct of_device_id legacy_serial_parents[] __initconst = {
@@ -169,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np,
/* We only support ports that have a clock frequency properly
* encoded in the device-tree.
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* if reg-offset don't try to use it */
- if ((of_get_property(np, "reg-offset", NULL) != NULL))
+ if (of_property_present(np, "reg-offset"))
return -1;
/* if rtas uses this device, don't try to use it as well */
- if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+ if (of_property_read_bool(np, "used-by-rtas"))
return -1;
/* Get the address */
@@ -235,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
* Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
- of_get_property(isa_brg, "ranges", NULL)) {
+ of_property_present(isa_brg, "ranges")) {
taddr = of_translate_address(np, reg);
if (taddr == OF_BAD_ADDR)
taddr = 0;
@@ -266,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* compatible UARTs on PCI need all sort of quirks (port offsets
* etc...) that this code doesn't know about
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* Get the PCI address. Assume BAR 0 */
@@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console)
{
struct legacy_serial_info *info = &legacy_serial_infos[console];
struct plat_serial8250_port *port = &legacy_serial_ports[console];
- void __iomem *addr;
unsigned int stride;
stride = 1 << port->regshift;
/* Check if a translated MMIO address has been found */
if (info->taddr) {
- addr = ioremap(info->taddr, 0x1000);
- if (addr == NULL)
+ info->early_addr = early_ioremap(info->taddr, 0x1000);
+ if (info->early_addr == NULL)
return;
- udbg_uart_init_mmio(addr, stride);
+ udbg_uart_init_mmio(info->early_addr, stride);
} else {
/* Check if it's PIO and we support untranslated PIO */
if (port->iotype == UPIO_PORT && isa_io_special)
@@ -353,6 +354,33 @@ static void __init setup_legacy_serial_console(int console)
udbg_uart_setup(info->speed, info->clock);
}
+static int __init ioremap_legacy_serial_console(void)
+{
+ struct plat_serial8250_port *port;
+ struct legacy_serial_info *info;
+ void __iomem *vaddr;
+
+ if (legacy_serial_console < 0)
+ return 0;
+
+ info = &legacy_serial_infos[legacy_serial_console];
+ port = &legacy_serial_ports[legacy_serial_console];
+
+ if (!info->early_addr)
+ return 0;
+
+ vaddr = ioremap(info->taddr, 0x1000);
+ if (WARN_ON(!vaddr))
+ return -ENOMEM;
+
+ udbg_uart_init_mmio(vaddr, 1 << port->regshift);
+ early_iounmap(info->early_addr, 0x1000);
+ info->early_addr = NULL;
+
+ return 0;
+}
+early_initcall(ioremap_legacy_serial_console);
+
/*
* This is called very early, as part of setup_system() or eventually
* setup_arch(), basically before anything else in this file. This function
@@ -443,6 +471,8 @@ void __init find_legacy_serial_ports(void)
}
#endif
+ of_node_put(stdout);
+
DBG("legacy_serial_console = %d\n", legacy_serial_console);
if (legacy_serial_console >= 0)
setup_legacy_serial_console(legacy_serial_console);
@@ -478,10 +508,16 @@ static void __init fixup_port_irq(int index,
port->irq = virq;
-#ifdef CONFIG_SERIAL_8250_FSL
- if (of_device_is_compatible(np, "fsl,ns16550"))
- port->handle_irq = fsl8250_handle_irq;
-#endif
+ if (IS_ENABLED(CONFIG_SERIAL_8250) &&
+ of_device_is_compatible(np, "fsl,ns16550")) {
+ if (IS_REACHABLE(CONFIG_SERIAL_8250_FSL)) {
+ port->handle_irq = fsl8250_handle_irq;
+ port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+ } else {
+ pr_warn_once("Not activating Freescale specific workaround for device %pOFP\n",
+ np);
+ }
+ }
}
static void __init fixup_port_pio(int index,
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 34c1001e9e8b..219f28637a3e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -15,37 +15,36 @@
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/extable.h>
+#include <linux/ftrace.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
-static DEFINE_PER_CPU(int, mce_nest_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+#include "setup.h"
-/* Queue for delayed MCE events. */
-static DEFINE_PER_CPU(int, mce_queue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
-
-/* Queue for delayed MCE UE events. */
-static DEFINE_PER_CPU(int, mce_ue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
- mce_ue_event_queue);
-
-static void machine_check_process_queued_event(struct irq_work *work);
-static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
-static struct irq_work mce_event_process_work = {
- .func = machine_check_process_queued_event,
-};
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
-static struct irq_work mce_ue_event_irq_work = {
- .func = machine_check_ue_irq_work,
-};
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
-DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+int mce_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
@@ -79,6 +78,13 @@ static void mce_set_error_info(struct machine_check_event *mce,
}
}
+void mce_irq_work_queue(void)
+{
+ /* Raise decrementer interrupt */
+ arch_irq_work_raise();
+ set_mce_pending_irq_work();
+}
+
/*
* Decode and save high level MCE information into per cpu buffer which
* is an array of machine_check_event structure.
@@ -87,9 +93,10 @@ void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
- int index = __this_cpu_inc_return(mce_nest_count) - 1;
- struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
+ int index = local_paca->mce_info->mce_nest_count++;
+ struct machine_check_event *mce;
+ mce = &local_paca->mce_info->mce_event[index];
/*
* Return if we don't have enough space to log mce event.
* mce_nest_count may go beyond MAX_MC_EVT but that's ok,
@@ -121,6 +128,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
* Populate the mce error_type and type-specific error_type.
*/
mce_set_error_info(mce, mce_err);
+ if (mce->error_type == MCE_ERROR_TYPE_UE)
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
+
+ /*
+ * Raise irq work, So that we don't miss to log the error for
+ * unrecoverable errors.
+ */
+ if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
+ mce_irq_work_queue();
if (!addr)
return;
@@ -149,7 +165,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
- mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -175,7 +190,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
*/
int get_mce_event(struct machine_check_event *mce, bool release)
{
- int index = __this_cpu_read(mce_nest_count) - 1;
+ int index = local_paca->mce_info->mce_nest_count - 1;
struct machine_check_event *mc_evt;
int ret = 0;
@@ -185,7 +200,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
/* Check if we have MCE info to process. */
if (index < MAX_MC_EVT) {
- mc_evt = this_cpu_ptr(&mce_event[index]);
+ mc_evt = &local_paca->mce_info->mce_event[index];
/* Copy the event structure and release the original */
if (mce)
*mce = *mc_evt;
@@ -195,7 +210,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
}
/* Decrement the count to free the slot. */
if (release)
- __this_cpu_dec(mce_nest_count);
+ local_paca->mce_info->mce_nest_count--;
return ret;
}
@@ -205,7 +220,7 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
-static void machine_check_ue_irq_work(struct irq_work *work)
+static void machine_check_ue_work(void)
{
schedule_work(&mce_ue_event_work);
}
@@ -217,16 +232,14 @@ static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
- index = __this_cpu_inc_return(mce_ue_count) - 1;
+ index = local_paca->mce_info->mce_ue_count++;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
- __this_cpu_dec(mce_ue_count);
+ local_paca->mce_info->mce_ue_count--;
return;
}
- memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
-
- /* Queue work to process this event later. */
- irq_work_queue(&mce_ue_event_irq_work);
+ memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
+ evt, sizeof(*evt));
}
/*
@@ -240,17 +253,30 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
- index = __this_cpu_inc_return(mce_queue_count) - 1;
+ index = local_paca->mce_info->mce_queue_count++;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
- __this_cpu_dec(mce_queue_count);
+ local_paca->mce_info->mce_queue_count--;
return;
}
- memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
+ memcpy(&local_paca->mce_info->mce_event_queue[index],
+ &evt, sizeof(evt));
+
+ mce_irq_work_queue();
+}
+
+void mce_common_process_ue(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
+{
+ const struct exception_table_entry *entry;
- /* Queue irq work to process this event later. */
- irq_work_queue(&mce_event_process_work);
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs_set_return_ip(regs, extable_fixup(entry));
+ }
}
+
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
@@ -260,9 +286,10 @@ static void machine_process_ue_event(struct work_struct *work)
int index;
struct machine_check_event *evt;
- while (__this_cpu_read(mce_ue_count) > 0) {
- index = __this_cpu_read(mce_ue_count) - 1;
- evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+ while (local_paca->mce_info->mce_ue_count > 0) {
+ index = local_paca->mce_info->mce_ue_count - 1;
+ evt = &local_paca->mce_info->mce_ue_event_queue[index];
+ blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
/*
* This should probably queued elsewhere, but
@@ -274,7 +301,7 @@ static void machine_process_ue_event(struct work_struct *work)
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
if (evt->u.ue_error.ignore_event) {
- __this_cpu_dec(mce_ue_count);
+ local_paca->mce_info->mce_ue_count--;
continue;
}
@@ -290,14 +317,14 @@ static void machine_process_ue_event(struct work_struct *work)
"was generated\n");
}
#endif
- __this_cpu_dec(mce_ue_count);
+ local_paca->mce_info->mce_ue_count--;
}
}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
*/
-static void machine_check_process_queued_event(struct irq_work *work)
+static void machine_check_process_queued_event(void)
{
int index;
struct machine_check_event *evt;
@@ -308,17 +335,38 @@ static void machine_check_process_queued_event(struct irq_work *work)
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
*/
- while (__this_cpu_read(mce_queue_count) > 0) {
- index = __this_cpu_read(mce_queue_count) - 1;
- evt = this_cpu_ptr(&mce_event_queue[index]);
+ while (local_paca->mce_info->mce_queue_count > 0) {
+ index = local_paca->mce_info->mce_queue_count - 1;
+ evt = &local_paca->mce_info->mce_event_queue[index];
if (evt->error_type == MCE_ERROR_TYPE_UE &&
evt->u.ue_error.ignore_event) {
- __this_cpu_dec(mce_queue_count);
+ local_paca->mce_info->mce_queue_count--;
continue;
}
machine_check_print_event_info(evt, false, false);
- __this_cpu_dec(mce_queue_count);
+ local_paca->mce_info->mce_queue_count--;
+ }
+}
+
+void set_mce_pending_irq_work(void)
+{
+ local_paca->mce_pending_irq_work = 1;
+}
+
+void clear_mce_pending_irq_work(void)
+{
+ local_paca->mce_pending_irq_work = 0;
+}
+
+void mce_run_irq_context_handlers(void)
+{
+ if (unlikely(local_paca->mce_pending_irq_work)) {
+ if (ppc_md.machine_check_log_err)
+ ppc_md.machine_check_log_err();
+ machine_check_process_queued_event();
+ machine_check_ue_work();
+ clear_mce_pending_irq_work();
}
}
@@ -355,18 +403,19 @@ void machine_check_print_event_info(struct machine_check_event *evt,
static const char *mc_user_types[] = {
"Indeterminate",
"tlbie(l) invalid",
+ "scv invalid",
};
static const char *mc_ra_types[] = {
"Indeterminate",
"Instruction fetch (bad)",
- "Instruction fetch (foreign)",
+ "Instruction fetch (foreign/control memory)",
"Page table walk ifetch (bad)",
- "Page table walk ifetch (foreign)",
+ "Page table walk ifetch (foreign/control memory)",
"Load (bad)",
"Store (bad)",
"Page table walk Load/Store (bad)",
- "Page table walk Load/Store (foreign)",
- "Load/Store (foreign)",
+ "Page table walk Load/Store (foreign/control memory)",
+ "Load/Store (foreign/control memory)",
};
static const char *mc_link_types[] = {
"Indeterminate",
@@ -524,7 +573,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
}
printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
- level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "",
err_type, subtype, dar_str,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
@@ -544,9 +593,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Display faulty slb contents for SLB errors. */
- if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
@@ -557,7 +606,7 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
*
* regs->nip and regs->msr contains srr0 and ssr1.
*/
-long machine_check_early(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
long handled = 0;
@@ -568,6 +617,7 @@ long machine_check_early(struct pt_regs *regs)
*/
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
+
return handled;
}
@@ -679,11 +729,11 @@ long hmi_handle_debugtrig(struct pt_regs *regs)
/*
* Return values:
*/
-long hmi_exception_realmode(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
int ret;
- __this_cpu_inc(irq_stat.hmi_exceptions);
+ local_paca->hmi_irqs++;
ret = hmi_handle_debugtrig(regs);
if (ret >= 0)
@@ -698,3 +748,24 @@ long hmi_exception_realmode(struct pt_regs *regs)
return 1;
}
+
+void __init mce_init(void)
+{
+ struct mce_info *mce_info;
+ u64 limit;
+ int i;
+
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+ for_each_possible_cpu(i) {
+ mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
+ __alignof__(*mce_info),
+ MEMBLOCK_LOW_LIMIT,
+ limit, early_cpu_to_node(i));
+ if (!mce_info)
+ goto err;
+ paca_ptrs[i]->mce_info = mce_info;
+ }
+ return;
+err:
+ panic("Failed to allocate memory for MCE event data\n");
+}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 1cbf7f1a4e3d..71e8f2a92e36 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -12,14 +12,15 @@
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/extable.h>
+#include <linux/pgtable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
-#include <asm/pgtable.h>
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
#include <asm/extable.h>
+#include <asm/inst.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -27,7 +28,7 @@
*/
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
- pte_t *ptep;
+ pte_t *ptep, pte;
unsigned int shift;
unsigned long pfn, flags;
struct mm_struct *mm;
@@ -39,42 +40,52 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
local_irq_save(flags);
ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+ if (!ptep) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+ pte = READ_ONCE(*ptep);
- if (!ptep || pte_special(*ptep)) {
+ if (!pte_present(pte) || pte_special(pte)) {
pfn = ULONG_MAX;
goto out;
}
if (shift <= PAGE_SHIFT)
- pfn = pte_pfn(*ptep);
+ pfn = pte_pfn(pte);
else {
unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
- pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask)));
}
-
out:
local_irq_restore(flags);
return pfn;
}
-/* flush SLBs and reload */
-#ifdef CONFIG_PPC_BOOK3S_64
-void flush_and_reload_slb(void)
+static bool mce_in_guest(void)
{
- /* Invalidate all SLBs */
- slb_flush_all_realmode();
-
#ifdef CONFIG_KVM_BOOK3S_HANDLER
/*
- * If machine check is hit when in guest or in transition, we will
- * only flush the SLBs and continue.
+ * If machine check is hit when in guest context or low level KVM
+ * code, avoid looking up any translations or making any attempts
+ * to recover, just record the event and pass to KVM.
*/
if (get_paca()->kvm_hstate.in_guest)
- return;
+ return true;
#endif
+ return false;
+}
+
+/* flush SLBs and reload */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void flush_and_reload_slb(void)
+{
if (early_radix_enabled())
return;
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
+
/*
* This probably shouldn't happen, but it may be possible it's
* called in early boot before SLB shadows are allocated.
@@ -86,9 +97,9 @@ void flush_and_reload_slb(void)
}
#endif
-static void flush_erat(void)
+void flush_erat(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
return;
@@ -103,7 +114,7 @@ static void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -238,6 +249,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, 0, 0, 0, 0, 0, 0 } };
+static const struct mce_ierror_table mce_p10_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008080000, true,
+ MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
struct mce_derror_table {
unsigned long dsisr_value;
bool dar_valid; /* dar is a valid indicator of faulting address */
@@ -356,6 +406,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
@@ -365,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
* in real-mode is tricky and can lead to recursive
* faults
*/
- int instr;
+ ppc_inst_t instr;
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
@@ -373,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
pfn = addr_to_pfn(regs, regs->nip);
if (pfn != ULONG_MAX) {
instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
- instr = *(unsigned int *)(instr_addr);
+ instr = ppc_inst_read((u32 *)instr_addr);
if (!analyse_instr(&op, &tmp, instr)) {
pfn = addr_to_pfn(regs, op.ea);
*addr = op.ea;
@@ -391,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
return -1;
}
-static int mce_handle_ierror(struct pt_regs *regs,
+static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
const struct mce_ierror_table table[],
struct mce_error_info *mce_err, uint64_t *addr,
uint64_t *phys_addr)
{
- uint64_t srr1 = regs->msr;
int handled = 0;
int i;
@@ -406,19 +495,23 @@ static int mce_handle_ierror(struct pt_regs *regs,
if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
continue;
- /* attempt to correct the error */
- switch (table[i].error_type) {
- case MCE_ERROR_TYPE_SLB:
- if (local_paca->in_mce == 1)
- slb_save_contents(local_paca->mce_faulty_slbs);
- handled = mce_flush(MCE_FLUSH_SLB);
- break;
- case MCE_ERROR_TYPE_ERAT:
- handled = mce_flush(MCE_FLUSH_ERAT);
- break;
- case MCE_ERROR_TYPE_TLB:
- handled = mce_flush(MCE_FLUSH_TLB);
- break;
+ if (!mce_in_guest()) {
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+ handled = mce_flush(MCE_FLUSH_SLB);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ handled = mce_flush(MCE_FLUSH_ERAT);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ handled = mce_flush(MCE_FLUSH_TLB);
+ break;
+ }
}
/* now fill in mce_error_info */
@@ -450,7 +543,7 @@ static int mce_handle_ierror(struct pt_regs *regs,
mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
- if (table[i].nip_valid) {
+ if (table[i].nip_valid && !mce_in_guest()) {
*addr = regs->nip;
if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
@@ -493,22 +586,26 @@ static int mce_handle_derror(struct pt_regs *regs,
if (!(dsisr & table[i].dsisr_value))
continue;
- /* attempt to correct the error */
- switch (table[i].error_type) {
- case MCE_ERROR_TYPE_SLB:
- if (local_paca->in_mce == 1)
- slb_save_contents(local_paca->mce_faulty_slbs);
- if (mce_flush(MCE_FLUSH_SLB))
- handled = 1;
- break;
- case MCE_ERROR_TYPE_ERAT:
- if (mce_flush(MCE_FLUSH_ERAT))
- handled = 1;
- break;
- case MCE_ERROR_TYPE_TLB:
- if (mce_flush(MCE_FLUSH_TLB))
- handled = 1;
- break;
+ if (!mce_in_guest()) {
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+ if (mce_flush(MCE_FLUSH_SLB))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (mce_flush(MCE_FLUSH_ERAT))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (mce_flush(MCE_FLUSH_TLB))
+ handled = 1;
+ break;
+ }
}
/*
@@ -550,7 +647,7 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
- else if (mce_err->sync_error &&
+ else if (mce_err->sync_error && !mce_in_guest() &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
/*
* We do a maximum of 4 nested MCE calls, see
@@ -578,15 +675,12 @@ static int mce_handle_derror(struct pt_regs *regs,
static long mce_handle_ue_error(struct pt_regs *regs,
struct mce_error_info *mce_err)
{
- long handled = 0;
- const struct exception_table_entry *entry;
+ if (mce_in_guest())
+ return 0;
- entry = search_kernel_exception_table(regs->nip);
- if (entry) {
- mce_err->ignore_event = true;
- regs->nip = extable_fixup(entry);
+ mce_common_process_ue(regs, mce_err);
+ if (mce_err->ignore_event)
return 1;
- }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -597,25 +691,26 @@ static long mce_handle_ue_error(struct pt_regs *regs,
if (ppc_md.mce_check_early_recovery) {
if (ppc_md.mce_check_early_recovery(regs))
- handled = 1;
+ return 1;
}
- return handled;
+
+ return 0;
}
static long mce_handle_error(struct pt_regs *regs,
+ unsigned long srr1,
const struct mce_derror_table dtable[],
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
uint64_t addr, phys_addr = ULONG_MAX;
- uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
&phys_addr);
else
- handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+ handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
@@ -631,16 +726,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
/* P7 DD1 leaves top bits of DSISR undefined */
regs->dsisr &= 0x0000ffff;
- return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
+ return mce_handle_error(regs, regs->msr,
+ mce_p7_derror_table, mce_p7_ierror_table);
}
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
+ return mce_handle_error(regs, regs->msr,
+ mce_p8_derror_table, mce_p8_ierror_table);
}
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
+ unsigned long srr1 = regs->msr;
+
/*
* On POWER9 DD2.1 and below, it's possible to get a machine check
* caused by a paste instruction where only DSISR bit 25 is set. This
@@ -654,5 +753,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
return 1;
- return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
+ /*
+ * Async machine check due to bad real address from store or foreign
+ * link time out comes with the load/store bit (PPC bit 42) set in
+ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
+ * directed to the ierror table so it will find the cause (which
+ * describes it correctly as a store error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ ((srr1 & 0x081c0000) == 0x08140000 ||
+ (srr1 & 0x081c0000) == 0x08180000)) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ return mce_handle_error(regs, srr1,
+ mce_p9_derror_table, mce_p9_ierror_table);
+}
+
+long __machine_check_early_realmode_p10(struct pt_regs *regs)
+{
+ unsigned long srr1 = regs->msr;
+
+ /*
+ * Async machine check due to bad real address from store comes with
+ * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in
+ * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table
+ * so it will find the cause (which describes it correctly as a store
+ * error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ (srr1 & 0x081c0000) == 0x08140000) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ return mce_handle_error(regs, srr1,
+ mce_p10_derror_table, mce_p10_ierror_table);
}
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 974f65f79a8e..29e1440d14cc 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -10,11 +10,11 @@
*
* setjmp/longjmp code by Paul Mackerras.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
.text
@@ -29,13 +29,15 @@ _GLOBAL(reloc_offset)
li r3, 0
_GLOBAL(add_reloc_offset)
mflr r0
- bl 1f
+ bcl 20,31,$+4
1: mflr r5
PPC_LL r4,(2f-1b)(r5)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(reloc_offset)
+_ASM_NOKPROBE_SYMBOL(add_reloc_offset)
.align 3
2: PPC_LONG 1b
@@ -110,7 +112,7 @@ _GLOBAL(longjmp)
li r3, 1
blr
-_GLOBAL(current_stack_pointer)
+_GLOBAL(current_stack_frame)
PPC_LL r3,0(r1)
blr
-EXPORT_SYMBOL(current_stack_pointer)
+EXPORT_SYMBOL(current_stack_frame)
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d80212be8698..2eabb15687a6 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -8,6 +8,7 @@
*
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
@@ -22,51 +23,11 @@
#include <asm/processor.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
.text
/*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- stw r3, THREAD+KSP_LIMIT(r2)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- stw r10,8(r1)
- bl __do_softirq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- stw r4, THREAD+KSP_LIMIT(r2)
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- stw r10,8(r1)
- bl __do_irq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
_GLOBAL(mulhdu)
@@ -106,7 +67,7 @@ _GLOBAL(reloc_got2)
srwi. r8,r8,2
beqlr
mtctr r8
- bl 1f
+ bcl 20,31,$+4
1: mflr r0
lis r4,1b@ha
addi r4,r4,1b@l
@@ -215,19 +176,6 @@ _GLOBAL(low_choose_7447a_dfs)
#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
-/*
- * complement mask on the msr then "or" some values on.
- * _nmask_and_or_msr(nmask, value_to_or)
- */
-_GLOBAL(_nmask_and_or_msr)
- mfmsr r0 /* Get current msr */
- andc r0,r0,r3 /* And off the bits set in r3 (first parm) */
- or r0,r0,r4 /* Or on the bits in r4 (second parm) */
- SYNC /* Some chip revs have problems here... */
- mtmsr r0 /* Update machine state */
- isync
- blr /* Done */
-
#ifdef CONFIG_40x
/*
@@ -246,6 +194,7 @@ _GLOBAL(real_readb)
sync
isync
blr
+_ASM_NOKPROBE_SYMBOL(real_readb)
/*
* Do an IO access in real mode
@@ -263,53 +212,10 @@ _GLOBAL(real_writeb)
sync
isync
blr
+_ASM_NOKPROBE_SYMBOL(real_writeb)
#endif /* CONFIG_40x */
-
-/*
- * Flush instruction cache.
- * This is a no-op on the 601.
- */
-#ifndef CONFIG_PPC_8xx
-_GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_4xx)
-#ifdef CONFIG_403GCX
- li r3, 512
- mtctr r3
- lis r4, KERNELBASE@h
-1: iccci 0, r4
- addi r4, r4, 16
- bdnz 1b
-#else
- lis r3, KERNELBASE@h
- iccci 0,r3
-#endif
-#elif defined(CONFIG_FSL_BOOKE)
-#ifdef CONFIG_E200
- mfspr r3,SPRN_L1CSR0
- ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
- /* msync; isync recommended here */
- mtspr SPRN_L1CSR0,r3
- isync
- blr
-#endif
- mfspr r3,SPRN_L1CSR1
- ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
- mtspr SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
- blr /* for 601, do nothing */
-#else
- /* 603/604 processor - use invalidate-all bit in HID0 */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_ICFI
- mtspr SPRN_HID0,r3
-#endif /* CONFIG_4xx */
- isync
- blr
-EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
-
/*
* Copy a whole page. We use the dcbz instruction on the destination
* to reduce memory traffic (it eliminates the unnecessary reads of
@@ -331,7 +237,7 @@ _GLOBAL(copy_page)
addi r3,r3,-4
0: twnei r5, 0 /* WARN if r3 is not cache aligned */
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
addi r4,r4,-4
@@ -476,15 +382,9 @@ EXPORT_SYMBOL(__bswapdi2)
_GLOBAL(start_secondary_resume)
/* Reset stack */
rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
bl start_secondary
b .
#endif /* CONFIG_SMP */
-
-/*
- * This routine is just here to keep GCC happy - sigh...
- */
-_GLOBAL(__main)
- blr
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1864605eca29..1a8cdafd68e8 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -9,6 +9,8 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*/
+#include <linux/export.h>
+#include <linux/linkage.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
@@ -22,33 +24,10 @@
#include <asm/kexec.h>
#include <asm/ptrace.h>
#include <asm/mmu.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
.text
-_GLOBAL(call_do_softirq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- bl __do_softirq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
-_GLOBAL(call_do_irq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- bl __do_irq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
_GLOBAL(__bswapdi2)
EXPORT_SYMBOL(__bswapdi2)
srdi r8,r3,32
@@ -277,7 +256,7 @@ _GLOBAL(scom970_write)
* Physical (hardware) cpu id should be in r3.
*/
_GLOBAL(kexec_wait)
- bl 1f
+ bcl 20,31,$+4
1: mflr r5
addi r5,r5,kexec_flag-1b
@@ -308,7 +287,7 @@ kexec_flag:
#ifdef CONFIG_KEXEC_CORE
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/*
* BOOK3E has no real MMU mode, so we have to setup the initial TLB
* for a core to identity map v:0 to p:0. This current implementation
@@ -365,7 +344,6 @@ _GLOBAL(kexec_smp_wait)
li r4,KEXEC_STATE_REAL_MODE
stb r4,PACAKEXECSTATE(r13)
- SYNC
b kexec_wait
@@ -376,8 +354,8 @@ _GLOBAL(kexec_smp_wait)
*
* don't overwrite r3 here, it is live for kexec_wait above.
*/
-real_mode: /* assume normal blr return */
-#ifdef CONFIG_PPC_BOOK3E
+SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */
+#ifdef CONFIG_PPC_BOOK3E_64
/* Create an identity mapping. */
b kexec_create_tlb
#else
@@ -393,6 +371,7 @@ real_mode: /* assume normal blr return */
mtspr SPRN_SRR0,r11
rfid
#endif
+SYM_FUNC_END(real_mode)
/*
* kexec_sequence(newstack, start, image, control, clear_all(),
@@ -407,26 +386,12 @@ _GLOBAL(kexec_sequence)
std r0,16(r1)
/* switch stacks to newstack -- &kexec_stack.stack */
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3)
mr r1,r3
li r0,0
std r0,16(r1)
-BEGIN_FTR_SECTION
- /*
- * This is the best time to turn AMR/IAMR off.
- * key 0 is used in radix for supervisor<->user
- * protection, but on hash key 0 is reserved
- * ideally we want to enter with a clean state.
- * NOTE, we rely on r0 being 0 from above.
- */
- mtspr SPRN_IAMR,r0
-BEGIN_FTR_SECTION_NESTED(42)
- mtspr SPRN_AMOR,r0
-END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
/* save regs for local vars on new stack.
* yes, we won't go back, but ...
*/
@@ -438,7 +403,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
std r26,-48(r1)
std r25,-56(r1)
- stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
+ stdu r1,-STACK_FRAME_MIN_SIZE-64(r1)
/* save args into preserved regs */
mr r31,r3 /* newstack (both) */
@@ -450,7 +415,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
/* disable interrupts, we are overwriting kernel data next */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
wrteei 0
#else
mfmsr r3
@@ -467,7 +432,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1:
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl kexec_copy_flush /* (image) */
+ bl CFUNC(kexec_copy_flush) /* (image) */
/* turn off mmu now if not done earlier */
cmpdi r26,0
@@ -491,7 +456,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
beq 1f
/* clear out hardware hash page table and tlb */
-#ifdef PPC64_ELF_ABI_v1
+#ifdef CONFIG_PPC64_ELF_ABI_V1
ld r12,0(r27) /* deref function descriptor */
#else
mr r12,r27
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index df649acb5631..f6d6ae0a1692 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -8,12 +8,14 @@
#include <linux/moduleloader.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/bug.h>
#include <asm/module.h>
#include <linux/uaccess.h>
#include <asm/firmware.h>
#include <linux/sort.h>
#include <asm/setup.h>
+#include <asm/sections.h>
static LIST_HEAD(module_bug_list);
@@ -62,13 +64,13 @@ int module_finalize(const Elf_Ehdr *hdr,
(void *)sect->sh_addr + sect->sh_size);
#endif /* CONFIG_PPC64 */
-#ifdef PPC64_ELF_ABI_v1
+#ifdef CONFIG_PPC64_ELF_ABI_V1
sect = find_section(hdr, sechdrs, ".opd");
if (sect != NULL) {
me->arch.start_opd = sect->sh_addr;
me->arch.end_opd = sect->sh_addr + sect->sh_size;
}
-#endif /* PPC64_ELF_ABI_v1 */
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
#ifdef CONFIG_PPC_BARRIER_NOSPEC
sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
@@ -86,3 +88,40 @@ int module_finalize(const Elf_Ehdr *hdr,
return 0;
}
+
+static __always_inline void *
+__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
+{
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
+
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */
+ return __vmalloc_node_range(size, 1, start, end, gfp, prot,
+ VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
+}
+
+void *module_alloc(unsigned long size)
+{
+#ifdef MODULES_VADDR
+ unsigned long limit = (unsigned long)_etext - SZ_32M;
+ void *ptr = NULL;
+
+ BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+ /* First try within 32M limit from _etext to avoid branch trampolines */
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
+ ptr = __module_alloc(size, limit, MODULES_END, true);
+
+ if (!ptr)
+ ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
+
+ return ptr;
+#else
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
+#endif
+}
diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds
deleted file mode 100644
index cea5dc124be4..000000000000
--- a/arch/powerpc/kernel/module.lds
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Force alignment of .toc section. */
-SECTIONS
-{
- .toc 0 : ALIGN(256)
- {
- *(.got .toc)
- }
-}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index d7134c614c16..816a63fd71fb 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -18,6 +18,7 @@
#include <linux/bug.h>
#include <linux/sort.h>
#include <asm/setup.h>
+#include <asm/code-patching.h>
/* Count how many different relocations (different symbol, different
addend) */
@@ -67,21 +68,6 @@ static int relacmp(const void *_x, const void *_y)
return 0;
}
-static void relaswap(void *_x, void *_y, int size)
-{
- uint32_t *x, *y, tmp;
- int i;
-
- y = (uint32_t *)_x;
- x = (uint32_t *)_y;
-
- for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
- tmp = x[i];
- x[i] = y[i];
- y[i] = tmp;
- }
-}
-
/* Get the potential trampolines size required of the init and
non-init sections */
static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
@@ -113,12 +99,12 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf32_Rela),
- sizeof(Elf32_Rela), relacmp, relaswap);
+ sizeof(Elf32_Rela), relacmp, NULL);
ret += count_relocs((void *)hdr
+ sechdrs[i].sh_offset,
@@ -160,10 +146,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val)))
+ if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val)))
return 0;
- if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) |
- PPC_LO(val)))
+ if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))
return 0;
return 1;
}
@@ -178,8 +163,7 @@ static uint32_t do_plt_call(void *location,
pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
/* Init, or core PLT? */
- if (location >= mod->core_layout.base
- && location < mod->core_layout.base + mod->core_layout.size)
+ if (within_module_core((unsigned long)location, mod))
entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
else
entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
@@ -190,21 +174,25 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- /*
- * lis r12, sym@ha
- * addi r12, r12, sym@l
- * mtctr r12
- * bctr
- */
- entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val);
- entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val);
- entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12);
- entry->jump[3] = PPC_INST_BCTR;
+ if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val)))))
+ return 0;
+ if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))))
+ return 0;
+ if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12))))
+ return 0;
+ if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR())))
+ return 0;
pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
return (uint32_t)entry;
}
+static int patch_location_16(uint32_t *loc, u16 value)
+{
+ loc = PTR_ALIGN_DOWN(loc, sizeof(u32));
+ return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value));
+}
+
int apply_relocate_add(Elf32_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
@@ -238,44 +226,46 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
case R_PPC_ADDR16_LO:
/* Low half of the symbol */
- *(uint16_t *)location = value;
+ if (patch_location_16(location, PPC_LO(value)))
+ return -EFAULT;
break;
case R_PPC_ADDR16_HI:
/* Higher half of the symbol */
- *(uint16_t *)location = (value >> 16);
+ if (patch_location_16(location, PPC_HI(value)))
+ return -EFAULT;
break;
case R_PPC_ADDR16_HA:
- /* Sign-adjusted lower 16 bits: PPC ELF ABI says:
- (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF.
- This is the same, only sane.
- */
- *(uint16_t *)location = (value + 0x8000) >> 16;
+ if (patch_location_16(location, PPC_HA(value)))
+ return -EFAULT;
break;
case R_PPC_REL24:
if ((int)(value - (uint32_t)location) < -0x02000000
- || (int)(value - (uint32_t)location) >= 0x02000000)
+ || (int)(value - (uint32_t)location) >= 0x02000000) {
value = do_plt_call(location, value,
sechdrs, module);
+ if (!value)
+ return -EFAULT;
+ }
/* Only replace bits 2 through 26 */
pr_debug("REL24 value = %08X. location = %08X\n",
value, (uint32_t)location);
pr_debug("Location before: %08X.\n",
*(uint32_t *)location);
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
- | ((value - (uint32_t)location)
- & 0x03fffffc);
+ value = (*(uint32_t *)location & ~PPC_LI_MASK) |
+ PPC_LI(value - (uint32_t)location);
+
+ if (patch_instruction(location, ppc_inst(value)))
+ return -EFAULT;
+
pr_debug("Location after: %08X.\n",
*(uint32_t *)location);
pr_debug("ie. jump to %08X+%08X = %08X\n",
- *(uint32_t *)location & 0x03fffffc,
- (uint32_t)location,
- (*(uint32_t *)location & 0x03fffffc)
- + (uint32_t)location);
+ *(uint32_t *)PPC_LI((uint32_t)location), (uint32_t)location,
+ (*(uint32_t *)PPC_LI((uint32_t)location)) + (uint32_t)location);
break;
case R_PPC_REL32:
@@ -295,14 +285,56 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+notrace int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ ppc_inst_t jmp[4];
+
+ /* Find where the trampoline jumps to */
+ if (copy_inst_from_kernel_nofault(jmp, (void *)addr))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 1, (void *)addr + 4))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 2, (void *)addr + 8))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 3, (void *)addr + 12))
+ return -EFAULT;
+
+ /* verify that this is what we expect it to be */
+ if ((ppc_inst_val(jmp[0]) & 0xffff0000) != PPC_RAW_LIS(_R12, 0))
+ return -EINVAL;
+ if ((ppc_inst_val(jmp[1]) & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0))
+ return -EINVAL;
+ if (ppc_inst_val(jmp[2]) != PPC_RAW_MTCTR(_R12))
+ return -EINVAL;
+ if (ppc_inst_val(jmp[3]) != PPC_RAW_BCTR())
+ return -EINVAL;
+
+ addr = (ppc_inst_val(jmp[1]) & 0xffff) | ((ppc_inst_val(jmp[0]) & 0xffff) << 16);
+ if (addr & 0x8000)
+ addr -= 0x10000;
+
+ *target = addr;
+
+ return 0;
+}
+
int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
{
- module->arch.tramp = do_plt_call(module->core_layout.base,
+ module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base,
(unsigned long)ftrace_caller,
sechdrs, module);
if (!module->arch.tramp)
return -ENOENT;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base,
+ (unsigned long)ftrace_regs_caller,
+ sechdrs, module);
+ if (!module->arch.tramp_regs)
+ return -ENOENT;
+#endif
+
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 007606a48fd9..7112adc597a8 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -14,12 +14,14 @@
#include <linux/ftrace.h>
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <linux/kernel.h>
#include <asm/module.h>
#include <asm/firmware.h>
#include <asm/code-patching.h>
#include <linux/sort.h>
#include <asm/setup.h>
#include <asm/sections.h>
+#include <asm/inst.h>
/* FIXME: We don't do .init separately. To do this, we'd need to have
a separate r2 value in the init and core section, and stub between
@@ -29,22 +31,25 @@
this, and makes other things simpler. Anton?
--RR. */
-#ifdef PPC64_ELF_ABI_v2
+bool module_elf_check_arch(Elf_Ehdr *hdr)
+{
+ unsigned long abi_level = hdr->e_flags & 0x3;
+
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ return abi_level == 2;
+ else
+ return abi_level < 2;
+}
-/* An address is simply the address of the function. */
-typedef unsigned long func_desc_t;
+#ifdef CONFIG_PPC64_ELF_ABI_V2
static func_desc_t func_desc(unsigned long addr)
{
- return addr;
-}
-static unsigned long func_addr(unsigned long addr)
-{
- return addr;
-}
-static unsigned long stub_func_addr(func_desc_t func)
-{
- return func;
+ func_desc_t desc = {
+ .addr = addr,
+ };
+
+ return desc;
}
/* PowerPC64 specific values for the Elf64_Sym st_other field. */
@@ -62,20 +67,9 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
}
#else
-/* An address is address of the OPD entry, which contains address of fn. */
-typedef struct ppc64_opd_entry func_desc_t;
-
static func_desc_t func_desc(unsigned long addr)
{
- return *(struct ppc64_opd_entry *)addr;
-}
-static unsigned long func_addr(unsigned long addr)
-{
- return func_desc(addr).funcaddr;
-}
-static unsigned long stub_func_addr(func_desc_t func)
-{
- return func.funcaddr;
+ return *(struct func_desc *)addr;
}
static unsigned int local_entry_offset(const Elf64_Sym *sym)
{
@@ -92,97 +86,80 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
}
#endif
+static unsigned long func_addr(unsigned long addr)
+{
+ return func_desc(addr).addr;
+}
+
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func.addr;
+}
+
#define STUB_MAGIC 0x73747562 /* stub */
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
-struct ppc64_stub_entry
-{
- /* 28 byte jump instruction sequence (7 instructions). We only
- * need 6 instructions on ABIv2 but we always allocate 7 so
- * so we don't have to modify the trampoline load instruction. */
+struct ppc64_stub_entry {
+ /*
+ * 28 byte jump instruction sequence (7 instructions) that can
+ * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
+ * with PCREL kernels that use prefix instructions in the stub.
+ */
u32 jump[7];
/* Used by ftrace to identify stubs */
u32 magic;
/* Data for the above code */
func_desc_t funcdata;
+} __aligned(8);
+
+struct ppc64_got_entry {
+ u64 addr;
};
/*
* PPC64 uses 24 bit jumps, but we need to jump into other modules or
* the kernel which may be further. So we jump to a stub.
*
- * For ELFv1 we need to use this to set up the new r2 value (aka TOC
- * pointer). For ELFv2 it's the callee's responsibility to set up the
- * new r2, but for both we need to save the old r2.
+ * Target address and TOC are loaded from function descriptor in the
+ * ppc64_stub_entry.
*
- * We could simply patch the new r2 value and function pointer into
- * the stub, but it's significantly shorter to put these values at the
- * end of the stub code, and patch the stub address (32-bits relative
- * to the TOC ptr, r2) into the stub.
+ * r12 is used to generate the target address, which is required for the
+ * ELFv2 global entry point calling convention.
*
- * addis r11,r2, <high>
- * addi r11,r11, <low>
- * std r2,R2_STACK_OFFSET(r1)
- * ld r12,32(r11)
- * ld r2,40(r11)
- * mtctr r12
- * bctr
+ * TOC handling:
+ * - PCREL does not have a TOC.
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for
+ * setting its own TOC pointer at the global entry address.
+ * - ELFv1 must load the new TOC pointer from the function descriptor.
*/
static u32 ppc64_stub_insns[] = {
- PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2),
- PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11),
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pld r12,addr */
+ PPC_PREFIX_8LS | __PPC_PRFX_R(1),
+ PPC_INST_PLD | ___PPC_RT(_R12),
+#else
+ PPC_RAW_ADDIS(_R11, _R2, 0),
+ PPC_RAW_ADDI(_R11, _R11, 0),
/* Save current r2 value in magic place on the stack. */
- PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET,
- PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32,
-#ifdef PPC64_ELF_ABI_v1
+ PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET),
+ PPC_RAW_LD(_R12, _R11, 32),
+#ifdef CONFIG_PPC64_ELF_ABI_V1
/* Set up new r2 from function descriptor */
- PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40,
+ PPC_RAW_LD(_R2, _R11, 40),
#endif
- PPC_INST_MTCTR | __PPC_RS(R12),
- PPC_INST_BCTR,
-};
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-int module_trampoline_target(struct module *mod, unsigned long addr,
- unsigned long *target)
-{
- struct ppc64_stub_entry *stub;
- func_desc_t funcdata;
- u32 magic;
-
- if (!within_module_core(addr, mod)) {
- pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name);
- return -EFAULT;
- }
-
- stub = (struct ppc64_stub_entry *)addr;
-
- if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) {
- pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name);
- return -EFAULT;
- }
-
- if (magic != STUB_MAGIC) {
- pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name);
- return -EFAULT;
- }
-
- if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) {
- pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name);
- return -EFAULT;
- }
-
- *target = stub_func_addr(funcdata);
-
- return 0;
-}
#endif
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+};
-/* Count how many different 24-bit relocations (different symbol,
- different addend) */
-static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+/*
+ * Count how many different r_type relocations (different symbol,
+ * different addend).
+ */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num,
+ unsigned long r_type)
{
unsigned int i, r_info, r_addend, _count_relocs;
@@ -191,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
r_info = 0;
r_addend = 0;
for (i = 0; i < num; i++)
- /* Only count 24-bit relocs, others don't need stubs */
- if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ /* Only count r_type relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) == r_type &&
(r_info != ELF64_R_SYM(rela[i].r_info) ||
r_addend != rela[i].r_addend)) {
_count_relocs++;
@@ -226,26 +203,11 @@ static int relacmp(const void *_x, const void *_y)
return 0;
}
-static void relaswap(void *_x, void *_y, int size)
-{
- uint64_t *x, *y, tmp;
- int i;
-
- y = (uint64_t *)_x;
- x = (uint64_t *)_y;
-
- for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) {
- tmp = x[i];
- x[i] = y[i];
- y[i] = tmp;
- }
-}
-
/* Get size of potential trampolines required. */
static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
const Elf64_Shdr *sechdrs)
{
- /* One extra reloc so it's always 0-funcaddr terminated */
+ /* One extra reloc so it's always 0-addr terminated */
unsigned long relocs = 1;
unsigned i;
@@ -259,16 +221,23 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size / sizeof(Elf64_Rela),
- sizeof(Elf64_Rela), relacmp, relaswap);
+ sizeof(Elf64_Rela), relacmp, NULL);
relocs += count_relocs((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size
- / sizeof(Elf64_Rela));
+ / sizeof(Elf64_Rela),
+ R_PPC_REL24);
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_REL24_NOTOC);
+#endif
}
}
@@ -285,6 +254,95 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
return relocs * sizeof(struct ppc64_stub_entry);
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+static int count_pcpu_relocs(const Elf64_Shdr *sechdrs,
+ const Elf64_Rela *rela, unsigned int num,
+ unsigned int symindex, unsigned int pcpu)
+{
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+
+ for (i = 0; i < num; i++) {
+ Elf64_Sym *sym;
+
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ if (sym->st_shndx == pcpu &&
+ (r_info != ELF64_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF64_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+ }
+
+ return _count_relocs;
+}
+
+/* Get size of potential GOT required. */
+static unsigned long get_got_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs,
+ struct module *me)
+{
+ /* One extra reloc so it's always 0-addr terminated */
+ unsigned long relocs = 1;
+ unsigned int i, symindex = 0;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symindex = i;
+ break;
+ }
+ }
+ WARN_ON_ONCE(!symindex);
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /*
+ * Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * algorithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, NULL);
+
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_GOT_PCREL34);
+
+ /*
+ * Percpu data access typically gets linked with
+ * REL34 relocations, but the percpu section gets
+ * moved at load time and requires that to be
+ * converted to GOT linkage.
+ */
+ if (IS_ENABLED(CONFIG_SMP) && symindex)
+ relocs += count_pcpu_relocs(sechdrs,
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ symindex, me->arch.pcpu_section);
+ }
+ }
+
+ pr_debug("Looks like a total of %lu GOT entries, max\n", relocs);
+ return relocs * sizeof(struct ppc64_got_entry);
+}
+#else /* CONFIG_PPC_KERNEL_PCREL */
+
/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
@@ -334,6 +392,13 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
}
return NULL;
}
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+
+bool module_init_section(const char *name)
+{
+ /* We don't handle .init for the moment: always return false. */
+ return false;
+}
int module_frob_arch_sections(Elf64_Ehdr *hdr,
Elf64_Shdr *sechdrs,
@@ -344,9 +409,17 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
/* Find .toc and .stubs sections, symtab and strtab */
for (i = 1; i < hdr->e_shnum; i++) {
- char *p;
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0)
+ me->arch.pcpu_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) {
+ me->arch.got_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
+#else
else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
if (sechdrs[i].sh_addralign < 8)
@@ -356,15 +429,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
- /* We don't handle .init for the moment: rename to _init */
- while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
- p[0] = '_';
-
if (sechdrs[i].sh_type == SHT_SYMTAB)
dedotify((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf64_Sym),
(void *)hdr
+ sechdrs[sechdrs[i].sh_link].sh_offset);
+#endif
}
if (!me->arch.stubs_section) {
@@ -372,18 +442,135 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return -ENOEXEC;
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ if (!me->arch.got_section) {
+ pr_err("%s: doesn't contain .mygot.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the got size */
+ sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me);
+#else
/* If we don't have a .toc, just use .stubs. We need to set r2
to some reasonable value in case the module calls out to
other functions via a stub, or if a function pointer escapes
the module by some means. */
if (!me->arch.toc_section)
me->arch.toc_section = me->arch.stubs_section;
+#endif
/* Override the stubs size */
sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+
+ return 0;
+}
+
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+
+static u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)),
+ PPC_RAW_NOP(), /* align the prefix insn */
+ /* paddi r12,r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(0),
+ PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#else
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#endif
+};
+
+/*
+ * For mprofile-kernel we use a special stub for ftrace_caller() because we
+ * can't rely on r2 containing this module's TOC when we enter the stub.
+ *
+ * That can happen if the function calling us didn't need to use the toc. In
+ * that case it won't have setup r2, and the r2 value will be either the
+ * kernel's toc, or possibly another modules toc.
+ *
+ * To deal with that this stub uses the kernel toc, which is always accessible
+ * via the paca (in r13). The target (ftrace_caller()) is responsible for
+ * saving and restoring the toc before returning.
+ */
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+ unsigned long addr,
+ struct module *me)
+{
+ long reladdr;
+
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+ return 0;
+ }
+
+ BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump));
+ memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to kernel base (from the paca) */
+ reladdr = addr - local_paca->kernelbase;
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %ps out of range of 34-bit relative address.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[2] |= IMM_H18(reladdr);
+ entry->jump[3] |= IMM_L(reladdr);
+ } else {
+ /* Stub uses address relative to kernel toc (from the paca) */
+ reladdr = addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[1] |= PPC_HA(reladdr);
+ entry->jump[2] |= PPC_LO(reladdr);
+ }
+
+ /* Even though we don't use funcdata in the stub, it's needed elsewhere. */
+ entry->funcdata = func_desc(addr);
+ entry->magic = STUB_MAGIC;
+
+ return 1;
+}
+
+static bool is_mprofile_ftrace_call(const char *name)
+{
+ if (!strcmp("_mcount", name))
+ return true;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (!strcmp("ftrace_caller", name))
+ return true;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (!strcmp("ftrace_regs_caller", name))
+ return true;
+#endif
+#endif
+
+ return false;
+}
+#else
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+ unsigned long addr,
+ struct module *me)
+{
return 0;
}
+static bool is_mprofile_ftrace_call(const char *name)
+{
+ return false;
+}
+#endif
+
/*
* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
* value maximum span in an instruction which uses a signed offset). Round down
@@ -392,32 +579,85 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
*/
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
+#ifndef CONFIG_PPC_KERNEL_PCREL
return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
+#else
+ return -1;
+#endif
}
/* Patch stub to reference function and correct r2 value. */
static inline int create_stub(const Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
unsigned long addr,
- struct module *me)
+ struct module *me,
+ const char *name)
{
long reladdr;
+ func_desc_t desc;
+ int i;
- memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
+ if (is_mprofile_ftrace_call(name))
+ return create_ftrace_stub(entry, addr, me);
- /* Stub uses address relative to r2. */
- reladdr = (unsigned long)entry - my_r2(sechdrs, me);
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address %p of stub out of range of %p.\n",
- me->name, (void *)reladdr, (void *)my_r2);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
return 0;
}
- pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
- entry->jump[0] |= PPC_HA(reladdr);
- entry->jump[1] |= PPC_LO(reladdr);
- entry->funcdata = func_desc(addr);
- entry->magic = STUB_MAGIC;
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));
+ for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+ if (patch_instruction(&entry->jump[i],
+ ppc_inst(ppc64_stub_insns[i])))
+ return 0;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to itself! */
+ reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+ BUILD_BUG_ON(reladdr != 32);
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+ me->name, (void *)reladdr);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ /* May not even need this if we're relative to 0 */
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+ entry->jump[1] | IMM_L(reladdr))))
+ return 0;
+
+ } else {
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+ return 0;
+
+ if (patch_instruction(&entry->jump[1],
+ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+ return 0;
+ }
+
+ // func_desc_t is 8 bytes if ABIv2, else 16 bytes
+ desc = func_desc(addr);
+ for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
+ if (patch_instruction(((u32 *)&entry->funcdata) + i,
+ ppc_inst(((u32 *)(&desc))[i])))
+ return 0;
+ }
+
+ if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC)))
+ return 0;
return 1;
}
@@ -426,7 +666,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
stub to set up the TOC ptr (r2) for the function. */
static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
unsigned long addr,
- struct module *me)
+ struct module *me,
+ const char *name)
{
struct ppc64_stub_entry *stubs;
unsigned int i, num_stubs;
@@ -443,52 +684,40 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
return (unsigned long)&stubs[i];
}
- if (!create_stub(sechdrs, &stubs[i], addr, me))
+ if (!create_stub(sechdrs, &stubs[i], addr, me, name))
return 0;
return (unsigned long)&stubs[i];
}
-#ifdef CONFIG_MPROFILE_KERNEL
-static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/* Create GOT to load the location described in this ptr */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+ unsigned long addr,
+ struct module *me,
+ const char *name)
{
- if (strcmp("_mcount", name))
- return false;
+ struct ppc64_got_entry *got;
+ unsigned int i, num_got;
- /*
- * Check if this is one of the -mprofile-kernel sequences.
- */
- if (instruction[-1] == PPC_INST_STD_LR &&
- instruction[-2] == PPC_INST_MFLR)
- return true;
+ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return addr;
- if (instruction[-1] == PPC_INST_MFLR)
- return true;
+ num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
- return false;
-}
+ /* Find this stub, or if that fails, the next avail. entry */
+ got = (void *)sechdrs[me->arch.got_section].sh_addr;
+ for (i = 0; got[i].addr; i++) {
+ if (WARN_ON(i >= num_got))
+ return 0;
-/*
- * In case of _mcount calls, do not save the current callee's TOC (in r2) into
- * the original caller's stack frame. If we did we would clobber the saved TOC
- * value of the original caller.
- */
-static void squash_toc_save_inst(const char *name, unsigned long addr)
-{
- struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr;
+ if (got[i].addr == addr)
+ return (unsigned long)&got[i];
+ }
- /* Only for calls to _mcount */
- if (strcmp("_mcount", name) != 0)
- return;
+ got[i].addr = addr;
- stub->jump[2] = PPC_INST_NOP;
-}
-#else
-static void squash_toc_save_inst(const char *name, unsigned long addr) { }
-
-static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
-{
- return false;
+ return (unsigned long)&got[i];
}
#endif
@@ -497,26 +726,39 @@ static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
static int restore_r2(const char *name, u32 *instruction, struct module *me)
{
u32 *prev_insn = instruction - 1;
+ u32 insn_val = *instruction;
- if (is_mprofile_mcount_callsite(name, prev_insn))
- return 1;
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return 0;
+
+ if (is_mprofile_ftrace_call(name))
+ return 0;
/*
* Make sure the branch isn't a sibling call. Sibling calls aren't
* "link" branches and they don't return, so they don't need the r2
* restore afterwards.
*/
- if (!instr_is_relative_link_branch(*prev_insn))
- return 1;
+ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn)))
+ return 0;
- if (*instruction != PPC_INST_NOP) {
- pr_err("%s: Expected nop after call, got %08x at %pS\n",
- me->name, *instruction, instruction);
+ /*
+ * For livepatch, the restore r2 instruction might have already been
+ * written previously, if the referenced symbol is in a previously
+ * unloaded module which is now being loaded again. In that case, skip
+ * the warning and the instruction write.
+ */
+ if (insn_val == PPC_INST_LD_TOC)
return 0;
+
+ if (insn_val != PPC_RAW_NOP()) {
+ pr_err("%s: Expected nop after call, got %08x at %pS\n",
+ me->name, insn_val, instruction);
+ return -ENOEXEC;
}
+
/* ld r2,R2_STACK_OFFSET(r1) */
- *instruction = PPC_INST_LD_TOC;
- return 1;
+ return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC));
}
int apply_relocate_add(Elf64_Shdr *sechdrs,
@@ -534,6 +776,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
pr_debug("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+#ifndef CONFIG_PPC_KERNEL_PCREL
/* First time we're called, we can fix up .TOC. */
if (!me->arch.toc_fixed) {
sym = find_dot_toc(sechdrs, strtab, symindex);
@@ -543,7 +786,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym->st_value = my_r2(sechdrs, me);
me->arch.toc_fixed = true;
}
-
+#endif
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -571,6 +814,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(unsigned long *)location = value;
break;
+#ifndef CONFIG_PPC_KERNEL_PCREL
case R_PPC64_TOC:
*(unsigned long *)location = my_r2(sechdrs, me);
break;
@@ -630,20 +874,24 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
= (*((uint16_t *) location) & ~0xffff)
| (value & 0xffff);
break;
+#endif
case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* PCREL still generates REL24 for mcount */
+ case R_PPC64_REL24_NOTOC:
+#endif
/* FIXME: Handle weak symbols here --RR */
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_LIVEPATCH) {
/* External: go via stub */
- value = stub_for_addr(sechdrs, value, me);
+ value = stub_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
if (!value)
return -ENOENT;
- if (!restore_r2(strtab + sym->st_name,
- (u32 *)location + 1, me))
+ if (restore_r2(strtab + sym->st_name,
+ (u32 *)location + 1, me))
return -ENOEXEC;
-
- squash_toc_save_inst(strtab + sym->st_name, value);
} else
value += local_entry_offset(sym);
@@ -656,9 +904,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
/* Only replace bits 2 through 26 */
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
- | (value & 0x03fffffc);
+ value = (*(uint32_t *)location & ~PPC_LI_MASK) | PPC_LI(value);
+
+ if (patch_instruction((u32 *)location, ppc_inst(value)))
+ return -EFAULT;
+
break;
case R_PPC64_REL64:
@@ -678,6 +928,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(u32 *)location = value;
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_PCREL34: {
+ unsigned long absvalue = value;
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+
+ if (value + 0x200000000 > 0x3ffffffff) {
+ if (sym->st_shndx != me->arch.pcpu_section) {
+ pr_err("%s: REL34 %li out of range!\n",
+ me->name, (long)value);
+ return -ENOEXEC;
+ }
+
+ /*
+ * per-cpu section is special cased because
+ * it is moved during loading, so has to be
+ * converted to use GOT.
+ */
+ value = got_for_addr(sechdrs, absvalue, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+
+ /* Turn pla into pld */
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+ (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+ return -EFAULT;
+ }
+
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
+ (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+ return -EFAULT;
+
+ break;
+ }
+
+#else
case R_PPC64_TOCSAVE:
/*
* Marker reloc indicates we don't have to save r2.
@@ -685,8 +976,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* it.
*/
break;
+#endif
case R_PPC64_ENTRY:
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ break;
+
/*
* Optimize ELFv2 large code model entry point if
* the TOC is within 2GB range of current location.
@@ -699,21 +994,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* ld r2, ...(r12)
* add r2, r2, r12
*/
- if ((((uint32_t *)location)[0] & ~0xfffc) !=
- (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12)))
+ if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0))
break;
- if (((uint32_t *)location)[1] !=
- (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12)))
+ if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12))
break;
/*
* If found, replace it with:
* addis r2, r12, (.TOC.-func)@ha
* addi r2, r2, (.TOC.-func)@l
*/
- ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) |
- __PPC_RA(R12) | PPC_HA(value);
- ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) |
- __PPC_RA(R2) | PPC_LO(value);
+ ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value));
+ ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value));
break;
case R_PPC64_REL16_HA:
@@ -733,6 +1024,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xffff);
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_GOT_PCREL34:
+ value = got_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+ ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) |
+ ((value >> 16) & 0x3ffff);
+ ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) |
+ (value & 0xffff);
+ break;
+#endif
+
default:
pr_err("%s: Unknown ADD relocation: %lu\n",
me->name,
@@ -745,89 +1050,53 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
#ifdef CONFIG_DYNAMIC_FTRACE
-
-#ifdef CONFIG_MPROFILE_KERNEL
-
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-/*
- * For mprofile-kernel we use a special stub for ftrace_caller() because we
- * can't rely on r2 containing this module's TOC when we enter the stub.
- *
- * That can happen if the function calling us didn't need to use the toc. In
- * that case it won't have setup r2, and the r2 value will be either the
- * kernel's toc, or possibly another modules toc.
- *
- * To deal with that this stub uses the kernel toc, which is always accessible
- * via the paca (in r13). The target (ftrace_caller()) is responsible for
- * saving and restoring the toc before returning.
- */
-static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
- struct module *me, unsigned long addr)
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
{
- struct ppc64_stub_entry *entry;
- unsigned int i, num_stubs;
- /*
- * ld r12,PACATOC(r13)
- * addis r12,r12,<high>
- * addi r12,r12,<low>
- * mtctr r12
- * bctr
- */
- static u32 stub_insns[] = {
- PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC,
- PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12),
- PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12),
- PPC_INST_MTCTR | __PPC_RS(R12),
- PPC_INST_BCTR,
- };
- long reladdr;
+ struct ppc64_stub_entry *stub;
+ func_desc_t funcdata;
+ u32 magic;
- num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry);
+ if (!within_module_core(addr, mod)) {
+ pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
- /* Find the next available stub entry */
- entry = (void *)sechdrs[me->arch.stubs_section].sh_addr;
- for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++);
+ stub = (struct ppc64_stub_entry *)addr;
- if (i >= num_stubs) {
- pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name);
- return 0;
+ if (copy_from_kernel_nofault(&magic, &stub->magic,
+ sizeof(magic))) {
+ pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
}
- memcpy(entry->jump, stub_insns, sizeof(stub_insns));
-
- /* Stub uses address relative to kernel toc (from the paca) */
- reladdr = addr - kernel_toc_addr();
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address of %ps out of range of kernel_toc.\n",
- me->name, (void *)addr);
- return 0;
+ if (magic != STUB_MAGIC) {
+ pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
}
- entry->jump[1] |= PPC_HA(reladdr);
- entry->jump[2] |= PPC_LO(reladdr);
+ if (copy_from_kernel_nofault(&funcdata, &stub->funcdata,
+ sizeof(funcdata))) {
+ pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
- /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */
- entry->funcdata = func_desc(addr);
- entry->magic = STUB_MAGIC;
+ *target = stub_func_addr(funcdata);
- return (unsigned long)entry;
-}
-#else
-static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
- struct module *me, unsigned long addr)
-{
- return stub_for_addr(sechdrs, addr, me);
+ return 0;
}
-#endif
int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
{
- mod->arch.tramp = create_ftrace_stub(sechdrs, mod,
- (unsigned long)ftrace_caller);
+ mod->arch.tramp = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_caller,
+ mod,
+ "ftrace_caller");
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod,
- (unsigned long)ftrace_regs_caller);
+ mod->arch.tramp_regs = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_regs_caller,
+ mod,
+ "ftrace_regs_caller");
if (!mod->arch.tramp_regs)
return -ENOENT;
#endif
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index fb4f61096613..e385d3164648 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -19,9 +19,9 @@
#include <linux/pstore.h>
#include <linux/zlib.h>
#include <linux/uaccess.h>
+#include <linux/of.h>
#include <asm/nvram.h>
#include <asm/rtas.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#undef DEBUG_NVRAM
@@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = {
.write = nvram_pstore_write,
};
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
int rc = 0;
@@ -562,7 +562,7 @@ static int nvram_pstore_init(void)
return rc;
}
#else
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
return -1;
}
@@ -647,6 +647,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
{
struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
static unsigned int oops_count = 0;
+ static struct kmsg_dump_iter iter;
static bool panicking = false;
static DEFINE_SPINLOCK(lock);
unsigned long flags;
@@ -655,9 +656,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
int rc = -1;
switch (reason) {
- case KMSG_DUMP_RESTART:
- case KMSG_DUMP_HALT:
- case KMSG_DUMP_POWEROFF:
+ case KMSG_DUMP_SHUTDOWN:
/* These are almost always orderly shutdowns. */
return;
case KMSG_DUMP_OOPS:
@@ -683,13 +682,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
return;
if (big_oops_buf) {
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
big_oops_buf, big_oops_buf_sz, &text_len);
rc = zip_oops(text_len);
}
if (rc != 0) {
- kmsg_dump_rewind(dumper);
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
@@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
* Per the criteria passed via nvram_remove_partition(), should this
* partition be removed? 1=remove, 0=keep
*/
-static int nvram_can_remove_partition(struct nvram_partition *part,
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
const char *name, int sig, const char *exceptions[])
{
if (part->header.signature != sig)
@@ -854,8 +854,8 @@ loff_t __init nvram_create_partition(const char *name, int sig,
BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
/* Convert sizes from bytes to blocks */
- req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
- min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
/* If no minimum size specified, make it the same as the
* requested size
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 427fc22f72b6..adc76fa58d1e 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -13,9 +13,7 @@
#include <linux/export.h>
#include <linux/mod_devicetable.h>
#include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/atomic.h>
#include <asm/errno.h>
@@ -62,12 +60,8 @@ static int of_pci_phb_probe(struct platform_device *dev)
/* Init pci_dn data structures */
pci_devs_phb_init_dynamic(phb);
- /* Create EEH devices for the PHB */
- eeh_dev_phb_init_dynamic(phb);
-
- /* Register devices with EEH */
- if (dev->dev.of_node->child)
- eeh_add_device_tree_early(PCI_DN(dev->dev.of_node));
+ /* Create EEH PE for the PHB */
+ eeh_phb_pe_create(phb);
/* Scan the bus */
pcibios_scan_phb(phb);
@@ -80,15 +74,9 @@ static int of_pci_phb_probe(struct platform_device *dev)
*/
pcibios_claim_one_bus(phb->bus);
- /* Finish EEH setup */
- eeh_add_device_tree_late(phb->bus);
-
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(phb->bus);
-
return 0;
}
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 024f7aad1952..004fae2044a3 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -16,25 +16,18 @@
#include <asm/code-patching.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
+#include <asm/inst.h>
-#define TMPL_CALL_HDLR_IDX \
- (optprobe_template_call_handler - optprobe_template_entry)
-#define TMPL_EMULATE_IDX \
- (optprobe_template_call_emulate - optprobe_template_entry)
-#define TMPL_RET_IDX \
- (optprobe_template_ret - optprobe_template_entry)
-#define TMPL_OP_IDX \
- (optprobe_template_op_address - optprobe_template_entry)
-#define TMPL_INSN_IDX \
- (optprobe_template_insn - optprobe_template_entry)
-#define TMPL_END_IDX \
- (optprobe_template_end - optprobe_template_entry)
-
-DEFINE_INSN_CACHE_OPS(ppc_optinsn);
+#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry)
+#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry)
static bool insn_page_in_use;
-static void *__ppc_alloc_insn_page(void)
+void *alloc_optinsn_page(void)
{
if (insn_page_in_use)
return NULL;
@@ -42,20 +35,11 @@ static void *__ppc_alloc_insn_page(void)
return &optinsn_slot;
}
-static void __ppc_free_insn_page(void *page __maybe_unused)
+void free_optinsn_page(void *page)
{
insn_page_in_use = false;
}
-struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
- .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
- /* insn_size initialized later */
- .alloc = __ppc_alloc_insn_page,
- .free = __ppc_free_insn_page,
- .nr_garbage = 0,
-};
-
/*
* Check if we can optimize this probe. Returns NIP post-emulation if this can
* be optimized and 0 otherwise.
@@ -65,14 +49,15 @@ static unsigned long can_optimize(struct kprobe *p)
struct pt_regs regs;
struct instruction_op op;
unsigned long nip = 0;
+ unsigned long addr = (unsigned long)p->addr;
/*
* kprobe placed for kretprobe during boot time
* has a 'nop' instruction, which can be emulated.
* So further checks can be skipped.
*/
- if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
- return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
+ if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
+ return addr + sizeof(kprobe_opcode_t);
/*
* We only support optimizing kernel addresses, but not
@@ -80,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p)
*
* FIXME: Optimize kprobes placed in module addresses.
*/
- if (!is_kernel_addr((unsigned long)p->addr))
+ if (!is_kernel_addr(addr))
return 0;
memset(&regs, 0, sizeof(struct pt_regs));
- regs.nip = (unsigned long)p->addr;
+ regs.nip = addr;
regs.trap = 0x0;
regs.msr = MSR_KERNEL;
@@ -99,8 +84,8 @@ static unsigned long can_optimize(struct kprobe *p)
* Ensure that the instruction is not a conditional branch,
* and that can be emulated.
*/
- if (!is_conditional_branch(*p->ainsn.insn) &&
- analyse_instr(&op, &regs, *p->ainsn.insn) == 1) {
+ if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) &&
+ analyse_instr(&op, &regs, ppc_inst_read(p->ainsn.insn)) == 1) {
emulate_update_regs(&regs, &op);
nip = regs.nip;
}
@@ -121,88 +106,66 @@ static void optimized_callback(struct optimized_kprobe *op,
kprobes_inc_nmissed_count(&op->kp);
} else {
__this_cpu_write(current_kprobe, &op->kp);
- regs->nip = (unsigned long)op->kp.addr;
+ regs_set_return_ip(regs, (unsigned long)op->kp.addr);
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- preempt_enable_no_resched();
+ preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
if (op->optinsn.insn) {
- free_ppc_optinsn_slot(op->optinsn.insn, 1);
+ free_optinsn_slot(op->optinsn.insn, 1);
op->optinsn.insn = NULL;
}
}
-/*
- * emulate_step() requires insn to be emulated as
- * second parameter. Load register 'r4' with the
- * instruction.
- */
-void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
- /* addis r4,0,(insn)@h */
- patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) |
- ((val >> 16) & 0xffff));
- addr++;
-
- /* ori r4,r4,(insn)@l */
- patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) |
- ___PPC_RS(4) | (val & 0xffff));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
}
/*
* Generate instructions to load provided immediate 64-bit value
- * to register 'r3' and patch these instructions at 'addr'.
+ * to register 'reg' and patch these instructions at 'addr'.
*/
-void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
+static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
{
- /* lis r3,(op)@highest */
- patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) |
- ((val >> 48) & 0xffff));
- addr++;
-
- /* ori r3,r3,(op)@higher */
- patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
- ___PPC_RS(3) | ((val >> 32) & 0xffff));
- addr++;
-
- /* rldicr r3,r3,32,31 */
- patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) |
- ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31));
- addr++;
-
- /* oris r3,r3,(op)@h */
- patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) |
- ___PPC_RS(3) | ((val >> 16) & 0xffff));
- addr++;
-
- /* ori r3,r3,(op)@l */
- patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
- ___PPC_RS(3) | (val & 0xffff));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32)));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
+}
+
+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ patch_imm64_load_insns(val, reg, addr);
+ else
+ patch_imm32_load_insns(val, reg, addr);
}
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
- kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step;
- kprobe_opcode_t *op_callback_addr, *emulate_step_addr;
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
+ unsigned long op_callback_addr, emulate_step_addr;
+ kprobe_opcode_t *buff;
long b_offset;
unsigned long nip, size;
int rc, i;
- kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
-
nip = can_optimize(p);
if (!nip)
return -EILSEQ;
/* Allocate instruction slot for detour buffer */
- buff = get_ppc_optinsn_slot();
+ buff = get_optinsn_slot();
if (!buff)
return -ENOMEM;
@@ -220,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
goto error;
/* Check if the return address is also within 32MB range */
- b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
- (unsigned long)nip;
+ b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip;
if (!is_offset_in_branch_range(b_offset))
goto error;
@@ -230,7 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
pr_devel("Copying template to %p, size %lu\n", buff, size);
for (i = 0; i < size; i++) {
- rc = patch_instruction(buff + i, *(optprobe_template_entry + i));
+ rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i)));
if (rc < 0)
goto error;
}
@@ -239,27 +201,25 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
* Fixup the template with instructions to:
* 1. load the address of the actual probepoint
*/
- patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX);
+ patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
/*
* 2. branch to optimized_callback() and emulate_step()
*/
- op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
- emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
+ op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback");
+ emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step");
if (!op_callback_addr || !emulate_step_addr) {
WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
goto error;
}
- branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
- (unsigned long)op_callback_addr,
- BRANCH_SET_LINK);
+ rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX,
+ op_callback_addr, BRANCH_SET_LINK);
- branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
- (unsigned long)emulate_step_addr,
- BRANCH_SET_LINK);
+ rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX,
+ emulate_step_addr, BRANCH_SET_LINK);
- if (!branch_op_callback || !branch_emulate_step)
+ if (rc)
goto error;
patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
@@ -268,22 +228,22 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 3. load instruction to be emulated into relevant register, and
*/
- patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX);
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
/*
* 4. branch back from trampoline
*/
- patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0);
+ patch_branch(buff + TMPL_RET_IDX, nip, 0);
- flush_icache_range((unsigned long)buff,
- (unsigned long)(&buff[TMPL_END_IDX]));
+ flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX]));
op->optinsn.insn = buff;
return 0;
error:
- free_ppc_optinsn_slot(buff, 0);
+ free_optinsn_slot(buff, 0);
return -ERANGE;
}
@@ -305,6 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
void arch_optimize_kprobes(struct list_head *oplist)
{
+ ppc_inst_t instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
@@ -313,11 +274,9 @@ void arch_optimize_kprobes(struct list_head *oplist)
* Backup instructions which will be replaced
* by jump address
*/
- memcpy(op->optinsn.copied_insn, op->kp.addr,
- RELATIVEJUMP_SIZE);
- patch_instruction(op->kp.addr,
- create_branch((unsigned int *)op->kp.addr,
- (unsigned long)op->optinsn.insn, 0));
+ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE);
+ create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0);
+ patch_instruction(op->kp.addr, instr);
list_del_init(&op->list);
}
}
@@ -327,8 +286,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
arch_arm_kprobe(&op->kp);
}
-void arch_unoptimize_kprobes(struct list_head *oplist,
- struct list_head *done_list)
+void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list)
{
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
@@ -339,9 +297,8 @@ void arch_unoptimize_kprobes(struct list_head *oplist,
}
}
-int arch_within_optimized_kprobe(struct optimized_kprobe *op,
- unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, kprobe_opcode_t *addr)
{
- return ((unsigned long)op->kp.addr <= addr &&
- (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
+ return (op->kp.addr <= addr &&
+ op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr);
}
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index cf383520843f..35932f45fb4e 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -9,6 +9,16 @@
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
+#ifdef CONFIG_PPC64
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
+#else
+#define SAVE_30GPRS(base) stmw r2, GPR2(base)
+#define REST_30GPRS(base) lmw r2, GPR2(base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop
+#endif
+
#define OPT_SLOT_SIZE 65536
.balign 4
@@ -30,39 +40,41 @@ optinsn_slot:
.global optprobe_template_entry
optprobe_template_entry:
/* Create an in-memory pt_regs */
- stdu r1,-INT_FRAME_SIZE(r1)
+ PPC_STLU r1,-INT_FRAME_SIZE(r1)
SAVE_GPR(0,r1)
/* Save the previous SP into stack */
addi r0,r1,INT_FRAME_SIZE
- std r0,GPR1(r1)
- SAVE_10GPRS(2,r1)
- SAVE_10GPRS(12,r1)
- SAVE_10GPRS(22,r1)
+ PPC_STL r0,GPR1(r1)
+ SAVE_30GPRS(r1)
/* Save SPRS */
mfmsr r5
- std r5,_MSR(r1)
+ PPC_STL r5,_MSR(r1)
li r5,0x700
- std r5,_TRAP(r1)
+ PPC_STL r5,_TRAP(r1)
li r5,0
- std r5,ORIG_GPR3(r1)
- std r5,RESULT(r1)
+ PPC_STL r5,ORIG_GPR3(r1)
+ PPC_STL r5,RESULT(r1)
mfctr r5
- std r5,_CTR(r1)
+ PPC_STL r5,_CTR(r1)
mflr r5
- std r5,_LINK(r1)
+ PPC_STL r5,_LINK(r1)
mfspr r5,SPRN_XER
- std r5,_XER(r1)
+ PPC_STL r5,_XER(r1)
mfcr r5
- std r5,_CCR(r1)
+ PPC_STL r5,_CCR(r1)
+#ifdef CONFIG_PPC64
lbz r5,PACAIRQSOFTMASK(r13)
std r5,SOFTE(r1)
+#endif
/*
* We may get here from a module, so load the kernel TOC in r2.
* The original TOC gets restored when pt_regs is restored
* further below.
*/
- ld r2,PACATOC(r13)
+#ifdef CONFIG_PPC64
+ LOAD_PACA_TOC()
+#endif
.global optprobe_template_op_address
optprobe_template_op_address:
@@ -70,13 +82,10 @@ optprobe_template_op_address:
* Parameters to optimized_callback():
* 1. optimized_kprobe structure in r3
*/
- nop
- nop
- nop
- nop
- nop
+ TEMPLATE_FOR_IMM_LOAD_INSNS
+
/* 2. pt_regs pointer in r4 */
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
.global optprobe_template_call_handler
optprobe_template_call_handler:
@@ -87,13 +96,12 @@ optprobe_template_call_handler:
* Parameters for instruction emulation:
* 1. Pass SP in register r3.
*/
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
.global optprobe_template_insn
optprobe_template_insn:
/* 2, Pass instruction to be emulated in r4 */
- nop
- nop
+ TEMPLATE_FOR_IMM_LOAD_INSNS
.global optprobe_template_call_emulate
optprobe_template_call_emulate:
@@ -104,20 +112,18 @@ optprobe_template_call_emulate:
* All done.
* Now, restore the registers...
*/
- ld r5,_MSR(r1)
+ PPC_LL r5,_MSR(r1)
mtmsr r5
- ld r5,_CTR(r1)
+ PPC_LL r5,_CTR(r1)
mtctr r5
- ld r5,_LINK(r1)
+ PPC_LL r5,_LINK(r1)
mtlr r5
- ld r5,_XER(r1)
+ PPC_LL r5,_XER(r1)
mtxer r5
- ld r5,_CCR(r1)
+ PPC_LL r5,_CCR(r1)
mtcr r5
REST_GPR(0,r1)
- REST_10GPRS(2,r1)
- REST_10GPRS(12,r1)
- REST_10GPRS(22,r1)
+ REST_30GPRS(r1)
/* Restore the previous SP */
addi r1,r1,INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 949eceb254d8..7502066c3c53 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -8,11 +8,11 @@
#include <linux/memblock.h>
#include <linux/sched/task.h>
#include <linux/numa.h>
+#include <linux/pgtable.h>
#include <asm/lppaca.h>
#include <asm/paca.h>
#include <asm/sections.h>
-#include <asm/pgtable.h>
#include <asm/kexec.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
@@ -56,8 +56,8 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#define LPPACA_SIZE 0x400
-static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
- unsigned long limit, int cpu)
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
+ int cpu)
{
size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
static unsigned long shared_lppaca_size;
@@ -67,6 +67,13 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
if (!shared_lppaca) {
memblock_set_bottom_up(true);
+ /*
+ * See Documentation/arch/powerpc/ultravisor.rst for more details.
+ *
+ * UV/HV data sharing is in PAGE_SIZE granularity. In order to
+ * minimize the number of pages shared, align the allocation to
+ * PAGE_SIZE.
+ */
shared_lppaca =
memblock_alloc_try_nid(shared_lppaca_total_size,
PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
@@ -86,7 +93,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
* This is very early in boot, so no harm done if the kernel crashes at
* this point.
*/
- BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+ BUG_ON(shared_lppaca_size > shared_lppaca_total_size);
return ptr;
}
@@ -121,7 +128,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
return NULL;
if (is_secure_guest())
- lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu);
else
lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
@@ -131,8 +138,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
}
#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_BOOK3S_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
@@ -161,8 +167,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
return s;
}
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -181,12 +186,14 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
#ifdef CONFIG_PPC_PSERIES
new_paca->lppaca_ptr = NULL;
#endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
+#ifndef CONFIG_PPC_KERNEL_PCREL
new_paca->kernel_toc = kernel_toc_addr();
+#endif
new_paca->kernelbase = (unsigned long) _stext;
/* Only set MSR:IR/DR when MMU is initialized */
new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
@@ -194,11 +201,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
new_paca->slb_shadow_ptr = NULL;
#endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* For now -- if we have threads this will be adjusted later */
new_paca->tcd_ptr = &new_paca->tcd;
#endif
@@ -210,15 +217,19 @@ void setup_paca(struct paca_struct *new_paca)
/* Setup r13 */
local_paca = new_paca;
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* On Book3E, initialize the TLB miss exception frames */
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
#else
- /* In HV mode, we setup both HPACA and PACA to avoid problems
+ /*
+ * In HV mode, we setup both HPACA and PACA to avoid problems
* if we do a GET_PACA() before the feature fixups have been
- * applied
+ * applied.
+ *
+ * Normally you should test against CPU_FTR_HVMODE, but CPU features
+ * are not yet set up when we first reach here.
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ if (mfmsr() & MSR_HV)
mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
mtspr(SPRN_SPRG_PACA, local_paca);
@@ -267,7 +278,7 @@ void __init allocate_paca(int cpu)
#ifdef CONFIG_PPC_PSERIES
paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
paca_struct_size += sizeof(struct paca_struct);
@@ -279,17 +290,17 @@ void __init free_unused_pacas(void)
new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
if (new_ptrs_size < paca_ptrs_size)
- memblock_free(__pa(paca_ptrs) + new_ptrs_size,
- paca_ptrs_size - new_ptrs_size);
+ memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
+ paca_ptrs_size - new_ptrs_size);
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = new_ptrs_size;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (early_radix_enabled()) {
/* Ugly fixup, see new_slb_shadow() */
- memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
- sizeof(struct slb_shadow));
+ memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+ sizeof(struct slb_shadow));
paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
}
#endif
@@ -298,24 +309,15 @@ void __init free_unused_pacas(void)
paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void copy_mm_to_paca(struct mm_struct *mm)
{
-#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
- get_paca()->mm_ctx_id = context->id;
-#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
- get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
LOW_SLICE_ARRAY_SZ);
memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
TASK_SLICE_ARRAY_SZ(context));
-#else /* CONFIG_PPC_MM_SLICES */
- get_paca()->mm_ctx_user_psize = context->user_psize;
- get_paca()->mm_ctx_sllp = context->sllp;
-#endif
-#else /* !CONFIG_PPC_BOOK3S */
- return;
-#endif
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 1c448cf25506..d95a48eff412 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -29,19 +29,21 @@
#include <linux/slab.h>
#include <linux/vgaarb.h>
#include <linux/numa.h>
+#include <linux/msi.h>
+#include <linux/irqdomain.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/byteorder.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/eeh.h>
+#include <asm/setup.h>
#include "../../../drivers/pci/pci.h"
-/* hose_spinlock protects accesses to the the phb_bitmap. */
+/* hose_spinlock protects accesses to the phb_bitmap. */
static DEFINE_SPINLOCK(hose_spinlock);
LIST_HEAD(hose_list);
@@ -61,28 +63,40 @@ EXPORT_SYMBOL(isa_mem_base);
static const struct dma_map_ops *pci_dma_ops;
-void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
-/*
- * This function should run under locking protection, specifically
- * hose_spinlock.
- */
static int get_phb_number(struct device_node *dn)
{
int ret, phb_id = -1;
- u32 prop_32;
u64 prop;
/*
* Try fixed PHB numbering first, by checking archs and reading
- * the respective device-tree properties. Firstly, try powernv by
- * reading "ibm,opal-phbid", only present in OPAL environment.
+ * the respective device-tree properties. Firstly, try reading
+ * standard "linux,pci-domain", then try reading "ibm,opal-phbid"
+ * (only present in powernv OPAL environment), then try device-tree
+ * alias and as the last try to use lower bits of "reg" property.
*/
- ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
+ ret = of_get_pci_domain_nr(dn);
+ if (ret >= 0) {
+ prop = ret;
+ ret = 0;
+ }
+ if (ret)
+ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
+
if (ret) {
+ ret = of_alias_get_id(dn, "pci");
+ if (ret >= 0) {
+ prop = ret;
+ ret = 0;
+ }
+ }
+ if (ret) {
+ u32 prop_32;
ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
prop = prop_32;
}
@@ -90,18 +104,20 @@ static int get_phb_number(struct device_node *dn)
if (!ret)
phb_id = (int)(prop & (MAX_PHBS - 1));
+ spin_lock(&hose_spinlock);
+
/* We need to be sure to not use the same PHB number twice. */
if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap))
- return phb_id;
+ goto out_unlock;
- /*
- * If not pseries nor powernv, or if fixed PHB numbering tried to add
- * the same PHB number twice, then fallback to dynamic PHB numbering.
- */
+ /* If everything fails then fallback to dynamic PHB numbering. */
phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
BUG_ON(phb_id >= MAX_PHBS);
set_bit(phb_id, phb_bitmap);
+out_unlock:
+ spin_unlock(&hose_spinlock);
+
return phb_id;
}
@@ -109,14 +125,17 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
{
struct pci_controller *phb;
- phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL);
if (phb == NULL)
return NULL;
- spin_lock(&hose_spinlock);
+
phb->global_number = get_phb_number(dev);
+
+ spin_lock(&hose_spinlock);
list_add_tail(&phb->list_node, &hose_list);
spin_unlock(&hose_spinlock);
- phb->dn = dev;
+
+ phb->dn = of_node_get(dev);
phb->is_dynamic = slab_is_available();
#ifdef CONFIG_PPC64
if (dev) {
@@ -139,7 +158,7 @@ void pcibios_free_controller(struct pci_controller *phb)
/* Clear bit of phb_bitmap to allow reuse of this PHB number. */
if (phb->global_number < MAX_PHBS)
clear_bit(phb->global_number, phb_bitmap);
-
+ of_node_put(phb->dn);
list_del(&phb->list_node);
spin_unlock(&hose_spinlock);
@@ -261,12 +280,6 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
#endif /* CONFIG_PCI_IOV */
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- if (ppc_md.pcibios_bus_add_device)
- ppc_md.pcibios_bus_add_device(pdev);
-}
-
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -359,6 +372,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
return NULL;
}
+struct pci_intx_virq {
+ int virq;
+ struct kref kref;
+ struct list_head list_node;
+};
+
+static LIST_HEAD(intx_list);
+static DEFINE_MUTEX(intx_mutex);
+
+static void ppc_pci_intx_release(struct kref *kref)
+{
+ struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
+
+ list_del(&vi->list_node);
+ irq_dispose_mapping(vi->virq);
+ kfree(vi);
+}
+
+static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct pci_dev *pdev = to_pci_dev(data);
+
+ if (action == BUS_NOTIFY_DEL_DEVICE) {
+ struct pci_intx_virq *vi;
+
+ mutex_lock(&intx_mutex);
+ list_for_each_entry(vi, &intx_list, list_node) {
+ if (vi->virq == pdev->irq) {
+ kref_put(&vi->kref, ppc_pci_intx_release);
+ break;
+ }
+ }
+ mutex_unlock(&intx_mutex);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_pci_unmap_irq_notifier = {
+ .notifier_call = ppc_pci_unmap_irq_line,
+};
+
+static int ppc_pci_register_irq_notifier(void)
+{
+ return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
+}
+arch_initcall(ppc_pci_register_irq_notifier);
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -367,6 +429,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
static int pci_read_irq_line(struct pci_dev *pci_dev)
{
int virq;
+ struct pci_intx_virq *vi, *vitmp;
+
+ /* Preallocate vi as rewind is complex if this fails after mapping */
+ vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
+ if (!vi)
+ return -1;
pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
@@ -383,12 +451,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
* function.
*/
if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
- return -1;
+ goto error_exit;
if (pin == 0)
- return -1;
+ goto error_exit;
if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
line == 0xff || line == 0) {
- return -1;
+ goto error_exit;
}
pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
line, pin);
@@ -400,14 +468,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
if (!virq) {
pr_debug(" Failed to map !\n");
- return -1;
+ goto error_exit;
}
pr_debug(" Mapped to linux irq %d\n", virq);
pci_dev->irq = virq;
+ mutex_lock(&intx_mutex);
+ list_for_each_entry(vitmp, &intx_list, list_node) {
+ if (vitmp->virq == virq) {
+ kref_get(&vitmp->kref);
+ kfree(vi);
+ vi = NULL;
+ break;
+ }
+ }
+ if (vi) {
+ vi->virq = virq;
+ kref_init(&vi->kref);
+ list_add_tail(&vi->list_node, &intx_list);
+ }
+ mutex_unlock(&intx_mutex);
+
return 0;
+error_exit:
+ kfree(vi);
+ return -1;
}
/*
@@ -434,8 +521,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
* PCI device, it tries to find the PCI device first and calls the
* above routine
*/
-pgprot_t pci_phys_mem_access_prot(struct file *file,
- unsigned long pfn,
+pgprot_t pci_phys_mem_access_prot(unsigned long pfn,
unsigned long size,
pgprot_t prot)
{
@@ -734,7 +820,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
range.cpu_addr, range.cpu_addr + range.size - 1,
range.pci_addr,
- (range.pci_space & 0x40000000) ?
+ (range.flags & IORESOURCE_PREFETCH) ?
"Prefetch" : "");
/* We support only 3 memory ranges */
@@ -793,6 +879,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
static void pcibios_fixup_resources(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct resource *res;
int i;
if (!hose) {
@@ -804,9 +891,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
if (dev->is_virtfn)
return;
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- struct resource *res = dev->resource + i;
+ pci_dev_for_each_resource(dev, res, i) {
struct pci_bus_region reg;
+
if (!res->flags)
continue;
@@ -964,7 +1051,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
phb->controller_ops.dma_bus_setup(bus);
}
-static void pcibios_setup_device(struct pci_dev *dev)
+void pcibios_bus_add_device(struct pci_dev *dev)
{
struct pci_controller *phb;
/* Fixup NUMA node as it may not be setup yet by the generic
@@ -985,43 +1072,26 @@ static void pcibios_setup_device(struct pci_dev *dev)
pci_read_irq_line(dev);
if (ppc_md.pci_irq_fixup)
ppc_md.pci_irq_fixup(dev);
+
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(dev);
}
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_device_add(struct pci_dev *dev)
{
- /*
- * We can only call pcibios_setup_device() after bus setup is complete,
- * since some of the platform specific DMA setup code depends on it.
- */
- if (dev->bus->is_added)
- pcibios_setup_device(dev);
+ struct irq_domain *d;
#ifdef CONFIG_PCI_IOV
if (ppc_md.pcibios_fixup_sriov)
ppc_md.pcibios_fixup_sriov(dev);
#endif /* CONFIG_PCI_IOV */
+ d = dev_get_msi_domain(&dev->bus->dev);
+ if (d)
+ dev_set_msi_domain(&dev->dev, d);
return 0;
}
-void pcibios_setup_bus_devices(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- pr_debug("PCI: Fixup bus devices %d (%s)\n",
- bus->number, bus->self ? pci_name(bus->self) : "PHB");
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Cardbus can call us to add new devices to a bus, so ignore
- * those who are already fully discovered
- */
- if (pci_dev_is_added(dev))
- continue;
-
- pcibios_setup_device(dev);
- }
-}
-
void pcibios_set_master(struct pci_dev *dev)
{
/* No special bus mastering setup handling */
@@ -1035,21 +1105,11 @@ void pcibios_fixup_bus(struct pci_bus *bus)
*/
pci_read_bridge_bases(bus);
- /* Now fixup the bus bus */
+ /* Now fixup the bus */
pcibios_setup_bus_self(bus);
-
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
}
EXPORT_SYMBOL(pcibios_fixup_bus);
-void pci_fixup_cardbus(struct pci_bus *bus)
-{
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
-}
-
-
static int skip_isa_ioresource_align(struct pci_dev *dev)
{
if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
@@ -1392,11 +1452,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
struct pci_bus *child_bus;
list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct resource *r;
int i;
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *r = &dev->resource[i];
-
+ pci_dev_for_each_resource(dev, r, i) {
if (r->parent || !r->start || !r->flags)
continue;
@@ -1437,14 +1496,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
pci_assign_unassigned_bus_resources(bus);
}
- /* Fixup EEH */
- eeh_add_device_tree_late(bus);
-
/* Add new devices to global lists. Register in proc, sysfs. */
pci_bus_add_devices(bus);
-
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(bus);
}
EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
@@ -1651,21 +1704,32 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
- int i, class = dev->class >> 8;
- /* When configured as agent, programing interface = 1 */
+ int class = dev->class >> 8;
+ /* When configured as agent, programming interface = 1 */
int prog_if = dev->class & 0xf;
+ struct resource *r;
if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
class == PCI_CLASS_BRIDGE_OTHER) &&
(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
(prog_if == 0) &&
(dev->bus->parent == NULL)) {
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- dev->resource[i].start = 0;
- dev->resource[i].end = 0;
- dev->resource[i].flags = 0;
+ pci_dev_for_each_resource(dev, r) {
+ r->start = 0;
+ r->end = 0;
+ r->flags = 0;
}
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+
+
+static int __init discover_phbs(void)
+{
+ if (ppc_md.discover_phbs)
+ ppc_md.discover_phbs();
+
+ return 0;
+}
+core_initcall(discover_phbs);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index fc62c4bc47b1..0fe251c6ac2c 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
@@ -57,8 +58,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
- eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
@@ -112,8 +111,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
- eeh_add_device_tree_early(PCI_DN(dn));
-
phb = pci_bus_to_host(bus);
mode = PCI_PROBE_NORMAL;
@@ -134,7 +131,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
*/
slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
- pcibios_setup_bus_devices(bus);
max = bus->busn_res.start;
/*
* Scan bridges that are already configured. We don't touch
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index b49e1060a3bf..ce0c8623e563 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -21,7 +21,6 @@
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/sections.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
@@ -37,18 +36,13 @@ int pcibios_assign_bus_offset = 1;
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(pci_dram_offset);
-void pcibios_make_OF_bus_map(void);
-
static void fixup_cpc710_pci64(struct pci_dev* dev);
-static u8* pci_to_OF_bus_map;
/* By default, we don't re-assign bus numbers. We do this only on
* some pmacs
*/
static int pci_assign_all_buses;
-static int pci_bus_count;
-
/* This will remain NULL for now, until isa-bridge.c is made common
* to both 32-bit and 64-bit.
*/
@@ -68,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64);
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+
+static u8* pci_to_OF_bus_map;
+static int pci_bus_count;
+
/*
* Functions below are used on OpenFirmware machines.
*/
@@ -109,7 +108,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
}
}
-void
+static void __init
pcibios_make_OF_bus_map(void)
{
int i;
@@ -153,14 +152,18 @@ pcibios_make_OF_bus_map(void)
}
#endif
}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
+#ifdef CONFIG_PPC_PMAC
/*
* Returns the PCI device matching a given OF node
*/
int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
{
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
struct pci_dev *dev = NULL;
+#endif
const __be32 *reg;
int size;
@@ -175,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
*bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
*devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
+#ifndef CONFIG_PPC_PCI_OF_BUS_MAP
+ return 0;
+#else
/* Ok, here we need some tweak. If we have already renumbered
* all busses, we can't rely on the OF bus number any more.
* the pci_to_OF_bus_map is not enough as several PCI busses
@@ -192,9 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
}
return -ENODEV;
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
}
EXPORT_SYMBOL(pci_device_from_OF_node);
+#endif
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
/* We create the "pci-OF-bus-map" property now so it appears in the
* /proc device tree
*/
@@ -219,6 +228,7 @@ pci_create_OF_bus_map(void)
of_node_put(dn);
}
}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
void pcibios_setup_phb_io_space(struct pci_controller *hose)
{
@@ -234,23 +244,41 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose)
static int __init pcibios_init(void)
{
struct pci_controller *hose, *tmp;
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
int next_busno = 0;
+#endif
printk(KERN_INFO "PCI: Probing PCI hardware\n");
+#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+ /*
+ * Enable PCI domains in /proc when PCI bus numbers are not unique
+ * across all PCI domains to prevent conflicts. And keep PCI domain 0
+ * backward compatible in /proc for video cards.
+ */
+ pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+#endif
+
if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
pci_assign_all_buses = 1;
/* Scan all of the recorded PCI controllers. */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
if (pci_assign_all_buses)
hose->first_busno = next_busno;
+#endif
hose->last_busno = 0xff;
pcibios_scan_phb(hose);
pci_bus_add_devices(hose->bus);
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
if (pci_assign_all_buses || next_busno <= hose->last_busno)
next_busno = hose->last_busno + pcibios_assign_bus_offset;
+#endif
}
+
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP)
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
pci_bus_count = next_busno;
/* OpenFirmware based machines need a map of OF bus
@@ -259,6 +287,8 @@ static int __init pcibios_init(void)
*/
if (pci_assign_all_buses)
pcibios_make_OF_bus_map();
+#endif
+#endif
/* Call common code to handle resource allocation */
pcibios_resource_survey();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index f83d1f69b1dd..e27342ef128b 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -19,10 +19,10 @@
#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/vmalloc.h>
+#include <linux/of.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/byteorder.h>
#include <asm/machdep.h>
@@ -73,7 +73,7 @@ static int __init pcibios_init(void)
return 0;
}
-subsys_initcall(pcibios_init);
+subsys_initcall_sync(pcibios_init);
int pcibios_unmap_io_space(struct pci_bus *bus)
{
@@ -100,7 +100,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
pci_name(bus->self));
#ifdef CONFIG_PPC_BOOK3S_64
- __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
+ __flush_hash_table_range(res->start + _IO_BASE,
res->end + _IO_BASE + 1);
#endif
return 0;
@@ -109,29 +109,53 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
/* Get the host bridge */
hose = pci_bus_to_host(bus);
- /* Check if we have IOs allocated */
- if (hose->io_base_alloc == NULL)
- return 0;
-
pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- /* This is a PHB, we fully unmap the IO area */
- vunmap(hose->io_base_alloc);
-
+ iounmap(hose->io_base_alloc);
return 0;
}
EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-static int pcibios_map_phb_io_space(struct pci_controller *hose)
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
{
struct vm_struct *area;
+ unsigned long addr;
+
+ WARN_ON_ONCE(paddr & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ /*
+ * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP
+ * because we don't care about alignment tricks that the core does in
+ * that case. Maybe we should due to stupid card with incomplete
+ * address decoding but I'd rather not deal with those outside of the
+ * reserved 64K legacy region.
+ */
+ area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END,
+ __builtin_return_address(0));
+ if (!area)
+ return NULL;
+
+ addr = (unsigned long)area->addr;
+ if (ioremap_page_range(addr, addr + size, paddr,
+ pgprot_noncached(PAGE_KERNEL))) {
+ vunmap_range(addr, addr + size);
+ return NULL;
+ }
+
+ return (void __iomem *)addr;
+}
+EXPORT_SYMBOL_GPL(ioremap_phb);
+
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
+{
unsigned long phys_page;
unsigned long size_page;
unsigned long io_virt_offset;
- phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
- size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+ phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+ size_page = ALIGN(hose->pci_io_size, PAGE_SIZE);
/* Make sure IO area address is clear */
hose->io_base_alloc = NULL;
@@ -146,12 +170,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
* with incomplete address decoding but I'd rather not deal with
* those outside of the reserved 64K legacy region.
*/
- area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
- if (area == NULL)
+ hose->io_base_alloc = ioremap_phb(phys_page, size_page);
+ if (!hose->io_base_alloc)
return -ENOMEM;
- hose->io_base_alloc = area->addr;
- hose->io_base_virt = (void __iomem *)(area->addr +
- hose->io_base_phys - phys_page);
+ hose->io_base_virt = hose->io_base_alloc +
+ hose->io_base_phys - phys_page;
pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
@@ -159,11 +182,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
hose->pci_io_size, size_page);
- /* Establish the mapping */
- if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_noncached(PAGE_KERNEL)) == NULL)
- return -ENOMEM;
-
/* Fixup hose IO resource */
io_virt_offset = pcibios_io_space_offset(hose);
hose->io_resource.start += io_virt_offset;
@@ -267,3 +285,14 @@ int pcibus_to_node(struct pci_bus *bus)
}
EXPORT_SYMBOL(pcibus_to_node);
#endif
+
+#ifdef CONFIG_PPC_PMAC
+int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn)
+{
+ if (!PCI_DN(np))
+ return -ENODEV;
+ *bus = PCI_DN(np)->busno;
+ *devfn = PCI_DN(np)->devfn;
+ return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 9524009ca1ae..38561d6a2079 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -12,9 +12,9 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/gfp.h>
+#include <linux/of.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
@@ -124,9 +124,28 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
return NULL;
}
+#ifdef CONFIG_EEH
+static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev;
+
+ /* Allocate EEH device */
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+ if (!edev)
+ return NULL;
+
+ /* Associate EEH device with OF node */
+ pdn->edev = edev;
+ edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
+
+ return edev;
+}
+#endif /* CONFIG_EEH */
+
#ifdef CONFIG_PCI_IOV
-static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
- int vf_index,
+static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
int busno, int devfn)
{
struct pci_dn *pdn;
@@ -143,7 +162,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->parent = parent;
pdn->busno = busno;
pdn->devfn = devfn;
- pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
INIT_LIST_HEAD(&pdn->child_list);
INIT_LIST_HEAD(&pdn->list);
@@ -151,17 +169,15 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return pdn;
}
-#endif
-struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent, *pdn;
int i;
/* Only support IOV for now */
- if (!pdev->is_physfn)
- return pci_get_pdn(pdev);
+ if (WARN_ON(!pdev->is_physfn))
+ return NULL;
/* Check if VFs have been populated */
pdn = pci_get_pdn(pdev);
@@ -176,7 +192,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, i,
+ pdn = add_one_sriov_vf_pdn(parent,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -189,34 +205,23 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
/* Create the EEH device for the VF */
edev = eeh_dev_init(pdn);
BUG_ON(!edev);
+
+ /* FIXME: these should probably be populated by the EEH probe */
edev->physfn = pdev;
+ edev->vf_index = i;
#endif /* CONFIG_EEH */
}
-#endif /* CONFIG_PCI_IOV */
-
return pci_get_pdn(pdev);
}
-void remove_dev_pci_data(struct pci_dev *pdev)
+void remove_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent;
struct pci_dn *pdn, *tmp;
int i;
- /*
- * VF and VF PE are created/released dynamically, so we need to
- * bind/unbind them. Otherwise the VF and VF PE would be mismatched
- * when re-enabling SR-IOV.
- */
- if (pdev->is_virtfn) {
- pdn = pci_get_pdn(pdev);
- pdn->pe_number = IODA_INVALID_PE;
- return;
- }
-
/* Only support IOV PF for now */
- if (!pdev->is_physfn)
+ if (WARN_ON(!pdev->is_physfn))
return;
/* Check if VFs have been populated */
@@ -244,9 +249,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
continue;
#ifdef CONFIG_EEH
- /* Release EEH device for the VF */
+ /*
+ * Release EEH state for this VF. The PCI core
+ * has already torn down the pci_dev for this VF, but
+ * we're responsible to removing the eeh_dev since it
+ * has the same lifetime as the pci_dn that spawned it.
+ */
edev = pdn_to_eeh_dev(pdn);
if (edev) {
+ /*
+ * We allocate pci_dn's for the totalvfs count,
+ * but only the vfs that were activated
+ * have a configured PE.
+ */
+ if (edev->pe)
+ eeh_pe_tree_remove(edev);
+
pdn->edev = NULL;
kfree(edev);
}
@@ -258,8 +276,8 @@ void remove_dev_pci_data(struct pci_dev *pdev)
kfree(pdn);
}
}
-#endif /* CONFIG_PCI_IOV */
}
+#endif /* CONFIG_PCI_IOV */
struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
struct device_node *dn)
@@ -312,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
INIT_LIST_HEAD(&pdn->list);
parent = of_get_parent(dn);
pdn->parent = parent ? PCI_DN(parent) : NULL;
+ of_node_put(parent);
if (pdn->parent)
list_add_tail(&pdn->list, &pdn->parent->child_list);
@@ -425,46 +444,6 @@ void *pci_traverse_device_nodes(struct device_node *start,
}
EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);
-static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
- struct pci_dn *pdn)
-{
- struct list_head *next = pdn->child_list.next;
-
- if (next != &pdn->child_list)
- return list_entry(next, struct pci_dn, list);
-
- while (1) {
- if (pdn == root)
- return NULL;
-
- next = pdn->list.next;
- if (next != &pdn->parent->child_list)
- break;
-
- pdn = pdn->parent;
- }
-
- return list_entry(next, struct pci_dn, list);
-}
-
-void *traverse_pci_dn(struct pci_dn *root,
- void *(*fn)(struct pci_dn *, void *),
- void *data)
-{
- struct pci_dn *pdn = root;
- void *ret;
-
- /* Only scan the child nodes */
- for (pdn = pci_dn_next_one(root, pdn); pdn;
- pdn = pci_dn_next_one(root, pdn)) {
- ret = fn(pdn, data);
- if (ret)
- return ret;
- }
-
- return NULL;
-}
-
static void *add_pdn(struct device_node *dn, void *data)
{
struct pci_controller *hose = data;
@@ -503,28 +482,6 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
pci_traverse_device_nodes(dn, add_pdn, phb);
}
-/**
- * pci_devs_phb_init - Initialize phbs and pci devs under them.
- *
- * This routine walks over all phb's (pci-host bridges) on the
- * system, and sets up assorted pci-related structures
- * (including pci info in the device node structs) for each
- * pci device found underneath. This routine runs once,
- * early in the boot sequence.
- */
-static int __init pci_devs_phb_init(void)
-{
- struct pci_controller *phb, *tmp;
-
- /* This must be done first so the device nodes have valid pci info! */
- list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
- pci_devs_phb_init_dynamic(phb);
-
- return 0;
-}
-
-core_initcall(pci_devs_phb_init);
-
static void pci_dev_pdn_setup(struct pci_dev *pdev)
{
struct pci_dn *pdn;
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index f91d7e94872e..756043dd06e9 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -13,8 +13,8 @@
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
/**
* get_int_prop - Decode a u32 from a device tree property
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(of_create_pci_dev);
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
- * this routine in turn call of_scan_bus() recusively to scan for more child
+ * this routine in turn call of_scan_bus() recursively to scan for more child
* devices.
*/
void of_scan_pci_bridge(struct pci_dev *dev)
@@ -414,7 +414,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
*/
if (!rescan_existing)
pcibios_setup_bus_self(bus);
- pcibios_setup_bus_devices(bus);
/* Now scan child busses */
for_each_pci_bridge(dev, bus)
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 15414c8a2837..9fabb4d9235e 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -74,7 +74,7 @@ void release_pmc_hardware(void)
}
EXPORT_SYMBOL_GPL(release_pmc_hardware);
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
void power4_enable_pmcs(void)
{
unsigned long hid0;
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
deleted file mode 100644
index 2346f8c7ff2e..000000000000
--- a/arch/powerpc/kernel/ppc32.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _PPC64_PPC32_H
-#define _PPC64_PPC32_H
-
-#include <linux/compat.h>
-#include <asm/siginfo.h>
-#include <asm/signal.h>
-
-/*
- * Data types and macros for providing 32b PowerPC support.
- */
-
-/* These are here to support 32-bit syscalls on a 64-bit kernel. */
-
-struct pt_regs32 {
- unsigned int gpr[32];
- unsigned int nip;
- unsigned int msr;
- unsigned int orig_gpr3; /* Used for restarting system calls */
- unsigned int ctr;
- unsigned int link;
- unsigned int xer;
- unsigned int ccr;
- unsigned int mq; /* 601 only (not used at present) */
- unsigned int trap; /* Reason for being here */
- unsigned int dar; /* Fault registers */
- unsigned int dsisr;
- unsigned int result; /* Result of a system call */
-};
-
-struct sigcontext32 {
- unsigned int _unused[4];
- int signal;
- compat_uptr_t handler;
- unsigned int oldmask;
- compat_uptr_t regs; /* 4 byte pointer to the pt_regs32 structure. */
-};
-
-struct mcontext32 {
- elf_gregset_t32 mc_gregs;
- elf_fpregset_t mc_fregs;
- unsigned int mc_pad[2];
- elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16)));
- elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16)));
-};
-
-struct ucontext32 {
- unsigned int uc_flags;
- unsigned int uc_link;
- compat_stack_t uc_stack;
- int uc_pad[7];
- compat_uptr_t uc_regs; /* points to uc_mcontext field */
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
- /* glibc has 1024-bit signal masks, ours are 64-bit */
- int uc_maskext[30];
- int uc_pad2[3];
- struct mcontext32 uc_mcontext;
-};
-
-#endif /* _PPC64_PPC32_H */
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index f3bd0bbf2ae8..a9b9c32d0c1f 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -21,56 +21,33 @@
* different ABIs, though).
*/
_GLOBAL(ppc_save_regs)
- PPC_STL r0,0*SZL(r3)
+ /* This allows stack frame accessor macros and offsets to be used */
+ subi r3,r3,STACK_INT_FRAME_REGS
+ PPC_STL r0,GPR0(r3)
#ifdef CONFIG_PPC32
- stmw r2, 2*SZL(r3)
+ stmw r2,GPR2(r3)
#else
- PPC_STL r2,2*SZL(r3)
- PPC_STL r3,3*SZL(r3)
- PPC_STL r4,4*SZL(r3)
- PPC_STL r5,5*SZL(r3)
- PPC_STL r6,6*SZL(r3)
- PPC_STL r7,7*SZL(r3)
- PPC_STL r8,8*SZL(r3)
- PPC_STL r9,9*SZL(r3)
- PPC_STL r10,10*SZL(r3)
- PPC_STL r11,11*SZL(r3)
- PPC_STL r12,12*SZL(r3)
- PPC_STL r13,13*SZL(r3)
- PPC_STL r14,14*SZL(r3)
- PPC_STL r15,15*SZL(r3)
- PPC_STL r16,16*SZL(r3)
- PPC_STL r17,17*SZL(r3)
- PPC_STL r18,18*SZL(r3)
- PPC_STL r19,19*SZL(r3)
- PPC_STL r20,20*SZL(r3)
- PPC_STL r21,21*SZL(r3)
- PPC_STL r22,22*SZL(r3)
- PPC_STL r23,23*SZL(r3)
- PPC_STL r24,24*SZL(r3)
- PPC_STL r25,25*SZL(r3)
- PPC_STL r26,26*SZL(r3)
- PPC_STL r27,27*SZL(r3)
- PPC_STL r28,28*SZL(r3)
- PPC_STL r29,29*SZL(r3)
- PPC_STL r30,30*SZL(r3)
- PPC_STL r31,31*SZL(r3)
+ SAVE_GPRS(2, 31, r3)
+ lbz r0,PACAIRQSOFTMASK(r13)
+ PPC_STL r0,SOFTE(r3)
#endif
- /* go up one stack frame for SP */
- PPC_LL r4,0(r1)
- PPC_STL r4,1*SZL(r3)
+ /* store current SP */
+ PPC_STL r1,GPR1(r3)
/* get caller's LR */
+ PPC_LL r4,0(r1)
PPC_LL r0,LRSAVE(r4)
- PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK(r3)
+ mflr r0
+ PPC_STL r0,_NIP(r3)
mfmsr r0
- PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_MSR(r3)
mfctr r0
- PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CTR(r3)
mfxer r0
- PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_XER(r3)
mfcr r0
- PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CCR(r3)
li r0,0
- PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_TRAP(r3)
+ PPC_STL r0,ORIG_GPR3(r3)
blr
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index be3758d54e59..b109cd7b5d01 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -7,12 +7,12 @@
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/rtas.h>
#include <linux/uaccess.h>
-#include <asm/prom.h>
#ifdef CONFIG_PPC64
@@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes
loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos,
- PDE_DATA(file_inode(file)), PAGE_SIZE);
+ pde_data(file_inode(file)), PAGE_SIZE);
}
static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
@@ -34,15 +34,15 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return -EINVAL;
remap_pfn_range(vma, vma->vm_start,
- __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
+ __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
return 0;
}
-static const struct file_operations page_map_fops = {
- .llseek = page_map_seek,
- .read = page_map_read,
- .mmap = page_map_mmap
+static const struct proc_ops page_map_proc_ops = {
+ .proc_lseek = page_map_seek,
+ .proc_read = page_map_read,
+ .proc_mmap = page_map_mmap,
};
@@ -51,7 +51,7 @@ static int __init proc_ppc64_init(void)
struct proc_dir_entry *pde;
pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
- &page_map_fops, vdso_data);
+ &page_map_proc_ops, vdso_data);
if (!pde)
return 1;
proc_set_size(pde, PAGE_SIZE);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4df94b6e2f32..9452a54d356c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -34,18 +34,15 @@
#include <linux/ftrace.h>
#include <linux/kernel_stat.h>
#include <linux/personality.h>
-#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
-#include <linux/elf-randomize.h>
#include <linux/pkeys.h>
#include <linux/seq_buf.h>
-#include <asm/pgtable.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/mmu.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/runlatch.h>
@@ -96,7 +93,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
if (tsk == current && tsk->thread.regs &&
MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
!test_thread_flag(TIF_RESTORE_TM)) {
- tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
+ regs_set_return_msr(&tsk->thread.ckpt_regs,
+ tsk->thread.regs->msr);
set_thread_flag(TIF_RESTORE_TM);
}
}
@@ -125,13 +123,11 @@ unsigned long notrace msr_check_and_set(unsigned long bits)
newmsr = oldmsr | bits;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
newmsr |= MSR_VSX;
-#endif
if (oldmsr != newmsr)
- mtmsr_isync(newmsr);
+ newmsr = mtmsr_isync_irqsafe(newmsr);
return newmsr;
}
@@ -145,13 +141,11 @@ void notrace __msr_check_and_clear(unsigned long bits)
newmsr = oldmsr & ~bits;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
newmsr &= ~MSR_VSX;
-#endif
if (oldmsr != newmsr)
- mtmsr_isync(newmsr);
+ mtmsr_isync_irqsafe(newmsr);
}
EXPORT_SYMBOL(__msr_check_and_clear);
@@ -163,11 +157,9 @@ static void __giveup_fpu(struct task_struct *tsk)
save_fpu(tsk);
msr = tsk->thread.regs->msr;
msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
-#endif
- tsk->thread.regs->msr = msr;
+ regs_set_return_msr(tsk->thread.regs, msr);
}
void giveup_fpu(struct task_struct *tsk)
@@ -236,23 +228,11 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
-
-static int restore_fp(struct task_struct *tsk)
-{
- if (tsk->thread.load_fp) {
- load_fp_state(&current->thread.fp_state);
- current->thread.load_fp++;
- return 1;
- }
- return 0;
-}
#else
-static int restore_fp(struct task_struct *tsk) { return 0; }
+static inline void __giveup_fpu(struct task_struct *tsk) { }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
-#define loadvec(thr) ((thr).load_vec)
-
static void __giveup_altivec(struct task_struct *tsk)
{
unsigned long msr;
@@ -260,11 +240,9 @@ static void __giveup_altivec(struct task_struct *tsk)
save_altivec(tsk);
msr = tsk->thread.regs->msr;
msr &= ~MSR_VEC;
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
-#endif
- tsk->thread.regs->msr = msr;
+ regs_set_return_msr(tsk->thread.regs, msr);
}
void giveup_altivec(struct task_struct *tsk)
@@ -318,21 +296,6 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
-
-static int restore_altivec(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
- load_vr_state(&tsk->thread.vr_state);
- tsk->thread.used_vr = 1;
- tsk->thread.load_vec++;
-
- return 1;
- }
- return 0;
-}
-#else
-#define loadvec(thr) 0
-static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
@@ -341,7 +304,7 @@ static void __giveup_vsx(struct task_struct *tsk)
unsigned long msr = tsk->thread.regs->msr;
/*
- * We should never be ssetting MSR_VSX without also setting
+ * We should never be setting MSR_VSX without also setting
* MSR_FP and MSR_VEC
*/
WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
@@ -400,18 +363,6 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
-
-static int restore_vsx(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_VSX)) {
- tsk->thread.used_vsr = 1;
- return 1;
- }
-
- return 0;
-}
-#else
-static inline int restore_vsx(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -456,21 +407,14 @@ static unsigned long msr_all_available;
static int __init init_msr_all_available(void)
{
-#ifdef CONFIG_PPC_FPU
- msr_all_available |= MSR_FP;
-#endif
-#ifdef CONFIG_ALTIVEC
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr_all_available |= MSR_FP;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
msr_all_available |= MSR_VEC;
-#endif
-#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr_all_available |= MSR_VSX;
-#endif
-#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE))
msr_all_available |= MSR_SPE;
-#endif
return 0;
}
@@ -494,23 +438,72 @@ void giveup_all(struct task_struct *tsk)
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
-#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
__giveup_fpu(tsk);
-#endif
-#ifdef CONFIG_ALTIVEC
if (usermsr & MSR_VEC)
__giveup_altivec(tsk);
-#endif
-#ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
-#endif
msr_check_and_clear(msr_all_available);
}
EXPORT_SYMBOL(giveup_all);
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+static bool should_restore_fp(void)
+{
+ if (current->thread.load_fp) {
+ current->thread.load_fp++;
+ return true;
+ }
+ return false;
+}
+
+static void do_restore_fp(void)
+{
+ load_fp_state(&current->thread.fp_state);
+}
+#else
+static bool should_restore_fp(void) { return false; }
+static void do_restore_fp(void) { }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static bool should_restore_altivec(void)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) {
+ current->thread.load_vec++;
+ return true;
+ }
+ return false;
+}
+
+static void do_restore_altivec(void)
+{
+ load_vr_state(&current->thread.vr_state);
+ current->thread.used_vr = 1;
+}
+#else
+static bool should_restore_altivec(void) { return false; }
+static void do_restore_altivec(void) { }
+#endif /* CONFIG_ALTIVEC */
+
+static bool should_restore_vsx(void)
+{
+ if (cpu_has_feature(CPU_FTR_VSX))
+ return true;
+ return false;
+}
+#ifdef CONFIG_VSX
+static void do_restore_vsx(void)
+{
+ current->thread.used_vsr = 1;
+}
+#else
+static void do_restore_vsx(void) { }
+#endif /* CONFIG_VSX */
+
/*
* The exception exit path calls restore_math() with interrupts hard disabled
* but the soft irq state not "reconciled". ftrace code that calls
@@ -524,33 +517,50 @@ EXPORT_SYMBOL(giveup_all);
void notrace restore_math(struct pt_regs *regs)
{
unsigned long msr;
-
- if (!MSR_TM_ACTIVE(regs->msr) &&
- !current->thread.load_fp && !loadvec(current->thread))
- return;
+ unsigned long new_msr = 0;
msr = regs->msr;
- msr_check_and_set(msr_all_available);
/*
- * Only reload if the bit is not set in the user MSR, the bit BEING set
- * indicates that the registers are hot
+ * new_msr tracks the facilities that are to be restored. Only reload
+ * if the bit is not set in the user MSR (if it is set, the registers
+ * are live for the user thread).
*/
- if ((!(msr & MSR_FP)) && restore_fp(current))
- msr |= MSR_FP | current->thread.fpexc_mode;
+ if ((!(msr & MSR_FP)) && should_restore_fp())
+ new_msr |= MSR_FP;
- if ((!(msr & MSR_VEC)) && restore_altivec(current))
- msr |= MSR_VEC;
+ if ((!(msr & MSR_VEC)) && should_restore_altivec())
+ new_msr |= MSR_VEC;
- if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) &&
- restore_vsx(current)) {
- msr |= MSR_VSX;
+ if ((!(msr & MSR_VSX)) && should_restore_vsx()) {
+ if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC))
+ new_msr |= MSR_VSX;
}
- msr_check_and_clear(msr_all_available);
+ if (new_msr) {
+ unsigned long fpexc_mode = 0;
- regs->msr = msr;
+ msr_check_and_set(new_msr);
+
+ if (new_msr & MSR_FP) {
+ do_restore_fp();
+
+ // This also covers VSX, because VSX implies FP
+ fpexc_mode = current->thread.fpexc_mode;
+ }
+
+ if (new_msr & MSR_VEC)
+ do_restore_altivec();
+
+ if (new_msr & MSR_VSX)
+ do_restore_vsx();
+
+ msr_check_and_clear(new_msr);
+
+ regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
+ }
}
+#endif /* CONFIG_PPC_BOOK3S_64 */
static void save_all(struct task_struct *tsk)
{
@@ -578,7 +588,6 @@ static void save_all(struct task_struct *tsk)
__giveup_spe(tsk);
msr_check_and_clear(msr_all_available);
- thread_pkey_regs_save(&tsk->thread);
}
void flush_all_to_thread(struct task_struct *tsk)
@@ -611,26 +620,70 @@ void do_send_trap(struct pt_regs *regs, unsigned long address,
(void __user *)address);
}
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
-void do_break (struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+
+static void do_break_handler(struct pt_regs *regs)
+{
+ struct arch_hw_breakpoint null_brk = {0};
+ struct arch_hw_breakpoint *info;
+ ppc_inst_t instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea;
+ int i;
+
+ /*
+ * If underneath hw supports only one watchpoint, we know it
+ * caused exception. 8xx also falls into this category.
+ */
+ if (nr_wp_slots() == 1) {
+ __set_breakpoint(0, &null_brk);
+ current->thread.hw_brk[0] = null_brk;
+ current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED;
+ return;
+ }
+
+ /* Otherwise find out which DAWR caused exception and disable it. */
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = &current->thread.hw_brk[i];
+ if (!info->address)
+ continue;
+
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+ __set_breakpoint(i, &null_brk);
+ current->thread.hw_brk[i] = null_brk;
+ current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED;
+ }
+ }
+}
+
+DEFINE_INTERRUPT_HANDLER(do_break)
{
current->thread.trap_nr = TRAP_HWBKPT;
- if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr,
11, SIGSEGV) == NOTIFY_STOP)
return;
if (debugger_break_match(regs))
return;
- /* Clear the breakpoint */
- hw_breakpoint_disable();
+ /*
+ * We reach here only when watchpoint exception is generated by ptrace
+ * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set,
+ * watchpoint is already handled by hw_breakpoint_handler() so we don't
+ * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set,
+ * we need to manually handle the watchpoint here.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT))
+ do_break_handler(regs);
/* Deliver the signal to userspace */
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
@@ -704,70 +757,52 @@ void switch_booke_debug_regs(struct debug_reg *new_debug)
EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
#ifndef CONFIG_HAVE_HW_BREAKPOINT
-static void set_breakpoint(struct arch_hw_breakpoint *brk)
+static void set_breakpoint(int i, struct arch_hw_breakpoint *brk)
{
preempt_disable();
- __set_breakpoint(brk);
+ __set_breakpoint(i, brk);
preempt_enable();
}
static void set_debug_reg_defaults(struct thread_struct *thread)
{
- thread->hw_brk.address = 0;
- thread->hw_brk.type = 0;
- thread->hw_brk.len = 0;
- thread->hw_brk.hw_len = 0;
- if (ppc_breakpoint_available())
- set_breakpoint(&thread->hw_brk);
-}
-#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+ int i;
+ struct arch_hw_breakpoint null_brk = {0};
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
- mtspr(SPRN_DAC1, dabr);
-#ifdef CONFIG_PPC_47x
- isync();
-#endif
- return 0;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ thread->hw_brk[i] = null_brk;
+ if (ppc_breakpoint_available())
+ set_breakpoint(i, &thread->hw_brk[i]);
+ }
}
-#elif defined(CONFIG_PPC_BOOK3S)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+
+static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
+ struct arch_hw_breakpoint *b)
{
- mtspr(SPRN_DABR, dabr);
- if (cpu_has_feature(CPU_FTR_DABRX))
- mtspr(SPRN_DABRX, dabrx);
- return 0;
+ if (a->address != b->address)
+ return false;
+ if (a->type != b->type)
+ return false;
+ if (a->len != b->len)
+ return false;
+ /* no need to check hw_len. it's calculated from address and len */
+ return true;
}
-#elif defined(CONFIG_PPC_8xx)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
- unsigned long addr = dabr & ~HW_BRK_TYPE_DABR;
- unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */
- unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */
- if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
- lctrl1 |= 0xa0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
- lctrl1 |= 0xf0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == 0)
- lctrl2 = 0;
+static void switch_hw_breakpoint(struct task_struct *new)
+{
+ int i;
- mtspr(SPRN_LCTRL2, 0);
- mtspr(SPRN_CMPE, addr);
- mtspr(SPRN_CMPF, addr + 4);
- mtspr(SPRN_LCTRL1, lctrl1);
- mtspr(SPRN_LCTRL2, lctrl2);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (likely(hw_brk_match(this_cpu_ptr(&current_brk[i]),
+ &new->thread.hw_brk[i])))
+ continue;
- return 0;
-}
-#else
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
- return -EINVAL;
+ __set_breakpoint(i, &new->thread.hw_brk[i]);
+ }
}
-#endif
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
static inline int set_dabr(struct arch_hw_breakpoint *brk)
{
@@ -779,16 +814,61 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
if (ppc_md.set_dabr)
return ppc_md.set_dabr(dabr, dabrx);
- return __set_dabr(dabr, dabrx);
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+ mtspr(SPRN_DAC1, dabr);
+ if (IS_ENABLED(CONFIG_PPC_47x))
+ isync();
+ return 0;
+ } else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ mtspr(SPRN_DABR, dabr);
+ if (cpu_has_feature(CPU_FTR_DABRX))
+ mtspr(SPRN_DABRX, dabrx);
+ return 0;
+ } else {
+ return -EINVAL;
+ }
}
-void __set_breakpoint(struct arch_hw_breakpoint *brk)
+static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
{
- memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
+ unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
+ LCTRL1_CRWF_RW;
+ unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
+ unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
+ unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
+ if (start_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_F;
+ else if (end_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_E;
+ else
+ lctrl2 |= LCTRL2_LW0LA_EandF;
+
+ mtspr(SPRN_LCTRL2, 0);
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
+ return 0;
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
+ lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
+ lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
+
+ mtspr(SPRN_CMPE, start_addr - 1);
+ mtspr(SPRN_CMPF, end_addr);
+ mtspr(SPRN_LCTRL1, lctrl1);
+ mtspr(SPRN_LCTRL2, lctrl2);
+
+ return 0;
+}
+
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
if (dawr_enabled())
// Power8 or later
- set_dawr(brk);
+ set_dawr(nr, brk);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
// Power7 or earlier
set_dabr(brk);
@@ -797,6 +877,12 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
WARN_ON_ONCE(1);
}
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+ memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
+ set_hw_breakpoint(nr, brk);
+}
+
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
@@ -809,17 +895,32 @@ bool ppc_breakpoint_available(void)
}
EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
-static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
- struct arch_hw_breakpoint *b)
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
{
- if (a->address != b->address)
- return false;
- if (a->type != b->type)
- return false;
- if (a->len != b->len)
- return false;
- /* no need to check hw_len. it's calculated from address and len */
- return true;
+ struct arch_hw_breakpoint brk = {0};
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, &brk);
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]
+ */
+void restore_breakpoints(void)
+{
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1043,12 +1144,13 @@ void restore_tm_state(struct pt_regs *regs)
#endif
restore_math(regs);
- regs->msr |= msr_diff;
+ regs_set_return_msr(regs, regs->msr | msr_diff);
}
-#else
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
#define tm_recheckpoint_new_task(new)
#define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
static inline void save_sprs(struct thread_struct *t)
@@ -1057,6 +1159,10 @@ static inline void save_sprs(struct thread_struct *t)
if (cpu_has_feature(CPU_FTR_ALTIVEC))
t->vrsave = mfspr(SPRN_VRSAVE);
#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE))
+ t->spefscr = mfspr(SPRN_SPEFSCR);
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR))
t->dscr = mfspr(SPRN_DSCR);
@@ -1076,10 +1182,45 @@ static inline void save_sprs(struct thread_struct *t)
*/
t->tar = mfspr(SPRN_TAR);
}
+
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+ t->hashkeyr = mfspr(SPRN_HASHKEYR);
+#endif
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+ unsigned long usermsr;
+
+ if (!current->thread.regs)
+ return;
+
+ usermsr = current->thread.regs->msr;
+
+ /* Caller has enabled FP/VEC/VSX/TM in MSR */
+ if (usermsr & MSR_FP)
+ __giveup_fpu(current);
+ if (usermsr & MSR_VEC)
+ __giveup_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (usermsr & MSR_TM) {
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
- thread_pkey_regs_save(t);
+void kvmppc_save_current_sprs(void)
+{
+ save_sprs(&current->thread);
}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
@@ -1089,6 +1230,11 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->vrsave != new_thread->vrsave)
mtspr(SPRN_VRSAVE, new_thread->vrsave);
#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE) &&
+ old_thread->spefscr != new_thread->spefscr)
+ mtspr(SPRN_SPEFSCR, new_thread->spefscr);
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DSCR)) {
u64 dscr = get_paca()->dscr_default;
@@ -1117,9 +1263,12 @@ static inline void restore_sprs(struct thread_struct *old_thread,
if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
+
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
+ old_thread->hashkeyr != new_thread->hashkeyr)
+ mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
#endif
- thread_pkey_regs_restore(new_thread, old_thread);
}
struct task_struct *__switch_to(struct task_struct *prev,
@@ -1127,7 +1276,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
@@ -1136,7 +1285,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1144,6 +1293,19 @@ struct task_struct *__switch_to(struct task_struct *prev,
__flush_tlb_pending(batch);
batch->active = 0;
}
+
+ /*
+ * On POWER9 the copy-paste buffer can only paste into
+ * foreign real addresses, so unprivileged processes can not
+ * see the data or use it in any way unless they have
+ * foreign real mappings. If the new process has the foreign
+ * real address mappings, we must issue a cp_abort to clear
+ * any state and prevent snooping, corruption or a covert
+ * channel. ISA v3.1 supports paste into local memory.
+ */
+ if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
+ atomic_read(&new->mm->context.vas_windows)))
+ asm volatile(PPC_CP_ABORT);
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -1154,8 +1316,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* schedule DABR
*/
#ifndef CONFIG_HAVE_HW_BREAKPOINT
- if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk)))
- __set_breakpoint(&new->thread.hw_brk);
+ switch_hw_breakpoint(new);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
@@ -1180,37 +1341,50 @@ struct task_struct *__switch_to(struct task_struct *prev,
}
/*
- * Call restore_sprs() before calling _switch(). If we move it after
- * _switch() then we miss out on calling it for new tasks. The reason
- * for this is we manually create a stack frame for new tasks that
- * directly returns through ret_from_fork() or
+ * Call restore_sprs() and set_return_regs_changed() before calling
+ * _switch(). If we move it after _switch() then we miss out on calling
+ * it for new tasks. The reason for this is we manually create a stack
+ * frame for new tasks that directly returns through ret_from_fork() or
* ret_from_kernel_thread(). See copy_thread() for details.
*/
restore_sprs(old_thread, new_thread);
+ set_return_regs_changed(); /* _switch changes stack (and regs) */
+
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ kuap_assert_locked();
+
last = _switch(old_thread, new_thread);
+ /*
+ * Nothing after _switch will be run for newly created tasks,
+ * because they switch directly to ret_from_fork/ret_from_kernel_thread
+ * etc. Code added here should have a comment explaining why that is
+ * okay.
+ */
+
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * This applies to a process that was context switched while inside
+ * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
+ * deactivated above, before _switch(). This will never be the case
+ * for new tasks.
+ */
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
- if (current->thread.regs) {
+ /*
+ * Math facilities are masked out of the child MSR in copy_thread.
+ * A new task does not need to restore_math because it will
+ * demand fault them.
+ */
+ if (current->thread.regs)
restore_math(current->thread.regs);
-
- /*
- * The copy-paste buffer can only store into foreign real
- * addresses, so unprivileged processes can not see the
- * data or use it in any way unless they have foreign real
- * mappings. If the new process has the foreign real address
- * mappings, we must issue a cp_abort to clear any state and
- * prevent snooping, corruption or a covert channel.
- */
- if (current->thread.used_vas)
- asm volatile(PPC_CP_ABORT);
- }
#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
@@ -1221,29 +1395,27 @@ struct task_struct *__switch_to(struct task_struct *prev,
static void show_instructions(struct pt_regs *regs)
{
int i;
+ unsigned long nip = regs->nip;
unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- printk("Instruction dump:");
+ printk("Code: ");
+
+ /*
+ * If we were executing with the MMU off for instructions, adjust pc
+ * rather than printing XXXXXXXX.
+ */
+ if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) {
+ pc = (unsigned long)phys_to_virt(pc);
+ nip = (unsigned long)phys_to_virt(regs->nip);
+ }
for (i = 0; i < NR_INSN_TO_PRINT; i++) {
int instr;
- if (!(i % 8))
- pr_cont("\n");
-
-#if !defined(CONFIG_BOOKE)
- /* If executing with the IMMU off, adjust pc rather
- * than print XXXXXXXX.
- */
- if (!(regs->msr & MSR_IR))
- pc = (unsigned long)phys_to_virt(pc);
-#endif
-
- if (!__kernel_text_address(pc) ||
- probe_kernel_address((const void *)pc, instr)) {
+ if (get_kernel_nofault(instr, (const void *)pc)) {
pr_cont("XXXXXXXX ");
} else {
- if (regs->nip == pc)
+ if (nip == pc)
pr_cont("<%08x> ", instr);
else
pr_cont("%08x ", instr);
@@ -1264,16 +1436,6 @@ void show_user_instructions(struct pt_regs *regs)
pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- /*
- * Make sure the NIP points at userspace, not kernel text/data or
- * elsewhere.
- */
- if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
- pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
- current->comm, current->pid);
- return;
- }
-
seq_buf_init(&s, buf, sizeof(buf));
while (n) {
@@ -1284,7 +1446,8 @@ void show_user_instructions(struct pt_regs *regs)
for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
int instr;
- if (probe_kernel_address((const void *)pc, instr)) {
+ if (copy_from_user_nofault(&instr, (void __user *)pc,
+ sizeof(instr))) {
seq_buf_printf(&s, "XXXXXXXX ");
continue;
}
@@ -1381,19 +1544,15 @@ static void print_msr_bits(unsigned long val)
#ifdef CONFIG_PPC64
#define REG "%016lx"
#define REGS_PER_LINE 4
-#define LAST_VOLATILE 13
#else
#define REG "%08lx"
#define REGS_PER_LINE 8
-#define LAST_VOLATILE 12
#endif
-void show_regs(struct pt_regs * regs)
+static void __show_regs(struct pt_regs *regs)
{
int i, trap;
- show_regs_print_info(KERN_DEFAULT);
-
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %px TRAP: %04lx %s (%s)\n",
@@ -1402,14 +1561,17 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
- if (trap == 0x200 || trap == 0x300 || trap == 0x600)
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
-#else
- pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
-#endif
+ if (trap == INTERRUPT_MACHINE_CHECK ||
+ trap == INTERRUPT_DATA_STORAGE ||
+ trap == INTERRUPT_ALIGNMENT) {
+ if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
+ pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr);
+ else
+ pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+ }
+
#ifdef CONFIG_PPC64
pr_cont("IRQMASK: %lx ", regs->softe);
#endif
@@ -1422,19 +1584,23 @@ void show_regs(struct pt_regs * regs)
if ((i % REGS_PER_LINE) == 0)
pr_cont("\nGPR%02d: ", i);
pr_cont(REG " ", regs->gpr[i]);
- if (i == LAST_VOLATILE && !FULL_REGS(regs))
- break;
}
pr_cont("\n");
-#ifdef CONFIG_KALLSYMS
/*
* Lookup NIP late so we have the best change of getting the
* above info out without failing
*/
- printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
- printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
-#endif
- show_stack(current, (unsigned long *) regs->gpr[1]);
+ if (IS_ENABLED(CONFIG_KALLSYMS)) {
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+ }
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ show_regs_print_info(KERN_DEFAULT);
+ __show_regs(regs);
+ show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
if (!user_mode(regs))
show_instructions(regs);
}
@@ -1448,38 +1614,30 @@ void flush_thread(void)
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
-#ifdef CONFIG_PPC_BOOK3S_64
void arch_setup_new_exec(void)
{
- if (radix_enabled())
- return;
- hash__setup_new_exec();
-}
-#endif
-int set_thread_uses_vas(void)
-{
#ifdef CONFIG_PPC_BOOK3S_64
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return -EINVAL;
-
- current->thread.used_vas = 1;
-
+ if (!radix_enabled())
+ hash__setup_new_exec();
+#endif
/*
- * Even a process that has no foreign real address mapping can use
- * an unpaired COPY instruction (to no real effect). Issue CP_ABORT
- * to clear any pending COPY and prevent a covert channel.
- *
- * __switch_to() will issue CP_ABORT on future context switches.
+ * If we exec out of a kernel thread then thread.regs will not be
+ * set. Do it now.
*/
- asm volatile(PPC_CP_ABORT);
+ if (!current->thread.regs) {
+ struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
+ current->thread.regs = regs - 1;
+ }
-#endif /* CONFIG_PPC_BOOK3S_64 */
- return 0;
+#ifdef CONFIG_PPC_MEM_KEYS
+ current->thread.regs->amr = default_amr;
+ current->thread.regs->iamr = default_iamr;
+#endif
}
#ifdef CONFIG_PPC64
-/**
+/*
* Assign a TIDR (thread ID) for task @t and set it in the thread
* structure. For now, we only support setting TIDR for 'current' task.
*
@@ -1532,11 +1690,6 @@ EXPORT_SYMBOL_GPL(set_thread_tidr);
#endif /* CONFIG_PPC64 */
-void
-release_thread(struct task_struct *t)
-{
-}
-
/*
* this gets called so that we can store coprocessor state into memory and
* copy the current task into the new thread.
@@ -1565,7 +1718,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1590,59 +1743,85 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- struct pt_regs *childregs, *kregs;
+ struct pt_regs *kregs; /* Switch frame regs */
extern void ret_from_fork(void);
- extern void ret_from_kernel_thread(void);
+ extern void ret_from_fork_scv(void);
+ extern void ret_from_kernel_user_thread(void);
+ extern void start_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- struct thread_info *ti = task_thread_info(p);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int i;
+#endif
klp_init_thread_info(p);
- /* Copy registers */
- sp -= sizeof(struct pt_regs);
- childregs = (struct pt_regs *) sp;
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
- memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gpr[1] = sp + sizeof(struct pt_regs);
- /* function */
- if (usp)
- childregs->gpr[14] = ppc_function_entry((void *)usp);
-#ifdef CONFIG_PPC64
- clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = IRQS_ENABLED;
-#endif
- childregs->gpr[15] = kthread_arg;
+
+ /* Create initial minimum stack frame. */
+ sp -= STACK_FRAME_MIN_SIZE;
+ ((unsigned long *)sp)[0] = 0;
+
+ f = start_kernel_thread;
p->thread.regs = NULL; /* no user register state */
- ti->flags |= _TIF_RESTOREALL;
- f = ret_from_kernel_thread;
+ clear_tsk_compat_task(p);
} else {
/* user thread */
- struct pt_regs *regs = current_pt_regs();
- CHECK_FULL_REGS(regs);
- *childregs = *regs;
- if (usp)
- childregs->gpr[1] = usp;
- p->thread.regs = childregs;
- childregs->gpr[3] = 0; /* Result from fork() */
- if (clone_flags & CLONE_SETTLS) {
+ struct pt_regs *childregs;
+
+ /* Create initial user return stack frame. */
+ sp -= STACK_USER_INT_FRAME_SIZE;
+ *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
+ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+
+ if (unlikely(args->fn)) {
+ /*
+ * A user space thread, but it first runs a kernel
+ * thread, and then returns as though it had called
+ * execve rather than fork, so user regs will be
+ * filled in (e.g., by kernel_execve()).
+ */
+ ((unsigned long *)sp)[0] = 0;
+ memset(childregs, 0, sizeof(struct pt_regs));
#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- childregs->gpr[13] = tls;
- else
+ childregs->softe = IRQS_ENABLED;
#endif
- childregs->gpr[2] = tls;
+ f = ret_from_kernel_user_thread;
+ } else {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+
+ /* Copy registers */
+ *childregs = *regs;
+ if (usp)
+ childregs->gpr[1] = usp;
+ ((unsigned long *)sp)[0] = childregs->gpr[1];
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
+#endif
+ if (clone_flags & CLONE_SETTLS) {
+ unsigned long tls = args->tls;
+
+ if (!is_32bit_task())
+ childregs->gpr[13] = tls;
+ else
+ childregs->gpr[2] = tls;
+ }
+
+ if (trap_is_scv(regs))
+ f = ret_from_fork_scv;
+ else
+ f = ret_from_fork;
}
- f = ret_from_fork;
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+ p->thread.regs = childregs;
}
- childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
- sp -= STACK_FRAME_OVERHEAD;
/*
* The way this works is that at some point in the future
@@ -1652,22 +1831,39 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
- ((unsigned long *)sp)[0] = 0;
- sp -= sizeof(struct pt_regs);
- kregs = (struct pt_regs *) sp;
- sp -= STACK_FRAME_OVERHEAD;
+ ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
+ sp -= STACK_SWITCH_FRAME_SIZE;
+ ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
+ kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
+ kregs->nip = ppc_function_entry(f);
+ if (unlikely(args->fn)) {
+ /*
+ * Put kthread fn, arg parameters in non-volatile GPRs in the
+ * switch frame so they are loaded by _switch before it returns
+ * to ret_from_kernel_thread.
+ */
+ kregs->gpr[14] = ppc_function_entry((void *)args->fn);
+ kregs->gpr[15] = (unsigned long)args->fn_arg;
+ }
p->thread.ksp = sp;
-#ifdef CONFIG_PPC32
- p->thread.ksp_limit = (unsigned long)end_of_stack(p);
-#endif
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- p->thread.ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++)
+ p->thread.ptrace_bps[i] = NULL;
#endif
+#ifdef CONFIG_PPC_FPU_REGS
p->thread.fp_save_area = NULL;
+#endif
#ifdef CONFIG_ALTIVEC
p->thread.vr_save_area = NULL;
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ p->thread.kuap = KUAP_NONE;
+#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ p->thread.pid = MMU_NO_CONTEXT;
+#endif
setup_ksp_vsid(p, sp);
@@ -1676,12 +1872,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
p->thread.dscr_inherit = current->thread.dscr_inherit;
p->thread.dscr = mfspr(SPRN_DSCR);
}
- if (cpu_has_feature(CPU_FTR_HAS_PPR))
- childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
- kregs->nip = ppc_function_entry(f);
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+ p->thread.hashkeyr = current->thread.hashkeyr;
+#endif
return 0;
}
@@ -1695,20 +1892,9 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
-#ifdef CONFIG_PPC_BOOK3S_64
- if (!radix_enabled())
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
preload_new_slb_context(start, sp);
#endif
-#endif
-
- /*
- * If we exec out of a kernel thread then thread.regs will not be
- * set. Do it now.
- */
- if (!current->thread.regs) {
- struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
- current->thread.regs = regs - 1;
- }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
@@ -1720,20 +1906,13 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
tm_reclaim_current(0);
#endif
- memset(regs->gpr, 0, sizeof(regs->gpr));
+ memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
regs->ctr = 0;
regs->link = 0;
regs->xer = 0;
regs->ccr = 0;
regs->gpr[1] = sp;
- /*
- * We have just cleared all the nonvolatile GPRs, so make
- * FULL_REGS(regs) return true. This is necessary to allow
- * ptrace to examine the thread immediately after exec.
- */
- regs->trap &= ~1UL;
-
#ifdef CONFIG_PPC32
regs->mq = 0;
regs->nip = start;
@@ -1778,21 +1957,24 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
}
regs->gpr[2] = toc;
}
- regs->nip = entry;
- regs->msr = MSR_USER64;
+ regs_set_return_ip(regs, entry);
+ regs_set_return_msr(regs, MSR_USER64);
} else {
- regs->nip = start;
regs->gpr[2] = 0;
- regs->msr = MSR_USER32;
+ regs_set_return_ip(regs, start);
+ regs_set_return_msr(regs, MSR_USER32);
}
+
#endif
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
current->thread.load_slb = 0;
current->thread.load_fp = 0;
+#ifdef CONFIG_PPC_FPU_REGS
memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
+#endif
#ifdef CONFIG_ALTIVEC
memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
@@ -1813,8 +1995,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
- thread_pkey_regs_init(&current->thread);
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+ current->thread.hashkeyr = get_random_long();
+ mtspr(SPRN_HASHKEYR, current->thread.hashkeyr);
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
}
EXPORT_SYMBOL(start_thread);
@@ -1830,7 +2016,6 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* fpexc_mode. fpexc_mode is also used for setting FP exception
* mode (asyn, precise, disabled) for 'Classic' FP. */
if (val & PR_FP_EXC_SW_ENABLE) {
-#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1844,16 +2029,15 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
tsk->thread.fpexc_mode = val &
(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+#endif
return 0;
} else {
return -EINVAL;
}
-#else
- return -EINVAL;
-#endif
}
/* on a CONFIG_SPE this does not hurt us. The bits that
@@ -1864,18 +2048,18 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
if (val > PR_FP_EXC_PRECISE)
return -EINVAL;
tsk->thread.fpexc_mode = __pack_fe01(val);
- if (regs != NULL && (regs->msr & MSR_FP) != 0)
- regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
- | tsk->thread.fpexc_mode;
+ if (regs != NULL && (regs->msr & MSR_FP) != 0) {
+ regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
+ | tsk->thread.fpexc_mode);
+ }
return 0;
}
int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
{
- unsigned int val;
+ unsigned int val = 0;
- if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
-#ifdef CONFIG_SPE
+ if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1889,15 +2073,15 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
val = tsk->thread.fpexc_mode;
+#endif
} else
return -EINVAL;
-#else
- return -EINVAL;
-#endif
- else
+ } else {
val = __unpack_fe01(tsk->thread.fpexc_mode);
+ }
return put_user(val, (unsigned int __user *) adr);
}
@@ -1913,9 +2097,9 @@ int set_endian(struct task_struct *tsk, unsigned int val)
return -EINVAL;
if (val == PR_ENDIAN_BIG)
- regs->msr &= ~MSR_LE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_LE);
else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
- regs->msr |= MSR_LE;
+ regs_set_return_msr(regs, regs->msr | MSR_LE);
else
return -EINVAL;
@@ -1962,6 +2146,9 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
unsigned long stack_page;
unsigned long cpu = task_cpu(p);
+ if (!hardirq_ctx[cpu] || !softirq_ctx[cpu])
+ return 0;
+
stack_page = (unsigned long)hardirq_ctx[cpu];
if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
@@ -1973,8 +2160,48 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
return 0;
}
-int validate_sp(unsigned long sp, struct task_struct *p,
- unsigned long nbytes)
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+#ifdef CONFIG_PPC64
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ if (!paca_ptrs)
+ return 0;
+
+ if (!paca_ptrs[cpu]->emergency_sp)
+ return 0;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp)
+ return 0;
+#endif
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+# endif
+#endif
+
+ return 0;
+}
+
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
@@ -1984,30 +2211,32 @@ int validate_sp(unsigned long sp, struct task_struct *p,
if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
- return valid_irq_stack(sp, p, nbytes);
+ if (valid_irq_stack(sp, p, nbytes))
+ return 1;
+
+ return valid_emergency_stack(sp, p, nbytes);
}
-EXPORT_SYMBOL(validate_sp);
+int validate_sp(unsigned long sp, struct task_struct *p)
+{
+ return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
+}
-static unsigned long __get_wchan(struct task_struct *p)
+static unsigned long ___get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
-
sp = p->thread.ksp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p))
return 0;
do {
- sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
- p->state == TASK_RUNNING)
+ sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ if (!validate_sp(sp, p) || task_is_running(p))
return 0;
if (count > 0) {
- ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+ ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
if (!in_sched_functions(ip))
return ip;
}
@@ -2015,31 +2244,47 @@ static unsigned long __get_wchan(struct task_struct *p)
return 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long ret;
if (!try_get_task_stack(p))
return 0;
- ret = __get_wchan(p);
+ ret = ___get_wchan(p);
put_task_stack(p);
return ret;
}
+static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk)
+{
+ unsigned long stack_page;
+
+ // A non-empty pt_regs should never have a zero MSR or TRAP value.
+ if (regs->msr || regs->trap)
+ return false;
+
+ // Check it sits at the very base of the stack
+ stack_page = (unsigned long)task_stack_page(tsk);
+ if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE)
+ return false;
+
+ return true;
+}
+
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
-void show_stack(struct task_struct *tsk, unsigned long *stack)
+void __no_sanitize_address show_stack(struct task_struct *tsk,
+ unsigned long *stack,
+ const char *loglvl)
{
unsigned long sp, ip, lr, newsp;
int count = 0;
int firstframe = 1;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned long ret_addr;
int ftrace_idx = 0;
-#endif
if (tsk == NULL)
tsk = current;
@@ -2050,28 +2295,27 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
}
lr = 0;
- printk("Call Trace:\n");
+ printk("%sCall Trace:\n", loglvl);
do {
- if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, tsk))
break;
stack = (unsigned long *) sp;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (!firstframe || ip != lr) {
- printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ printk("%s["REG"] ["REG"] %pS",
+ loglvl, sp, ip, (void *)ip);
ret_addr = ftrace_graph_ret_addr(current,
&ftrace_idx, ip, stack);
if (ret_addr != ip)
pr_cont(" (%pS)", (void *)ret_addr);
-#endif
if (firstframe)
pr_cont(" (unreliable)");
pr_cont("\n");
@@ -2080,15 +2324,28 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
/*
* See if this is an exception frame.
- * We look for the "regshere" marker in the current frame.
+ * We look for the "regs" marker in the current frame.
+ *
+ * STACK_SWITCH_FRAME_SIZE being the smallest frame that
+ * could hold a pt_regs, if that does not fit then it can't
+ * have regs.
*/
- if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
- && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
+ && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
- (sp + STACK_FRAME_OVERHEAD);
+ (sp + STACK_INT_FRAME_REGS);
+
lr = regs->link;
- printk("--- interrupt: %lx at %pS\n LR = %pS\n",
- regs->trap, (void *)regs->nip, (void *)lr);
+ printk("%s--- interrupt: %lx at %pS\n",
+ loglvl, regs->trap, (void *)regs->nip);
+
+ // Detect the case of an empty pt_regs at the very base
+ // of the stack and suppress showing it in full.
+ if (!empty_user_regs(regs, tsk)) {
+ __show_regs(regs);
+ printk("%s--- interrupt: %lx\n", loglvl, regs->trap);
+ }
+
firstframe = 1;
}
@@ -2148,46 +2405,6 @@ void notrace __ppc64_runlatch_off(void)
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= get_random_u32_below(PAGE_SIZE);
return sp & ~0xf;
}
-
-static inline unsigned long brk_rnd(void)
-{
- unsigned long rnd = 0;
-
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
- else
- rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
-
- return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- unsigned long base = mm->brk;
- unsigned long ret;
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /*
- * If we are using 1TB segments and we are allowed to randomise
- * the heap, we can put it above 1TB so it is backed by a 1TB
- * segment. Otherwise the heap will be in the bottom 1TB
- * which always uses 256MB segments and this may result in a
- * performance penalty. We don't need to worry about radix. For
- * radix, mmu_highuser_ssize remains unchanged from 256MB.
- */
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
- base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
- ret = PAGE_ALIGN(base + brk_rnd());
-
- if (ret < mm->brk)
- return mm->brk;
-
- return ret;
-}
-
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6620f37abe73..0b5878c3125b 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -11,7 +11,6 @@
#undef DEBUG
-#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -30,8 +29,9 @@
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/cpu.h>
+#include <linux/pgtable.h>
+#include <linux/seq_buf.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/page.h>
#include <asm/processor.h>
@@ -41,12 +41,11 @@
#include <asm/smp.h>
#include <asm/mmu.h>
#include <asm/paca.h>
-#include <asm/pgtable.h>
#include <asm/powernv.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
-#include <asm/machdep.h>
+#include <asm/setup.h>
#include <asm/pci-bridge.h>
#include <asm/kexec.h>
#include <asm/opal.h>
@@ -56,6 +55,8 @@
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
#include <asm/ultravisor.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
#include <mm/mmu_decl.h>
@@ -65,11 +66,14 @@
#define DBG(fmt...)
#endif
+int *chip_id_lookup_table;
+
#ifdef CONFIG_PPC64
int __initdata iommu_is_off;
int __initdata iommu_force_on;
unsigned long tce_alloc_start, tce_alloc_end;
u64 ppc64_rma_size;
+unsigned int boot_cpu_node_count __ro_after_init;
#endif
static phys_addr_t first_memblock_size;
static int __initdata boot_cpu_count;
@@ -96,8 +100,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size)
if (!initrd_start)
return 0;
- return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
- start <= _ALIGN_UP(initrd_end, PAGE_SIZE);
+ return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
+ start <= ALIGN(initrd_end, PAGE_SIZE);
#else
return 0;
#endif
@@ -136,7 +140,7 @@ static void __init move_device_tree(void)
}
/*
- * ibm,pa-features is a per-cpu property that contains a string of
+ * ibm,pa/pi-features is a per-cpu property that contains a string of
* attribute descriptors, each of which has a 2 byte header plus up
* to 254 bytes worth of processor attribute bits. First header
* byte specifies the number of bytes following the header.
@@ -148,24 +152,25 @@ static void __init move_device_tree(void)
* is supported/not supported. Note that the bit numbers are
* big-endian to match the definition in PAPR.
*/
-static struct ibm_pa_feature {
+struct ibm_feature {
unsigned long cpu_features; /* CPU_FTR_xxx bit */
unsigned long mmu_features; /* MMU_FTR_xxx bit */
unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */
unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */
- unsigned char pabyte; /* byte number in ibm,pa-features */
+ unsigned char pabyte; /* byte number in ibm,pa/pi-features */
unsigned char pabit; /* bit number (big-endian) */
unsigned char invert; /* if 1, pa bit set => clear feature */
-} ibm_pa_features[] __initdata = {
+};
+
+static struct ibm_feature ibm_pa_features[] __initdata = {
{ .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
{ .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
{ .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
{ .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
{ .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
#ifdef CONFIG_PPC_RADIX_MMU
- { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX },
+ { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE },
#endif
- { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
{ .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
.cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
/*
@@ -175,11 +180,24 @@ static struct ibm_pa_feature {
*/
{ .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
.cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
+
+ { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
+ { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE },
+};
+
+/*
+ * ibm,pi-features property provides the support of processor specific
+ * options not described in ibm,pa-features. Right now use byte 0, bit 3
+ * which indicates the occurrence of DSI interrupt when the paste operation
+ * on the suspended NX window.
+ */
+static struct ibm_feature ibm_pi_features[] __initdata = {
+ { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI },
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
unsigned long tablelen,
- struct ibm_pa_feature *fp,
+ struct ibm_feature *fp,
unsigned long ft_size)
{
unsigned long i, len, bit;
@@ -216,20 +234,21 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
}
}
-static void __init check_cpu_pa_features(unsigned long node)
+static void __init check_cpu_features(unsigned long node, char *name,
+ struct ibm_feature *fp,
+ unsigned long size)
{
const unsigned char *pa_ftrs;
int tablelen;
- pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+ pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen);
if (pa_ftrs == NULL)
return;
- scan_features(node, pa_ftrs, tablelen,
- ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
+ scan_features(node, pa_ftrs, tablelen, fp, size);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -266,7 +285,7 @@ static struct feature_property {
};
#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
const char *model = of_get_flat_dt_prop(node, "model", NULL);
@@ -319,6 +338,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
+ if (IS_ENABLED(CONFIG_PPC64))
+ boot_cpu_node_count++;
+
/* Get physical cpuid */
intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
@@ -350,6 +372,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
+ if (IS_ENABLED(CONFIG_PPC64))
+ boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+
/*
* PAPR defines "logical" PVR values for cpus that
* meet various levels of the architecture:
@@ -371,11 +396,16 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
*/
if (!dt_cpu_ftrs_in_use()) {
prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
- if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+ if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) {
identify_cpu(0, be32_to_cpup(prop));
+ seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop));
+ }
check_cpu_feature_properties(node);
- check_cpu_pa_features(node);
+ check_cpu_features(node, "ibm,pa-features", ibm_pa_features,
+ ARRAY_SIZE(ibm_pa_features));
+ check_cpu_features(node, "ibm,pi-features", ibm_pi_features,
+ ARRAY_SIZE(ibm_pi_features));
}
identical_pvr_fixup(node);
@@ -386,9 +416,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
else if (!dt_cpu_ftrs_in_use())
cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
- allocate_paca(boot_cpuid);
#endif
- set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
return 0;
}
@@ -400,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
- if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+ if (early_init_dt_scan_chosen(data) < 0)
return 0;
#ifdef CONFIG_PPC64
@@ -445,7 +473,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
*/
#ifdef CONFIG_SPARSEMEM
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
@@ -456,7 +484,7 @@ static bool validate_mem_limit(u64 base, u64 *size)
return true;
}
#else
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
return true;
}
@@ -468,8 +496,9 @@ static bool validate_mem_limit(u64 base, u64 *size)
* This contains a list of memory blocks along with NUMA affinity
* information.
*/
-static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
- const __be32 **usm)
+static int __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
u64 base, size;
int is_kexec_kdump = 0, rngs;
@@ -484,7 +513,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
*/
if ((lmb->flags & DRCONF_MEM_RESERVED) ||
!(lmb->flags & DRCONF_MEM_ASSIGNED))
- return;
+ return 0;
if (*usm)
is_kexec_kdump = 1;
@@ -499,7 +528,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
*/
rngs = dt_mem_next_cell(dt_root_size_cells, usm);
if (!rngs) /* there are no (base, size) duple */
- return;
+ return 0;
}
do {
@@ -515,26 +544,32 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
size = 0x80000000ul - base;
}
+ if (!validate_mem_limit(base, &size))
+ continue;
+
DBG("Adding: %llx -> %llx\n", base, size);
- if (validate_mem_limit(base, &size))
- memblock_add(base, size);
+ memblock_add(base, size);
+
+ if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
+ memblock_mark_hotplug(base, size);
} while (--rngs);
+
+ return 0;
}
#endif /* CONFIG_PPC_PSERIES */
-static int __init early_init_dt_scan_memory_ppc(unsigned long node,
- const char *uname,
- int depth, void *data)
+static int __init early_init_dt_scan_memory_ppc(void)
{
#ifdef CONFIG_PPC_PSERIES
- if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
- walk_drmem_lmbs_early(node, early_init_drmem_lmb);
- return 0;
- }
+ const void *fdt = initial_boot_params;
+ int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+
+ if (node > 0)
+ walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
+
#endif
-
- return early_init_dt_scan_memory(node, uname, depth, data);
+
+ return early_init_dt_scan_memory();
}
/*
@@ -623,13 +658,15 @@ static void __init early_reserve_mem(void)
#ifdef CONFIG_BLK_DEV_INITRD
/* Then reserve the initrd, if any */
if (initrd_start && (initrd_end > initrd_start)) {
- memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
- _ALIGN_UP(initrd_end, PAGE_SIZE) -
- _ALIGN_DOWN(initrd_start, PAGE_SIZE));
+ memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
+ ALIGN(initrd_end, PAGE_SIZE) -
+ ALIGN_DOWN(initrd_start, PAGE_SIZE));
}
#endif /* CONFIG_BLK_DEV_INITRD */
-#ifdef CONFIG_PPC32
+ if (!IS_ENABLED(CONFIG_PPC32))
+ return;
+
/*
* Handle the case where we might be booting from an old kexec
* image that setup the mem_rsvmap as pairs of 32-bit values
@@ -650,7 +687,6 @@ static void __init early_reserve_mem(void)
}
return;
}
-#endif
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -685,6 +721,40 @@ static void __init tm_init(void)
static void tm_init(void) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+static int __init
+early_init_dt_scan_model(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ const char *prop;
+
+ if (depth != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "model", NULL);
+ if (prop)
+ seq_buf_printf(&ppc_hw_desc, "%s ", prop);
+
+ /* break now */
+ return 1;
+}
+
+#ifdef CONFIG_PPC64
+static void __init save_fscr_to_task(void)
+{
+ /*
+ * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we
+ * have configured via the device tree features or via __init_FSCR().
+ * That value will then be propagated to pid 1 (init) and all future
+ * processes.
+ */
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ init_task.thread.fscr = mfspr(SPRN_FSCR);
+}
+#else
+static inline void save_fscr_to_task(void) {}
+#endif
+
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -695,6 +765,8 @@ void __init early_init_devtree(void *params)
if (!early_init_dt_verify(params))
panic("BUG: Failed verifying flat device tree, bad version?");
+ of_scan_flat_dt(early_init_dt_scan_model, NULL);
+
#ifdef CONFIG_PPC_RTAS
/* Some machines might need RTAS info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_rtas, NULL);
@@ -720,9 +792,16 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
+ /*
+ * As generic code authors expect to be able to use static keys
+ * in early_param() handlers, we initialize the static keys just
+ * before parsing early params (it's fine to call jump_label_init()
+ * more than once).
+ */
+ jump_label_init();
parse_early_param();
/* make sure we've parsed cmdline for mem= before this */
@@ -730,7 +809,7 @@ void __init early_init_devtree(void *params)
first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
setup_initial_memory_limit(memstart_addr, first_memblock_size);
/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
- memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
/* If relocatable, reserve first 32k for interrupt vectors etc. */
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
@@ -749,6 +828,11 @@ void __init early_init_devtree(void *params)
limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
memblock_enforce_memory_limit(limit);
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
+ if (!early_radix_enabled())
+ memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS));
+#endif
+
memblock_allow_resize();
memblock_dump_all();
@@ -758,12 +842,13 @@ void __init early_init_devtree(void *params)
* FIXME .. and the initrd too? */
move_device_tree();
- allocate_paca_ptrs();
-
DBG("Scanning CPUs ...\n");
dt_cpu_ftrs_scan();
+ // We can now add the CPU name & PVR to the hardware description
+ seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR));
+
/* Retrieve CPU related informations from the flat tree
* (altivec support, boot CPU ID, ...)
*/
@@ -773,6 +858,8 @@ void __init early_init_devtree(void *params)
BUG();
}
+ save_fscr_to_task();
+
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
/* We'll later wait for secondaries to check in; there are
* NCPUS-1 non-boot CPUs :-)
@@ -791,12 +878,20 @@ void __init early_init_devtree(void *params)
/* Now try to figure out if we are running on LPAR and so on */
pseries_probe_fw_features();
+ /*
+ * Initialize pkey features and default AMR/IAMR values
+ */
+ pkey_early_init_devtree();
+
#ifdef CONFIG_PPC_PS3
/* Identify PS3 firmware */
if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))
powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
#endif
+ /* If kexec left a PLPKS password in the DT, get it and clear it */
+ plpks_early_init_devtree();
+
tm_init();
DBG(" <- early_init_devtree()\n");
@@ -817,8 +912,8 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
* mess the memblock.
*/
add_mem_to_memblock = 0;
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
add_mem_to_memblock = 1;
if (size)
@@ -876,13 +971,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
int cpu_to_chip_id(int cpu)
{
struct device_node *np;
+ int ret = -1, idx;
+
+ idx = cpu / threads_per_core;
+ if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
+ return chip_id_lookup_table[idx];
np = of_get_cpu_node(cpu, NULL);
- if (!np)
- return -1;
+ if (np) {
+ ret = of_get_ibm_chip_id(np);
+ of_node_put(np);
+
+ if (chip_id_lookup_table)
+ chip_id_lookup_table[idx] = ret;
+ }
- of_node_put(np);
- return of_get_ibm_chip_id(np);
+ return ret;
}
EXPORT_SYMBOL(cpu_to_chip_id);
diff --git a/arch/powerpc/kernel/prom_entry_64.S b/arch/powerpc/kernel/prom_entry_64.S
new file mode 100644
index 000000000000..f1b8793d28c6
--- /dev/null
+++ b/arch/powerpc/kernel/prom_entry_64.S
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the 64-bit prom entry code.
+ */
+#include <asm/asm-offsets.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/ppc_asm.h>
+
+.section ".text","ax",@progbits
+
+_GLOBAL(enter_prom)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+ /* Because PROM is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * PROM might touch to the stack. (r0, r3-r13 are caller saved)
+ */
+ SAVE_GPR(2, r1)
+ SAVE_GPR(13, r1)
+ SAVE_NVGPRS(r1)
+ mfcr r10
+ mfmsr r11
+ std r10,_CCR(r1)
+ std r11,_MSR(r1)
+
+ /* Put PROM address in SRR0 */
+ mtsrr0 r4
+
+ /* Setup our trampoline return addr in LR */
+ bcl 20,31,$+4
+0: mflr r4
+ addi r4,r4,(1f - 0b)
+ mtlr r4
+
+ /* Prepare a 32-bit mode big endian MSR
+ */
+#ifdef CONFIG_PPC_BOOK3E_64
+ rlwinm r11,r11,0,1,31
+ mtsrr1 r11
+ rfi
+#else /* CONFIG_PPC_BOOK3E_64 */
+ LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
+ andc r11,r11,r12
+ mtsrr1 r11
+ RFI_TO_KERNEL
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+1: /* Return from OF */
+ FIXUP_ENDIAN
+
+ /* Just make sure that r1 top 32 bits didn't get
+ * corrupt by OF
+ */
+ rldicl r1,r1,0,32
+
+ /* Restore the MSR (back to 64 bits) */
+ ld r0,_MSR(r1)
+ MTMSRD(r0)
+ isync
+
+ /* Restore other registers */
+ REST_GPR(2, r1)
+ REST_GPR(13, r1)
+ REST_NVGPRS(r1)
+ ld r4,_CCR(r1)
+ mtcr r4
+
+ addi r1,r1,SWITCH_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 577345382b23..e67effdba85c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -14,7 +14,7 @@
/* we cannot use FORTIFY as it brings in new symbols */
#define __NO_FORTIFY
-#include <stdarg.h>
+#include <linux/stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -26,26 +26,30 @@
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/bitops.h>
+#include <linux/pgtable.h>
+#include <linux/printk.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/page.h>
#include <asm/processor.h>
+#include <asm/interrupt.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
-#include <asm/machdep.h>
+#include <asm/setup.h>
#include <asm/asm-prototypes.h>
#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
/* All of prom_init bss lives here */
-#define __prombss __section(.bss.prominit)
+#define __prombss __section(".bss.prominit")
/*
* Eventually bump that one up
@@ -92,12 +96,6 @@ static int of_workarounds __prombss;
#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
-#define PROM_BUG() do { \
- prom_printf("kernel BUG at %s line 0x%x!\n", \
- __FILE__, __LINE__); \
- __builtin_trap(); \
-} while (0)
-
#ifdef DEBUG_PROM
#define prom_debug(x...) prom_printf(x)
#else
@@ -169,6 +167,7 @@ static unsigned long __prombss prom_tce_alloc_end;
#ifdef CONFIG_PPC_PSERIES
static bool __prombss prom_radix_disable;
+static bool __prombss prom_radix_gtse_disable;
static bool __prombss prom_xive_disable;
#endif
@@ -241,13 +240,31 @@ static int __init prom_strcmp(const char *cs, const char *ct)
return 0;
}
-static char __init *prom_strcpy(char *dest, const char *src)
+static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
{
- char *tmp = dest;
+ ssize_t rc;
+ size_t i;
- while ((*dest++ = *src++) != '\0')
- /* nothing */;
- return tmp;
+ if (n == 0 || n > INT_MAX)
+ return -E2BIG;
+
+ // Copy up to n bytes
+ for (i = 0; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+
+ rc = i;
+
+ // If we copied all n then we have run out of space for the nul
+ if (rc == n) {
+ // Rewind by one character to ensure nul termination
+ i--;
+ rc = -E2BIG;
+ }
+
+ for (; i < n; i++)
+ dest[i] = '\0';
+
+ return rc;
}
static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
@@ -354,6 +371,7 @@ static int __init prom_strtobool(const char *s, bool *res)
default:
break;
}
+ break;
default:
break;
}
@@ -650,7 +668,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static void add_string(char **str, const char *q)
+static void __init add_string(char **str, const char *q)
{
char *p = *str;
@@ -660,7 +678,7 @@ static void add_string(char **str, const char *q)
*str = p;
}
-static char *tohex(unsigned int x)
+static char *__init tohex(unsigned int x)
{
static const char digits[] __initconst = "0123456789abcdef";
static char result[9] __prombss;
@@ -699,29 +717,28 @@ static int __init prom_setprop(phandle node, const char *nodename,
}
/* We can't use the standard versions because of relocation headaches. */
-#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
- || ('a' <= (c) && (c) <= 'f') \
- || ('A' <= (c) && (c) <= 'F'))
+#define prom_isxdigit(c) \
+ (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
-#define isdigit(c) ('0' <= (c) && (c) <= '9')
-#define islower(c) ('a' <= (c) && (c) <= 'z')
-#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
+#define prom_isdigit(c) ('0' <= (c) && (c) <= '9')
+#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
+#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
-static unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
if (*cp == '0') {
base = 8;
cp++;
- if (toupper(*cp) == 'X') {
+ if (prom_toupper(*cp) == 'X') {
cp++;
base = 16;
}
}
- while (isxdigit(*cp) &&
- (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+ while (prom_isxdigit(*cp) &&
+ (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) {
result = result * base + value;
cp++;
}
@@ -732,7 +749,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-static unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -823,6 +840,12 @@ static void __init early_cmdline_parse(void)
if (prom_radix_disable)
prom_debug("Radix disabled from cmdline\n");
+ opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on");
+ if (opt) {
+ prom_radix_gtse_disable = true;
+ prom_debug("Radix GTSE disabled from cmdline\n");
+ }
+
opt = prom_strstr(prom_cmd_line, "xive=off");
if (opt) {
prom_xive_disable = true;
@@ -919,8 +942,12 @@ struct option_vector6 {
u8 os_name;
} __packed;
+struct option_vector7 {
+ u8 os_id[256];
+} __packed;
+
struct ibm_arch_vec {
- struct { u32 mask, val; } pvrs[12];
+ struct { __be32 mask, val; } pvrs[14];
u8 num_vectors;
@@ -941,6 +968,9 @@ struct ibm_arch_vec {
u8 vec6_len;
struct option_vector6 vec6;
+
+ u8 vec7_len;
+ struct option_vector7 vec7;
} __packed;
static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
@@ -974,6 +1004,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.val = cpu_to_be32(0x004e0000),
},
{
+ .mask = cpu_to_be32(0xffff0000), /* POWER10 */
+ .val = cpu_to_be32(0x00800000),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */
+ .val = cpu_to_be32(0x0f000006),
+ },
+ {
.mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
.val = cpu_to_be32(0x0f000005),
},
@@ -1002,7 +1040,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.byte1 = 0,
.arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
- .arch_versions3 = OV1_PPC_3_00,
+ .arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1,
},
.vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
@@ -1054,7 +1092,8 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
#else
0,
#endif
- .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
+ .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) |
+ OV5_FEAT(OV5_FORM2_AFFINITY),
.bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
.micro_checkpoint = 0,
.reserved0 = 0,
@@ -1079,6 +1118,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.secondary_pteg = 0,
.os_name = OV6_LINUX,
},
+
+ /* option vector 7: OS Identification */
+ .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
};
static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
@@ -1277,10 +1319,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
break;
case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
- if (val & OV5_FEAT(OV5_RADIX_GTSE)) {
- prom_debug("Radix - GTSE supported\n");
- support->radix_gtse = true;
- }
+ if (val & OV5_FEAT(OV5_RADIX_GTSE))
+ support->radix_gtse = !prom_radix_gtse_disable;
break;
case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
@@ -1309,6 +1349,8 @@ static void __init prom_check_platform_support(void)
memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
sizeof(ibm_architecture_vec));
+ prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
+
if (prop_len > 1) {
int i;
u8 vec[8];
@@ -1317,23 +1359,22 @@ static void __init prom_check_platform_support(void)
if (prop_len > sizeof(vec))
prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
prop_len);
- prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
- &vec, sizeof(vec));
- for (i = 0; i < sizeof(vec); i += 2) {
- prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
- , vec[i]
- , vec[i + 1]);
- prom_parse_platform_support(vec[i], vec[i + 1],
- &supported);
+ prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec));
+ for (i = 0; i < prop_len; i += 2) {
+ prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]);
+ prom_parse_platform_support(vec[i], vec[i + 1], &supported);
}
}
- if (supported.radix_mmu && supported.radix_gtse &&
- IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
- /* Radix preferred - but we require GTSE for now */
- prom_debug("Asking for radix with GTSE\n");
+ if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+ /* Radix preferred - Check if GTSE is also supported */
+ prom_debug("Asking for radix\n");
ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
- ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+ if (supported.radix_gtse)
+ ibm_architecture_vec.vec5.radix_ext =
+ OV5_FEAT(OV5_RADIX_GTSE);
+ else
+ prom_debug("Radix GTSE isn't supported\n");
} else if (supported.hash_mmu) {
/* Default to hash mmu (if we can) */
prom_debug("Asking for hash\n");
@@ -1449,18 +1490,18 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
unsigned long addr = 0;
if (align)
- base = _ALIGN_UP(base, align);
+ base = ALIGN(base, align);
prom_debug("%s(%lx, %lx)\n", __func__, size, align);
if (ram_top == 0)
prom_panic("alloc_up() called with mem not initialized\n");
if (align)
- base = _ALIGN_UP(alloc_bottom, align);
+ base = ALIGN(alloc_bottom, align);
else
base = alloc_bottom;
for(; (base + size) <= alloc_top;
- base = _ALIGN_UP(base + 0x100000, align)) {
+ base = ALIGN(base + 0x100000, align)) {
prom_debug(" trying: 0x%lx\n\r", base);
addr = (unsigned long)prom_claim(base, size, 0);
if (addr != PROM_ERROR && addr != 0)
@@ -1500,7 +1541,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
if (highmem) {
/* Carve out storage for the TCE table. */
- addr = _ALIGN_DOWN(alloc_top_high - size, align);
+ addr = ALIGN_DOWN(alloc_top_high - size, align);
if (addr <= alloc_bottom)
return 0;
/* Will we bump into the RMO ? If yes, check out that we
@@ -1518,9 +1559,9 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
goto bail;
}
- base = _ALIGN_DOWN(alloc_top - size, align);
+ base = ALIGN_DOWN(alloc_top - size, align);
for (; base > alloc_bottom;
- base = _ALIGN_DOWN(base - 0x100000, align)) {
+ base = ALIGN_DOWN(base - 0x100000, align)) {
prom_debug(" trying: 0x%lx\n\r", base);
addr = (unsigned long)prom_claim(base, size, 0);
if (addr != PROM_ERROR && addr != 0)
@@ -1586,8 +1627,8 @@ static void __init reserve_mem(u64 base, u64 size)
* have our terminator with "size" set to 0 since we are
* dumb and just copy this entire array to the boot params
*/
- base = _ALIGN_DOWN(base, PAGE_SIZE);
- top = _ALIGN_UP(top, PAGE_SIZE);
+ base = ALIGN_DOWN(base, PAGE_SIZE);
+ top = ALIGN(top, PAGE_SIZE);
size = top - base;
if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
@@ -1741,7 +1782,7 @@ static void __init prom_close_stdin(void)
}
#ifdef CONFIG_PPC_SVM
-static int prom_rtas_hcall(uint64_t args)
+static int __init prom_rtas_hcall(uint64_t args)
{
register uint64_t arg1 asm("r3") = H_RTAS;
register uint64_t arg2 asm("r4") = args;
@@ -1749,6 +1790,8 @@ static int prom_rtas_hcall(uint64_t args)
asm volatile("sc 1\n" : "=r" (arg1) :
"r" (arg1),
"r" (arg2) :);
+ srr_regs_clobbered();
+
return arg1;
}
@@ -1773,6 +1816,9 @@ static void __init prom_rtas_os_term(char *str)
if (token == 0)
prom_panic("Could not get token for ibm,os-term\n");
os_term_args.token = cpu_to_be32(token);
+ os_term_args.nargs = cpu_to_be32(1);
+ os_term_args.nret = cpu_to_be32(1);
+ os_term_args.args[0] = cpu_to_be32(__pa(str));
prom_rtas_hcall((uint64_t)&os_term_args);
}
#endif /* CONFIG_PPC_SVM */
@@ -2250,7 +2296,7 @@ static void __init prom_init_stdout(void)
static int __init prom_find_machine_type(void)
{
- char compat[256];
+ static char compat[256] __prombss;
int len, i = 0;
#ifdef CONFIG_PPC64
phandle rtas;
@@ -2403,10 +2449,19 @@ static void __init prom_check_displays(void)
u32 width, height, pitch, addr;
prom_printf("Setting btext !\n");
- prom_getprop(node, "width", &width, 4);
- prom_getprop(node, "height", &height, 4);
- prom_getprop(node, "linebytes", &pitch, 4);
- prom_getprop(node, "address", &addr, 4);
+
+ if (prom_getprop(node, "width", &width, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "height", &height, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR)
+ return;
+
prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
width, height, pitch, addr);
btext_setup_display(width, height, 8, pitch, addr);
@@ -2423,7 +2478,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
{
void *ret;
- *mem_start = _ALIGN(*mem_start, align);
+ *mem_start = ALIGN(*mem_start, align);
while ((*mem_start + needed) > *mem_end) {
unsigned long room, chunk;
@@ -2559,7 +2614,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
*lp++ = *p;
}
*lp = 0;
- *mem_start = _ALIGN((unsigned long)lp + 1, 4);
+ *mem_start = ALIGN((unsigned long)lp + 1, 4);
}
/* get it again for debugging */
@@ -2605,7 +2660,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
/* push property content */
valp = make_room(mem_start, mem_end, l, 4);
call_prom("getprop", 4, 1, node, pname, valp, l);
- *mem_start = _ALIGN(*mem_start, 4);
+ *mem_start = ALIGN(*mem_start, 4);
if (!prom_strcmp(pname, "phandle"))
has_phandle = 1;
@@ -2664,7 +2719,7 @@ static void __init flatten_device_tree(void)
prom_panic ("couldn't get device tree root\n");
/* Build header and make room for mem rsv map */
- mem_start = _ALIGN(mem_start, 4);
+ mem_start = ALIGN(mem_start, 4);
hdr = make_room(&mem_start, &mem_end,
sizeof(struct boot_param_header), 4);
dt_header_start = (unsigned long)hdr;
@@ -2677,7 +2732,7 @@ static void __init flatten_device_tree(void)
/* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- prom_strcpy(namep, "phandle");
+ prom_strscpy_pad(namep, "phandle", sizeof("phandle"));
mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
@@ -2932,7 +2987,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
@@ -2958,7 +3013,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
" 0x3 encode-int encode+"
" s\" interrupts\" property"
" finish-device");
- };
+ }
/* Check for a PHY device node - if missing then create one and
* give it's phandle to the ethernet node */
@@ -3185,59 +3240,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
#endif /* CONFIG_BLK_DEV_INITRD */
}
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_RELOCATABLE
-static void reloc_toc(void)
-{
-}
-
-static void unreloc_toc(void)
-{
-}
-#else
-static void __reloc_toc(unsigned long offset, unsigned long nr_entries)
-{
- unsigned long i;
- unsigned long *toc_entry;
-
- /* Get the start of the TOC by using r2 directly. */
- asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry));
-
- for (i = 0; i < nr_entries; i++) {
- *toc_entry = *toc_entry + offset;
- toc_entry++;
- }
-}
-
-static void reloc_toc(void)
-{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
- __reloc_toc(offset, nr_entries);
-
- mb();
-}
-
-static void unreloc_toc(void)
-{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
-
- mb();
-
- __reloc_toc(-offset, nr_entries);
-}
-#endif
-#endif
-
#ifdef CONFIG_PPC_SVM
/*
* Perform the Enter Secure Mode ultracall.
*/
-static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
register unsigned long r3 asm("r3") = UV_ESM;
register unsigned long r4 asm("r4") = kbase;
@@ -3251,7 +3258,7 @@ static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
/*
* Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
*/
-static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
{
int ret;
@@ -3266,14 +3273,12 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
* relocated it so the check will fail. Restore the original image by
* relocating it back to the kernel virtual base address.
*/
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- relocate(KERNELBASE);
+ relocate(KERNELBASE);
ret = enter_secure_mode(kbase, fdt);
/* Relocate the kernel again. */
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- relocate(kbase);
+ relocate(kbase);
if (ret != U_SUCCESS) {
prom_printf("Returned %d from switching to secure mode.\n", ret);
@@ -3281,7 +3286,7 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
}
}
#else
-static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
{
}
#endif /* CONFIG_PPC_SVM */
@@ -3301,8 +3306,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
#ifdef CONFIG_PPC32
unsigned long offset = reloc_offset();
reloc_got2(offset);
-#else
- reloc_toc();
#endif
/*
@@ -3409,7 +3412,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*
* PowerMacs use a different mechanism to spin CPUs
*
- * (This must be done after instanciating RTAS)
+ * (This must be done after instantiating RTAS)
*/
if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
@@ -3474,14 +3477,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
hdr = dt_header_start;
- /* Don't print anything after quiesce under OPAL, it crashes OFW */
prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%lx\n", hdr);
#ifdef CONFIG_PPC32
reloc_got2(-offset);
-#else
- unreloc_toc();
#endif
/* Move to secure memory if we're supposed to be secure guests. */
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index b183ab9c5107..69623b9045d5 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -13,8 +13,13 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
-grep "^CONFIG_KASAN=y$" .config >/dev/null
-if [ $? -eq 0 ]
+has_renamed_memintrinsics()
+{
+ grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \
+ ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG}
+}
+
+if has_renamed_memintrinsics
then
MEM_FUNCS="__memcpy __memset"
else
@@ -26,8 +31,7 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
-relocate"
+btext_setup_display TOC. relocate"
NM="$1"
OBJ="$2"
@@ -52,11 +56,10 @@ do
# a leading . on the name, so strip it off here.
UNDEF="${UNDEF#.}"
- if [ $KBUILD_VERBOSE ]; then
- if [ $KBUILD_VERBOSE -ne 0 ]; then
- echo "Checking prom_init.o symbol '$UNDEF'"
- fi
- fi
+ case "$KBUILD_VERBOSE" in
+ *1*)
+ echo "Checking prom_init.o symbol '$UNDEF'" ;;
+ esac
OK=0
for WHITE in $WHITELIST
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
deleted file mode 100644
index 25c0424e8868..000000000000
--- a/arch/powerpc/kernel/ptrace.c
+++ /dev/null
@@ -1,3468 +0,0 @@
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Derived from "arch/m68k/kernel/ptrace.c"
- * Copyright (C) 1994 by Hamish Macdonald
- * Taken from linux/kernel/ptrace.c and modified for M680x0.
- * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@hq.fsmlabs.com)
- * and Paul Mackerras (paulus@samba.org).
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/regset.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <trace/syscall.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
-#include <linux/context_tracking.h>
-#include <linux/nospec.h>
-
-#include <linux/uaccess.h>
-#include <linux/pkeys.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/switch_to.h>
-#include <asm/tm.h>
-#include <asm/asm-prototypes.h>
-#include <asm/debug.h>
-#include <asm/hw_breakpoint.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-/*
- * The parameter save area on the stack is used to store arguments being passed
- * to callee function and is located at fixed offset from stack pointer.
- */
-#ifdef CONFIG_PPC32
-#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
-#else /* CONFIG_PPC32 */
-#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
-#endif
-
-struct pt_regs_offset {
- const char *name;
- int offset;
-};
-
-#define STR(s) #s /* convert to string */
-#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
-#define GPR_OFFSET_NAME(num) \
- {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
- {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-#define TVSO(f) (offsetof(struct thread_vr_state, f))
-#define TFSO(f) (offsetof(struct thread_fp_state, f))
-#define TSO(f) (offsetof(struct thread_struct, f))
-
-static const struct pt_regs_offset regoffset_table[] = {
- GPR_OFFSET_NAME(0),
- GPR_OFFSET_NAME(1),
- GPR_OFFSET_NAME(2),
- GPR_OFFSET_NAME(3),
- GPR_OFFSET_NAME(4),
- GPR_OFFSET_NAME(5),
- GPR_OFFSET_NAME(6),
- GPR_OFFSET_NAME(7),
- GPR_OFFSET_NAME(8),
- GPR_OFFSET_NAME(9),
- GPR_OFFSET_NAME(10),
- GPR_OFFSET_NAME(11),
- GPR_OFFSET_NAME(12),
- GPR_OFFSET_NAME(13),
- GPR_OFFSET_NAME(14),
- GPR_OFFSET_NAME(15),
- GPR_OFFSET_NAME(16),
- GPR_OFFSET_NAME(17),
- GPR_OFFSET_NAME(18),
- GPR_OFFSET_NAME(19),
- GPR_OFFSET_NAME(20),
- GPR_OFFSET_NAME(21),
- GPR_OFFSET_NAME(22),
- GPR_OFFSET_NAME(23),
- GPR_OFFSET_NAME(24),
- GPR_OFFSET_NAME(25),
- GPR_OFFSET_NAME(26),
- GPR_OFFSET_NAME(27),
- GPR_OFFSET_NAME(28),
- GPR_OFFSET_NAME(29),
- GPR_OFFSET_NAME(30),
- GPR_OFFSET_NAME(31),
- REG_OFFSET_NAME(nip),
- REG_OFFSET_NAME(msr),
- REG_OFFSET_NAME(ctr),
- REG_OFFSET_NAME(link),
- REG_OFFSET_NAME(xer),
- REG_OFFSET_NAME(ccr),
-#ifdef CONFIG_PPC64
- REG_OFFSET_NAME(softe),
-#else
- REG_OFFSET_NAME(mq),
-#endif
- REG_OFFSET_NAME(trap),
- REG_OFFSET_NAME(dar),
- REG_OFFSET_NAME(dsisr),
- REG_OFFSET_END,
-};
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * If task is not current, it will have been flushed already to
- * it's thread_struct during __switch_to().
- *
- * A reclaim flushes ALL the state or if not in TM save TM SPRs
- * in the appropriate thread structures from live.
- */
-
- if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
- return;
-
- if (MSR_TM_SUSPENDED(mfmsr())) {
- tm_reclaim_current(TM_CAUSE_SIGNAL);
- } else {
- tm_enable();
- tm_save_sprs(&(tsk->thread));
- }
-}
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
-#endif
-
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name: the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
-}
-
-/**
- * regs_query_register_name() - query register name from its offset
- * @offset: the offset of a register in struct pt_regs.
- *
- * regs_query_register_name() returns the name of a register from its
- * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
- */
-const char *regs_query_register_name(unsigned int offset)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (roff->offset == offset)
- return roff->name;
- return NULL;
-}
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Set of msr bits that gdb can change on behalf of a process.
- */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-#define MSR_DEBUGCHANGE 0
-#else
-#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
-#endif
-
-/*
- * Max register writeable via put_reg
- */
-#ifdef CONFIG_PPC32
-#define PT_MAX_PUT_REG PT_MQ
-#else
-#define PT_MAX_PUT_REG PT_CCR
-#endif
-
-static unsigned long get_user_msr(struct task_struct *task)
-{
- return task->thread.regs->msr | task->thread.fpexc_mode;
-}
-
-static int set_user_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
- task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static unsigned long get_user_ckpt_msr(struct task_struct *task)
-{
- return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
-}
-
-static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
- task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.ckpt_regs.trap = trap & 0xfff0;
- return 0;
-}
-#endif
-
-#ifdef CONFIG_PPC64
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- *data = task->thread.dscr;
- return 0;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- task->thread.dscr = dscr;
- task->thread.dscr_inherit = 1;
- return 0;
-}
-#else
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- return -EIO;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- return -EIO;
-}
-#endif
-
-/*
- * We prevent mucking around with the reserved area of trap
- * which are used internally by the kernel.
- */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.regs->trap = trap & 0xfff0;
- return 0;
-}
-
-/*
- * Get contents of register REGNO in task TASK.
- */
-int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
-{
- unsigned int regs_max;
-
- if ((task->thread.regs == NULL) || !data)
- return -EIO;
-
- if (regno == PT_MSR) {
- *data = get_user_msr(task);
- return 0;
- }
-
- if (regno == PT_DSCR)
- return get_user_dscr(task, data);
-
-#ifdef CONFIG_PPC64
- /*
- * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
- * no more used as a flag, lets force usr to alway see the softe value as 1
- * which means interrupts are not soft disabled.
- */
- if (regno == PT_SOFTE) {
- *data = 1;
- return 0;
- }
-#endif
-
- regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
- if (regno < regs_max) {
- regno = array_index_nospec(regno, regs_max);
- *data = ((unsigned long *)task->thread.regs)[regno];
- return 0;
- }
-
- return -EIO;
-}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
-int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
- if (task->thread.regs == NULL)
- return -EIO;
-
- if (regno == PT_MSR)
- return set_user_msr(task, data);
- if (regno == PT_TRAP)
- return set_user_trap(task, data);
- if (regno == PT_DSCR)
- return set_user_dscr(task, data);
-
- if (regno <= PT_MAX_PUT_REG) {
- regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
- ((unsigned long *)task->thread.regs)[regno] = data;
- return 0;
- }
- return -EIO;
-}
-
-static int gpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i, ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_msr(target);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-static int gpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- *
- */
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
-
- for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
- return 0;
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-#ifdef CONFIG_ALTIVEC
-/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
- * The transfer totals 34 quadword. Quadwords 0-31 contain the
- * corresponding vector registers. Quadword 32 contains the vscr as the
- * last word (offset 12) within that quadword. Quadword 33 contains the
- * vrsave as the first word (offset 0) within the quadword.
- *
- * This definition of the VMX state is compatible with the current PPC32
- * ptrace interface. This allows signal handling and ptrace to use the
- * same structures. This also simplifies the implementation of a bi-arch
- * (combined (32- and 64-bit) gdb.
- */
-
-static int vr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_altivec_to_thread(target);
- return target->thread.used_vr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- }
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the first word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- if (!ret)
- target->thread.vrsave = vrsave.word;
- }
-
- return ret;
-}
-#endif /* CONFIG_ALTIVEC */
-
-#ifdef CONFIG_VSX
-/*
- * Currently to set and and get all the vsx state, you need to call
- * the fp and VMX calls as well. This only get/sets the lower 32
- * 128bit VSX registers.
- */
-
-static int vsr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret,i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
-
-/*
- * For get_evrregs/set_evrregs functions 'data' has the following layout:
- *
- * struct {
- * u32 evr[32];
- * u64 acc;
- * u32 spefscr;
- * }
- */
-
-static int evr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_spe_to_thread(target);
- return target->thread.used_spe ? regset->n : 0;
-}
-
-static int evr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-
-static int evr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-#endif /* CONFIG_SPE */
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/**
- * tm_cgpr_active - get active number of registers in CGPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed GPR category.
- */
-static int tm_cgpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cgpr_get - get CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds all the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function gets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_ckpt_msr(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-/*
- * tm_cgpr_set - set the CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function sets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/**
- * tm_cfpr_active - get active number of registers in CFPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed FPR category.
- */
-static int tm_cfpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cfpr_get - get CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed FPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-}
-
-/**
- * tm_cfpr_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * FPR register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- for (i = 0; i < 32; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
- for (i = 0; i < 32 ; i++)
- target->thread.TS_CKFPR(i) = buf[i];
- target->thread.ckfp_state.fpscr = buf[32];
- return 0;
-}
-
-/**
- * tm_cvmx_active - get active number of registers in CVMX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in checkpointed VMX category.
- */
-static int tm_cvmx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cvmx_get - get CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- }
-
- return ret;
-}
-
-/**
- * tm_cvmx_set - set CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- if (!ret)
- target->thread.ckvrsave = vrsave.word;
- }
-
- return ret;
-}
-
-/**
- * tm_cvsx_active - get active number of registers in CVSX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed VSX category.
- */
-static int tm_cvsx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/**
- * tm_cvsx_get - get CVSX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed VSX registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/**
- * tm_cvsx_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * VSX register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-
-/**
- * tm_spr_active - get active number of registers in TM SPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks the active number of available
- * regisers in the transactional memory SPR category.
- */
-static int tm_spr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- return regset->n;
-}
-
-/**
- * tm_spr_get - get the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-/**
- * tm_spr_set - set the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-static int tm_tar_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-
-static int tm_ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-#ifdef CONFIG_PPC64
-static int ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-static int dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static int tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-static int tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-
-static int ebb_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return regset->n;
-
- return 0;
-}
-
-static int ebb_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (!target->thread.used_ebb)
- return -ENODATA;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, 3 * sizeof(unsigned long));
-}
-
-static int ebb_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbhr, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.bescr,
- 2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
-
- return ret;
-}
-static int pmu_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pmu_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- 5 * sizeof(unsigned long));
-}
-
-static int pmu_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sdar, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sier, 2 * sizeof(unsigned long),
- 3 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr2, 3 * sizeof(unsigned long),
- 4 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr0, 4 * sizeof(unsigned long),
- 5 * sizeof(unsigned long));
- return ret;
-}
-#endif
-
-#ifdef CONFIG_PPC_MEM_KEYS
-static int pkey_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pkey_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
- BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.amr, 0,
- ELF_NPKEY * sizeof(unsigned long));
-}
-
-static int pkey_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 new_amr;
- int ret;
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- /* Only the AMR can be set from userspace */
- if (pos != 0 || count != sizeof(new_amr))
- return -EINVAL;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &new_amr, 0, sizeof(new_amr));
- if (ret)
- return ret;
-
- /* UAMOR determines which bits of the AMR can be set from userspace. */
- target->thread.amr = (new_amr & target->thread.uamor) |
- (target->thread.amr & ~target->thread.uamor);
-
- return 0;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
-/*
- * These are our native regset flavors.
- */
-enum powerpc_regset {
- REGSET_GPR,
- REGSET_FPR,
-#ifdef CONFIG_ALTIVEC
- REGSET_VMX,
-#endif
-#ifdef CONFIG_VSX
- REGSET_VSX,
-#endif
-#ifdef CONFIG_SPE
- REGSET_SPE,
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- REGSET_TM_CGPR, /* TM checkpointed GPR registers */
- REGSET_TM_CFPR, /* TM checkpointed FPR registers */
- REGSET_TM_CVMX, /* TM checkpointed VMX registers */
- REGSET_TM_CVSX, /* TM checkpointed VSX registers */
- REGSET_TM_SPR, /* TM specific SPR registers */
- REGSET_TM_CTAR, /* TM checkpointed TAR register */
- REGSET_TM_CPPR, /* TM checkpointed PPR register */
- REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
-#endif
-#ifdef CONFIG_PPC64
- REGSET_PPR, /* PPR register */
- REGSET_DSCR, /* DSCR register */
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- REGSET_TAR, /* TAR register */
- REGSET_EBB, /* EBB registers */
- REGSET_PMR, /* Performance Monitor Registers */
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- REGSET_PKEY, /* AMR register */
-#endif
-};
-
-static const struct user_regset native_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .get = gpr_get, .set = gpr_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_VSX
- [REGSET_VSX] = {
- .core_note_type = NT_PPC_VSX, .n = 32,
- .size = sizeof(double), .align = sizeof(double),
- .active = vsr_active, .get = vsr_get, .set = vsr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
- [REGSET_PMR] = {
- .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pmu_active, .get = pmu_get, .set = pmu_set
- },
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- [REGSET_PKEY] = {
- .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pkey_active, .get = pkey_get, .set = pkey_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_native_view = {
- .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
-
-#ifdef CONFIG_PPC64
-#include <linux/compat.h>
-
-static int gpr32_get_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- unsigned long *regs)
-{
- compat_ulong_t *k = kbuf;
- compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_MSR; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- if (count > 0 && pos == PT_MSR) {
- reg = get_user_msr(target);
- if (kbuf)
- *k++ = reg;
- else if (__put_user(reg, u++))
- return -EFAULT;
- ++pos;
- --count;
- }
-
- if (kbuf)
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- PT_REGS_COUNT * sizeof(reg), -1);
-}
-
-static int gpr32_set_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf,
- unsigned long *regs)
-{
- const compat_ulong_t *k = kbuf;
- const compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- regs[pos++] = *k++;
- else
- for (; count > 0 && pos < PT_MSR; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
-
-
- if (count > 0 && pos == PT_MSR) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_msr(target, reg);
- ++pos;
- --count;
- }
-
- if (kbuf) {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
- regs[pos++] = *k++;
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- ++k;
- } else {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- if (__get_user(reg, u++))
- return -EFAULT;
- }
-
- if (count > 0 && pos == PT_TRAP) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_trap(target, reg);
- ++pos;
- --count;
- }
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static int tm_cgpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-
-static int tm_cgpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-static int gpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /*
- * We have a partial register set.
- * Fill 14-31 with bogus values.
- */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-static int gpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-/*
- * These are the regset flavors matching the CONFIG_PPC32 native set.
- */
-static const struct user_regset compat_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
- .get = gpr32_get, .set = gpr32_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active,
- .get = tm_cgpr32_get, .set = tm_cgpr32_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_compat_view = {
- .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
- .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
-};
-#endif /* CONFIG_PPC64 */
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
-{
-#ifdef CONFIG_PPC64
- if (test_tsk_thread_flag(task, TIF_32BIT))
- return &user_ppc_compat_view;
-#endif
- return &user_ppc_native_view;
-}
-
-
-void user_enable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_BT;
- task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_BE;
- regs->msr |= MSR_SE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_enable_block_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_IC;
- task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_SE;
- regs->msr |= MSR_BE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_disable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * The logic to disable single stepping should be as
- * simple as turning off the Instruction Complete flag.
- * And, after doing so, if all debug flags are off, turn
- * off DBCR0(IDM) and MSR(DE) .... Torez
- */
- task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);
- /*
- * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
- */
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- /*
- * All debug events were off.....
- */
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- regs->msr &= ~MSR_DE;
- }
-#else
- regs->msr &= ~(MSR_SE | MSR_BE);
-#endif
- }
- clear_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp,
- struct perf_sample_data *data, struct pt_regs *regs)
-{
- struct perf_event_attr attr;
-
- /*
- * Disable the breakpoint request here since ptrace has defined a
- * one-shot behaviour for breakpoint exceptions in PPC64.
- * The SIGTRAP signal is generated automatically for us in do_dabr().
- * We don't have to do anything about that here
- */
- attr = bp->attr;
- attr.disabled = true;
- modify_user_hw_breakpoint(bp, &attr);
-}
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
- unsigned long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret;
- struct thread_struct *thread = &(task->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- bool set_bp = true;
- struct arch_hw_breakpoint hw_brk;
-#endif
-
- /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
- * For embedded processors we support one DAC and no IAC's at the
- * moment.
- */
- if (addr > 0)
- return -EINVAL;
-
- /* The bottom 3 bits in dabr are flags */
- if ((data & ~0x7UL) >= TASK_SIZE)
- return -EIO;
-
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
- * It was assumed, on previous implementations, that 3 bits were
- * passed together with the data address, fitting the design of the
- * DABR register, as follows:
- *
- * bit 0: Read flag
- * bit 1: Write flag
- * bit 2: Breakpoint translation
- *
- * Thus, we use them here as so.
- */
-
- /* Ensure breakpoint translation bit is set */
- if (data && !(data & HW_BRK_TYPE_TRANSLATE))
- return -EIO;
- hw_brk.address = data & (~HW_BRK_TYPE_DABR);
- hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = DABR_MAX_LEN;
- hw_brk.hw_len = DABR_MAX_LEN;
- set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (!set_bp) {
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- }
- return 0;
- }
- if (bp) {
- attr = bp->attr;
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
-
- /* Enable breakpoint */
- attr.disabled = false;
-
- ret = modify_user_hw_breakpoint(bp, &attr);
- if (ret) {
- return ret;
- }
- thread->ptrace_bps[0] = bp;
- thread->hw_brk = hw_brk;
- return 0;
- }
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type,
- &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, task);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
-#else /* !CONFIG_HAVE_HW_BREAKPOINT */
- if (set_bp && (!ppc_breakpoint_available()))
- return -ENODEV;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
- task->thread.hw_brk = hw_brk;
-#else /* CONFIG_PPC_ADV_DEBUG_REGS */
- /* As described above, it was assumed 3 bits were passed with the data
- * address, but we will assume only the mode bits will be passed
- * as to not cause alignment restrictions for DAC-based processors.
- */
-
- /* DAC's hold the whole address without any mode flags */
- task->thread.debug.dac1 = data & ~0x3UL;
-
- if (task->thread.debug.dac1 == 0) {
- dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- task->thread.regs->msr &= ~MSR_DE;
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- }
- return 0;
- }
-
- /* Read or Write bits must be set */
-
- if (!(data & 0x3UL))
- return -EINVAL;
-
- /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
- register */
- task->thread.debug.dbcr0 |= DBCR0_IDM;
-
- /* Check for write and read flags and set DBCR0
- accordingly */
- dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
- if (data & 0x1UL)
- dbcr_dac(task) |= DBCR_DAC1R;
- if (data & 0x2UL)
- dbcr_dac(task) |= DBCR_DAC1W;
- task->thread.regs->msr |= MSR_DE;
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
- return 0;
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- /* make sure the single step bit is not set. */
- user_disable_single_step(child);
-}
-
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_instruction_bp(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int slot;
- int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
- int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
- int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
- int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- slot2_in_use = 1;
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- slot4_in_use = 1;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
-
- /* Make sure range is valid. */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
-
- /* We need a pair of IAC regsisters */
- if ((!slot1_in_use) && (!slot2_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.iac2 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC12X;
- else
- dbcr_iac_range(child) |= DBCR_IAC12I;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if ((!slot3_in_use) && (!slot4_in_use)) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.iac4 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC34X;
- else
- dbcr_iac_range(child) |= DBCR_IAC34I;
-#endif
- } else
- return -ENOSPC;
- } else {
- /* We only need one. If possible leave a pair free in
- * case a range is needed later
- */
- if (!slot1_in_use) {
- /*
- * Don't use iac1 if iac1-iac2 are free and either
- * iac3 or iac4 (but not both) are free
- */
- if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- goto out;
- }
- }
- if (!slot2_in_use) {
- slot = 2;
- child->thread.debug.iac2 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC2;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if (!slot3_in_use) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- } else if (!slot4_in_use) {
- slot = 4;
- child->thread.debug.iac4 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC4;
-#endif
- } else
- return -ENOSPC;
- }
-out:
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot;
-}
-
-static int del_instruction_bp(struct task_struct *child, int slot)
-{
- switch (slot) {
- case 1:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
- /* address range - clear slots 1 & 2 */
- child->thread.debug.iac2 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
- }
- child->thread.debug.iac1 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
- break;
- case 2:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- /* used in a range */
- return -EINVAL;
- child->thread.debug.iac2 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
- break;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- case 3:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
- /* address range - clear slots 3 & 4 */
- child->thread.debug.iac4 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
- }
- child->thread.debug.iac3 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
- break;
- case 4:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- /* Used in a range */
- return -EINVAL;
- child->thread.debug.iac4 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
- break;
-#endif
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
-{
- int byte_enable =
- (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
- & 0xf;
- int condition_mode =
- bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
- int slot;
-
- if (byte_enable && (condition_mode == 0))
- return -EINVAL;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
- slot = 1;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC1R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC1W;
- child->thread.debug.dac1 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc1 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC1BE_SHIFT) |
- (condition_mode << DBCR2_DVC1M_SHIFT));
- }
-#endif
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- /* Both dac1 and dac2 are part of a range */
- return -ENOSPC;
-#endif
- } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
- slot = 2;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC2R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC2W;
- child->thread.debug.dac2 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc2 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC2BE_SHIFT) |
- (condition_mode << DBCR2_DVC2M_SHIFT));
- }
-#endif
- } else
- return -ENOSPC;
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot + 4;
-}
-
-static int del_dac(struct task_struct *child, int slot)
-{
- if (slot == 1) {
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
- return -ENOENT;
-
- child->thread.debug.dac1 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- child->thread.debug.dac2 = 0;
- child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
- }
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc1 = 0;
-#endif
- } else if (slot == 2) {
- if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
- return -ENOENT;
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
- /* Part of a range */
- return -EINVAL;
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc2 = 0;
-#endif
- child->thread.debug.dac2 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
- } else
- return -EINVAL;
-
- return 0;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-static int set_dac_range(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
-
- /* We don't allow range watchpoints to be used with DVC */
- if (bp_info->condition_mode)
- return -EINVAL;
-
- /*
- * Best effort to verify the address range. The user/supervisor bits
- * prevent trapping in kernel space, but let's fail on an obvious bad
- * range. The simple test on the mask is not fool-proof, and any
- * exclusive range will spill over into kernel space.
- */
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
- if (mode == PPC_BREAKPOINT_MODE_MASK) {
- /*
- * dac2 is a bitmask. Don't allow a mask that makes a
- * kernel space address from a valid dac1 value
- */
- if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
- return -EIO;
- } else {
- /*
- * For range breakpoints, addr2 must also be a valid address
- */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
- }
-
- if (child->thread.debug.dbcr0 &
- (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
- return -ENOSPC;
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
- child->thread.debug.dac1 = bp_info->addr;
- child->thread.debug.dac2 = bp_info->addr2;
- if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12M;
- else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
- else /* PPC_BREAKPOINT_MODE_MASK */
- child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
- child->thread.regs->msr |= MSR_DE;
-
- return 5;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
-
-static long ppc_set_hwdebug(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int len = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- struct arch_hw_breakpoint brk;
-#endif
-
- if (bp_info->version != 1)
- return -ENOTSUPP;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * Check for invalid flags and combinations
- */
- if ((bp_info->trigger_type == 0) ||
- (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
- PPC_BREAKPOINT_TRIGGER_RW)) ||
- (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
- (bp_info->condition_mode &
- ~(PPC_BREAKPOINT_CONDITION_MODE |
- PPC_BREAKPOINT_CONDITION_BE_ALL)))
- return -EINVAL;
-#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
- if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-#endif
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
- if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
- (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
- return -EINVAL;
- return set_instruction_bp(child, bp_info);
- }
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- return set_dac(child, bp_info);
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- return set_dac_range(child, bp_info);
-#else
- return -EINVAL;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */
- /*
- * We only support one data breakpoint
- */
- if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
- (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
- bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-
- if ((unsigned long)bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
- brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = DABR_MAX_LEN;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- brk.type |= HW_BRK_TYPE_READ;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- brk.type |= HW_BRK_TYPE_WRITE;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- len = bp_info->addr2 - bp_info->addr;
- else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- len = 1;
- else
- return -EINVAL;
- bp = thread->ptrace_bps[0];
- if (bp)
- return -ENOSPC;
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr;
- attr.bp_len = len;
- arch_bp_generic_fields(brk.type, &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, child);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
- return 1;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
- return -EINVAL;
-
- if (child->thread.hw_brk.address)
- return -ENOSPC;
-
- if (!ppc_breakpoint_available())
- return -ENODEV;
-
- child->thread.hw_brk = brk;
-
- return 1;
-#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
-}
-
-static long ppc_del_hwdebug(struct task_struct *child, long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- int rc;
-
- if (data <= 4)
- rc = del_instruction_bp(child, (int)data);
- else
- rc = del_dac(child, (int)data - 4);
-
- if (!rc) {
- if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
- child->thread.debug.dbcr1)) {
- child->thread.debug.dbcr0 &= ~DBCR0_IDM;
- child->thread.regs->msr &= ~MSR_DE;
- }
- }
- return rc;
-#else
- if (data != 1)
- return -EINVAL;
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- } else
- ret = -ENOENT;
- return ret;
-#else /* CONFIG_HAVE_HW_BREAKPOINT */
- if (child->thread.hw_brk.address == 0)
- return -ENOENT;
-
- child->thread.hw_brk.address = 0;
- child->thread.hw_brk.type = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- return 0;
-#endif
-}
-
-long arch_ptrace(struct task_struct *child, long request,
- unsigned long addr, unsigned long data)
-{
- int ret = -EPERM;
- void __user *datavp = (void __user *) data;
- unsigned long __user *datalp = datavp;
-
- switch (request) {
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long index, tmp;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_get_reg(child, (int) index, &tmp);
- if (ret)
- break;
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&tmp, &child->thread.TS_FPR(fpidx),
- sizeof(long));
- else
- tmp = child->thread.fp_state.fpscr;
- }
- ret = put_user(tmp, datalp);
- break;
- }
-
- /* write the word at location addr in the USER area */
- case PTRACE_POKEUSR: {
- unsigned long index;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_put_reg(child, index, data);
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data,
- sizeof(long));
- else
- child->thread.fp_state.fpscr = data;
- ret = 0;
- }
- break;
- }
-
- case PPC_PTRACE_GETHWDBGINFO: {
- struct ppc_debug_info dbginfo;
-
- dbginfo.version = 1;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
- dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
- dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
- dbginfo.data_bp_alignment = 4;
- dbginfo.sizeof_condition = 4;
- dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
- PPC_DEBUG_FEATURE_INSN_BP_MASK;
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- dbginfo.features |=
- PPC_DEBUG_FEATURE_DATA_BP_RANGE |
- PPC_DEBUG_FEATURE_DATA_BP_MASK;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
- dbginfo.num_instruction_bps = 0;
- if (ppc_breakpoint_available())
- dbginfo.num_data_bps = 1;
- else
- dbginfo.num_data_bps = 0;
- dbginfo.num_condition_regs = 0;
-#ifdef CONFIG_PPC64
- dbginfo.data_bp_alignment = 8;
-#else
- dbginfo.data_bp_alignment = 4;
-#endif
- dbginfo.sizeof_condition = 0;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (dawr_enabled())
- dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
-#else
- dbginfo.features = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
- if (copy_to_user(datavp, &dbginfo,
- sizeof(struct ppc_debug_info)))
- return -EFAULT;
- return 0;
- }
-
- case PPC_PTRACE_SETHWDEBUG: {
- struct ppc_hw_breakpoint bp_info;
-
- if (copy_from_user(&bp_info, datavp,
- sizeof(struct ppc_hw_breakpoint)))
- return -EFAULT;
- return ppc_set_hwdebug(child, &bp_info);
- }
-
- case PPC_PTRACE_DELHWDEBUG: {
- ret = ppc_del_hwdebug(child, data);
- break;
- }
-
- case PTRACE_GET_DEBUGREG: {
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- unsigned long dabr_fake;
-#endif
- ret = -EINVAL;
- /* We only support one DABR and no IABRS at the moment */
- if (addr > 0)
- break;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- ret = put_user(child->thread.debug.dac1, datalp);
-#else
- dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
- ret = put_user(dabr_fake, datalp);
-#endif
- break;
- }
-
- case PTRACE_SET_DEBUGREG:
- ret = ptrace_set_debugreg(child, addr, data);
- break;
-
-#ifdef CONFIG_PPC64
- case PTRACE_GETREGS64:
-#endif
- case PTRACE_GETREGS: /* Get all pt_regs from the child. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
-#ifdef CONFIG_PPC64
- case PTRACE_SETREGS64:
-#endif
- case PTRACE_SETREGS: /* Set all gp regs in the child. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
- case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
- case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
-#ifdef CONFIG_ALTIVEC
- case PTRACE_GETVRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-
- case PTRACE_SETVRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-#endif
-#ifdef CONFIG_VSX
- case PTRACE_GETVSRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-
- case PTRACE_SETVSRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-#endif
-#ifdef CONFIG_SPE
- case PTRACE_GETEVRREGS:
- /* Get the child spe register state. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-
- case PTRACE_SETEVRREGS:
- /* Set the child spe register state. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-#endif
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-#ifdef CONFIG_SECCOMP
-static int do_seccomp(struct pt_regs *regs)
-{
- if (!test_thread_flag(TIF_SECCOMP))
- return 0;
-
- /*
- * The ABI we present to seccomp tracers is that r3 contains
- * the syscall return value and orig_gpr3 contains the first
- * syscall parameter. This is different to the ptrace ABI where
- * both r3 and orig_gpr3 contain the first syscall parameter.
- */
- regs->gpr[3] = -ENOSYS;
-
- /*
- * We use the __ version here because we have already checked
- * TIF_SECCOMP. If this fails, there is nothing left to do, we
- * have already loaded -ENOSYS into r3, or seccomp has put
- * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
- */
- if (__secure_computing(NULL))
- return -1;
-
- /*
- * The syscall was allowed by seccomp, restore the register
- * state to what audit expects.
- * Note that we use orig_gpr3, which means a seccomp tracer can
- * modify the first syscall parameter (in orig_gpr3) and also
- * allow the syscall to proceed.
- */
- regs->gpr[3] = regs->orig_gpr3;
-
- return 0;
-}
-#else
-static inline int do_seccomp(struct pt_regs *regs) { return 0; }
-#endif /* CONFIG_SECCOMP */
-
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
- u32 flags;
-
- user_exit();
-
- flags = READ_ONCE(current_thread_info()->flags) &
- (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
- if (flags) {
- int rc = tracehook_report_syscall_entry(regs);
-
- if (unlikely(flags & _TIF_SYSCALL_EMU)) {
- /*
- * A nonzero return code from
- * tracehook_report_syscall_entry() tells us to prevent
- * the syscall execution, but we are not going to
- * execute it anyway.
- *
- * Returning -1 will skip the syscall execution. We want
- * to avoid clobbering any registers, so we don't goto
- * the skip label below.
- */
- return -1;
- }
-
- if (rc) {
- /*
- * The tracer decided to abort the syscall. Note that
- * the tracer may also just change regs->gpr[0] to an
- * invalid syscall number, that is handled below on the
- * exit path.
- */
- goto skip;
- }
- }
-
- /* Run seccomp after ptrace; allow it to set gpr[3]. */
- if (do_seccomp(regs))
- return -1;
-
- /* Avoid trace and audit when syscall is invalid. */
- if (regs->gpr[0] >= NR_syscalls)
- goto skip;
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gpr[0]);
-
-#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
- regs->gpr[5], regs->gpr[6]);
- else
-#endif
- audit_syscall_entry(regs->gpr[0],
- regs->gpr[3] & 0xffffffff,
- regs->gpr[4] & 0xffffffff,
- regs->gpr[5] & 0xffffffff,
- regs->gpr[6] & 0xffffffff);
-
- /* Return the possibly modified but valid syscall number */
- return regs->gpr[0];
-
-skip:
- /*
- * If we are aborting explicitly, or if the syscall number is
- * now invalid, set the return value to -ENOSYS.
- */
- regs->gpr[3] = -ENOSYS;
- return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
- int step;
-
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->result);
-
- step = test_thread_flag(TIF_SINGLESTEP);
- if (step || test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, step);
-
- user_enter();
-}
-
-void __init pt_regs_check(void);
-
-/*
- * Dummy function, its purpose is to break the build if struct pt_regs and
- * struct user_pt_regs don't match.
- */
-void __init pt_regs_check(void)
-{
- BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
- offsetof(struct user_pt_regs, gpr));
- BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
- offsetof(struct user_pt_regs, nip));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct user_pt_regs, orig_gpr3));
- BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
- offsetof(struct user_pt_regs, ctr));
- BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
- offsetof(struct user_pt_regs, link));
- BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
- offsetof(struct user_pt_regs, xer));
- BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
- offsetof(struct user_pt_regs, ccr));
-#ifdef __powerpc64__
- BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
- offsetof(struct user_pt_regs, softe));
-#else
- BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
- offsetof(struct user_pt_regs, mq));
-#endif
- BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
- offsetof(struct user_pt_regs, trap));
- BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
- offsetof(struct user_pt_regs, dar));
- BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
- offsetof(struct user_pt_regs, dsisr));
- BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
- offsetof(struct user_pt_regs, result));
-
- BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
-
- // Now check that the pt_regs offsets match the uapi #defines
- #define CHECK_REG(_pt, _reg) \
- BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
- sizeof(unsigned long)));
-
- CHECK_REG(PT_R0, gpr[0]);
- CHECK_REG(PT_R1, gpr[1]);
- CHECK_REG(PT_R2, gpr[2]);
- CHECK_REG(PT_R3, gpr[3]);
- CHECK_REG(PT_R4, gpr[4]);
- CHECK_REG(PT_R5, gpr[5]);
- CHECK_REG(PT_R6, gpr[6]);
- CHECK_REG(PT_R7, gpr[7]);
- CHECK_REG(PT_R8, gpr[8]);
- CHECK_REG(PT_R9, gpr[9]);
- CHECK_REG(PT_R10, gpr[10]);
- CHECK_REG(PT_R11, gpr[11]);
- CHECK_REG(PT_R12, gpr[12]);
- CHECK_REG(PT_R13, gpr[13]);
- CHECK_REG(PT_R14, gpr[14]);
- CHECK_REG(PT_R15, gpr[15]);
- CHECK_REG(PT_R16, gpr[16]);
- CHECK_REG(PT_R17, gpr[17]);
- CHECK_REG(PT_R18, gpr[18]);
- CHECK_REG(PT_R19, gpr[19]);
- CHECK_REG(PT_R20, gpr[20]);
- CHECK_REG(PT_R21, gpr[21]);
- CHECK_REG(PT_R22, gpr[22]);
- CHECK_REG(PT_R23, gpr[23]);
- CHECK_REG(PT_R24, gpr[24]);
- CHECK_REG(PT_R25, gpr[25]);
- CHECK_REG(PT_R26, gpr[26]);
- CHECK_REG(PT_R27, gpr[27]);
- CHECK_REG(PT_R28, gpr[28]);
- CHECK_REG(PT_R29, gpr[29]);
- CHECK_REG(PT_R30, gpr[30]);
- CHECK_REG(PT_R31, gpr[31]);
- CHECK_REG(PT_NIP, nip);
- CHECK_REG(PT_MSR, msr);
- CHECK_REG(PT_ORIG_R3, orig_gpr3);
- CHECK_REG(PT_CTR, ctr);
- CHECK_REG(PT_LNK, link);
- CHECK_REG(PT_XER, xer);
- CHECK_REG(PT_CCR, ccr);
-#ifdef CONFIG_PPC64
- CHECK_REG(PT_SOFTE, softe);
-#else
- CHECK_REG(PT_MQ, mq);
-#endif
- CHECK_REG(PT_TRAP, trap);
- CHECK_REG(PT_DAR, dar);
- CHECK_REG(PT_DSISR, dsisr);
- CHECK_REG(PT_RESULT, result);
- #undef CHECK_REG
-
- BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
-
- /*
- * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
- * real registers.
- */
- BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
-}
diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
new file mode 100644
index 000000000000..77abd1a5a508
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y += ptrace.o ptrace-view.o
+obj-y += ptrace-fpu.o
+obj-$(CONFIG_COMPAT) += ptrace32.o
+obj-$(CONFIG_VSX) += ptrace-vsx.o
+ifneq ($(CONFIG_VSX),y)
+obj-y += ptrace-novsx.o
+endif
+obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o
+obj-$(CONFIG_SPE) += ptrace-spe.o
+obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o
+obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o
+ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y)
+obj-y += ptrace-noadv.o
+endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
new file mode 100644
index 000000000000..399f5d94a3df
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_BT;
+ task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_IC;
+ task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ /*
+ * The logic to disable single stepping should be as
+ * simple as turning off the Instruction Complete flag.
+ * And, after doing so, if all debug flags are off, turn
+ * off DBCR0(IDM) and MSR(DE) .... Torez
+ */
+ task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT);
+ /*
+ * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
+ */
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ /*
+ * All debug events were off.....
+ */
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+ }
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+ dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+ dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
+ dbginfo->data_bp_alignment = 4;
+ dbginfo->sizeof_condition = 4;
+ dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
+ PPC_DEBUG_FEATURE_INSN_BP_MASK;
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+ PPC_DEBUG_FEATURE_DATA_BP_MASK;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ return put_user(child->thread.debug.dac1, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+ struct pt_regs *regs = task->thread.regs;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.debug.dac1 = data & ~0x3UL;
+
+ if (task->thread.debug.dac1 == 0) {
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ }
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */
+ task->thread.debug.dbcr0 |= DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0 accordingly */
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (data & 0x1UL)
+ dbcr_dac(task) |= DBCR_DAC1R;
+ if (data & 0x2UL)
+ dbcr_dac(task) |= DBCR_DAC1W;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ return 0;
+}
+
+static long set_instruction_bp(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int slot;
+ int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+ int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+ int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+ int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ slot2_in_use = 1;
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ slot4_in_use = 1;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+ /* Make sure range is valid. */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+
+ /* We need a pair of IAC regsisters */
+ if (!slot1_in_use && !slot2_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.iac2 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC12X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if ((!slot3_in_use) && (!slot4_in_use)) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.iac4 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC34X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ } else {
+ /* We only need one. If possible leave a pair free in
+ * case a range is needed later
+ */
+ if (!slot1_in_use) {
+ /*
+ * Don't use iac1 if iac1-iac2 are free and either
+ * iac3 or iac4 (but not both) are free
+ */
+ if (slot2_in_use || slot3_in_use == slot4_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ goto out;
+ }
+ }
+ if (!slot2_in_use) {
+ slot = 2;
+ child->thread.debug.iac2 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if (!slot3_in_use) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ } else if (!slot4_in_use) {
+ slot = 4;
+ child->thread.debug.iac4 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC4;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ }
+out:
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+ switch (slot) {
+ case 1:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+ /* address range - clear slots 1 & 2 */
+ child->thread.debug.iac2 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+ }
+ child->thread.debug.iac1 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+ break;
+ case 2:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ /* used in a range */
+ return -EINVAL;
+ child->thread.debug.iac2 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 3:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+ /* address range - clear slots 3 & 4 */
+ child->thread.debug.iac4 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+ }
+ child->thread.debug.iac3 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+ break;
+ case 4:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ /* Used in a range */
+ return -EINVAL;
+ child->thread.debug.iac4 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int byte_enable =
+ (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+ & 0xf;
+ int condition_mode =
+ bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+ int slot;
+
+ if (byte_enable && condition_mode == 0)
+ return -EINVAL;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
+ slot = 1;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC1R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC1W;
+ child->thread.debug.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc1 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC1BE_SHIFT) |
+ (condition_mode << DBCR2_DVC1M_SHIFT));
+ }
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ /* Both dac1 and dac2 are part of a range */
+ return -ENOSPC;
+#endif
+ } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
+ slot = 2;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC2R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC2W;
+ child->thread.debug.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc2 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC2BE_SHIFT) |
+ (condition_mode << DBCR2_DVC2M_SHIFT));
+ }
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return slot + 4;
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+ if (slot == 1) {
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+ return -ENOENT;
+
+ child->thread.debug.dac1 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ child->thread.debug.dac2 = 0;
+ child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+ }
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc1 = 0;
+#endif
+ } else if (slot == 2) {
+ if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+ return -ENOENT;
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
+ /* Part of a range */
+ return -EINVAL;
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc2 = 0;
+#endif
+ child->thread.debug.dac2 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+ /* We don't allow range watchpoints to be used with DVC */
+ if (bp_info->condition_mode)
+ return -EINVAL;
+
+ /*
+ * Best effort to verify the address range. The user/supervisor bits
+ * prevent trapping in kernel space, but let's fail on an obvious bad
+ * range. The simple test on the mask is not fool-proof, and any
+ * exclusive range will spill over into kernel space.
+ */
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+ if (mode == PPC_BREAKPOINT_MODE_MASK) {
+ /*
+ * dac2 is a bitmask. Don't allow a mask that makes a
+ * kernel space address from a valid dac1 value
+ */
+ if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+ return -EIO;
+ } else {
+ /*
+ * For range breakpoints, addr2 must also be a valid address
+ */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+ }
+
+ if (child->thread.debug.dbcr0 &
+ (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+ return -ENOSPC;
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+ child->thread.debug.dac1 = bp_info->addr;
+ child->thread.debug.dac2 = bp_info->addr2;
+ if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12M;
+ else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
+ else /* PPC_BREAKPOINT_MODE_MASK */
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * Check for invalid flags and combinations
+ */
+ if (bp_info->trigger_type == 0 ||
+ (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+ PPC_BREAKPOINT_TRIGGER_RW)) ||
+ (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+ (bp_info->condition_mode &
+ ~(PPC_BREAKPOINT_CONDITION_MODE |
+ PPC_BREAKPOINT_CONDITION_BE_ALL)))
+ return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+ if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+#endif
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+ if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+ return set_instruction_bp(child, bp_info);
+ }
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ return set_dac_range(child, bp_info);
+#else
+ return -EINVAL;
+#endif
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+ int rc;
+
+ if (data <= 4)
+ rc = del_instruction_bp(child, (int)data);
+ else
+ rc = del_dac(child, (int)data - 4);
+
+ if (!rc) {
+ if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+ child->thread.debug.dbcr1)) {
+ child->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs,
+ child->thread.regs->msr & ~MSR_DE);
+ }
+ }
+ return rc;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
new file mode 100644
index 000000000000..0d9bc4bd4972
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined (32- and 64-bit) gdb.
+ */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_altivec_to_thread(target);
+ return target->thread.used_vr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ membuf_write(&to, &target->thread.vr_state, 33 * sizeof(vector128));
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.vrsave;
+ return membuf_write(&to, &vrsave, sizeof(vrsave));
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the first word of vrsave.
+ */
+ int start, end;
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+
+ vrsave.word = target->thread.vrsave;
+
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ start, end);
+ if (!ret)
+ target->thread.vrsave = vrsave.word;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
new file mode 100644
index 000000000000..4171a5727197
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <linux/regset.h>
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE 0
+#else
+#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG PT_MQ
+#else
+#define PT_MAX_PUT_REG PT_CCR
+#endif
+
+#define TVSO(f) (offsetof(struct thread_vr_state, f))
+#define TFSO(f) (offsetof(struct thread_fp_state, f))
+#define TSO(f) (offsetof(struct thread_struct, f))
+
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ REGSET_TM_CGPR, /* TM checkpointed GPR registers */
+ REGSET_TM_CFPR, /* TM checkpointed FPR registers */
+ REGSET_TM_CVMX, /* TM checkpointed VMX registers */
+ REGSET_TM_CVSX, /* TM checkpointed VSX registers */
+ REGSET_TM_SPR, /* TM specific SPR registers */
+ REGSET_TM_CTAR, /* TM checkpointed TAR register */
+ REGSET_TM_CPPR, /* TM checkpointed PPR register */
+ REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+ REGSET_PPR, /* PPR register */
+ REGSET_DSCR, /* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ REGSET_TAR, /* TAR register */
+ REGSET_EBB, /* EBB registers */
+ REGSET_PMR, /* Performance Monitor Registers */
+ REGSET_DEXCR, /* DEXCR registers */
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ REGSET_HASHKEYR, /* HASHKEYR register */
+#endif
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
+};
+
+/* ptrace-(no)vsx */
+
+user_regset_get2_fn fpr_get;
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-vsx */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vsr_get;
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-altivec */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vr_get;
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-spe */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn evr_get;
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace */
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to,
+ unsigned long *regs);
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs);
+
+/* ptrace-tm */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cgpr_get;
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cfpr_get;
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvmx_get;
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvsx_get;
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_spr_get;
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_tar_get;
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_ppr_get;
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_dscr_get;
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+user_regset_get2_fn tm_cgpr32_get;
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-view */
+
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data);
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
+
+extern const struct user_regset_view user_ppc_native_view;
+
+/* ptrace-fpu */
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data);
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data);
+
+/* ptrace-(no)adv */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp);
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data);
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info);
+long ppc_del_hwdebug(struct task_struct *child, long data);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
new file mode 100644
index 000000000000..09c49632bfe5
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ unsigned int fpidx = index - PT_FPR0;
+#endif
+
+ if (index > PT_FPSCR)
+ return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ *data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+ else
+ memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+ } else
+ *data = child->thread.fp_state.fpscr;
+#else
+ *data = 0;
+#endif
+
+ return 0;
+}
+
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ unsigned int fpidx = index - PT_FPR0;
+#endif
+
+ if (index > PT_FPSCR)
+ return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ ((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+ else
+ memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+ } else
+ child->thread.fp_state.fpscr = data;
+#endif
+
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
new file mode 100644
index 000000000000..a5dd7d2e2c9e
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL)
+ regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE);
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL)
+ regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE);
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL)
+ regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE));
+
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = 0;
+ if (ppc_breakpoint_available())
+ dbginfo->num_data_bps = nr_wp_slots();
+ else
+ dbginfo->num_data_bps = 0;
+ dbginfo->num_condition_regs = 0;
+ dbginfo->data_bp_alignment = sizeof(long);
+ dbginfo->sizeof_condition = 0;
+ if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) {
+ dbginfo->features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+ if (dawr_enabled())
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
+ } else {
+ dbginfo->features = 0;
+ }
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ unsigned long dabr_fake;
+
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ dabr_fake = ((child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) |
+ (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR));
+ return put_user(dabr_fake, datalp);
+}
+
+/*
+ * ptrace_set_debugreg() fakes DABR and DABR is only one. So even if
+ * internal hw supports more than one watchpoint, we support only one
+ * watchpoint with this interface.
+ */
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ bool set_bp = true;
+ struct arch_hw_breakpoint hw_brk;
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
+ if (data && !(data & HW_BRK_TYPE_TRANSLATE))
+ return -EIO;
+ hw_brk.address = data & (~HW_BRK_TYPE_DABR);
+ hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (!set_bp) {
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ return 0;
+ }
+ if (bp) {
+ attr = bp->attr;
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
+
+ /* Enable breakpoint */
+ attr.disabled = false;
+
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret)
+ return ret;
+
+ thread->ptrace_bps[0] = bp;
+ thread->hw_brk[0] = hw_brk;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type,
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, task);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ task->thread.hw_brk[0] = hw_brk;
+ return 0;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static int find_empty_ptrace_bp(struct thread_struct *thread)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!thread->ptrace_bps[i])
+ return i;
+ }
+ return -1;
+}
+#endif
+
+static int find_empty_hw_brk(struct thread_struct *thread)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!thread->hw_brk[i].address)
+ return i;
+ }
+ return -1;
+}
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int i;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int len = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ struct arch_hw_breakpoint brk;
+
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * We only support one data breakpoint
+ */
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+
+ if ((unsigned long)bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
+ brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL;
+ brk.len = DABR_MAX_LEN;
+ brk.hw_len = DABR_MAX_LEN;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ brk.type |= HW_BRK_TYPE_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ brk.type |= HW_BRK_TYPE_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ len = bp_info->addr2 - bp_info->addr;
+ else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ len = 1;
+ else
+ return -EINVAL;
+
+ i = find_empty_ptrace_bp(thread);
+ if (i < 0)
+ return -ENOSPC;
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = (unsigned long)bp_info->addr;
+ attr.bp_len = len;
+ arch_bp_generic_fields(brk.type, &attr.bp_type);
+
+ bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
+ thread->ptrace_bps[i] = bp;
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[i] = NULL;
+ return PTR_ERR(bp);
+ }
+
+ return i + 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+ return -EINVAL;
+
+ i = find_empty_hw_brk(&child->thread);
+ if (i < 0)
+ return -ENOSPC;
+
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
+ child->thread.hw_brk[i] = brk;
+
+ return i + 1;
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (data < 1 || data > nr_wp_slots())
+ return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[data - 1];
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[data - 1] = NULL;
+ } else {
+ ret = -ENOENT;
+ }
+ return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) &&
+ child->thread.hw_brk[data - 1].address == 0)
+ return -ENOENT;
+
+ child->thread.hw_brk[data - 1].address = 0;
+ child->thread.hw_brk[data - 1].type = 0;
+ child->thread.hw_brk[data - 1].flags = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
new file mode 100644
index 000000000000..7433f3db979a
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64));
+#else
+ return membuf_write(&to, &empty_zero_page, 33 * sizeof(u64));
+#endif
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+#else
+ return 0;
+#endif
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c
new file mode 100644
index 000000000000..47034d069045
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * For get_evrregs/set_evrregs functions 'data' has the following layout:
+ *
+ * struct {
+ * u32 evr[32];
+ * u64 acc;
+ * u32 spefscr;
+ * }
+ */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_spe_to_thread(target);
+ return target->thread.used_spe ? regset->n : 0;
+}
+
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ flush_spe_to_thread(target);
+
+ membuf_write(&to, &target->thread.evr, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ return membuf_write(&to, &target->thread.acc,
+ sizeof(u64) + sizeof(u32));
+}
+
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
new file mode 100644
index 000000000000..210ea834e603
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "ptrace-decl.h"
+
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * it's thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
+ */
+
+ if (!cpu_has_feature(CPU_FTR_TM) || tsk != current)
+ return;
+
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&tsk->thread);
+ }
+}
+
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+ return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
+}
+
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
+ task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+ set_trap(&task->thread.ckpt_regs, trap);
+ return 0;
+}
+
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed GPR category.
+ */
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs));
+
+ membuf_store(&to_msr, get_user_ckpt_msr(target));
+#ifdef CONFIG_PPC64
+ membuf_store(&to_softe, 0x1ul);
+#endif
+ return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+ sizeof(struct user_pt_regs));
+}
+
+/*
+ * tm_cgpr_set - set the CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_trap(target, reg);
+ }
+
+ if (!ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed FPR category.
+ */
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+ return membuf_write(&to, buf, sizeof(buf));
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ for (i = 0; i < 32; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_CKFPR(i) = buf[i];
+ target->thread.ckfp_state.fpscr = buf[32];
+ return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in checkpointed VMX category.
+ */
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cvmx_get - get CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ membuf_write(&to, &target->thread.ckvr_state, 33 * sizeof(vector128));
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ return membuf_write(&to, &vrsave, sizeof(vrsave));
+}
+
+/**
+ * tm_cvmx_set - set CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+ 0, 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ if (!ret)
+ target->thread.ckvrsave = vrsave.word;
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed VSX category.
+ */
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[32];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ return membuf_write(&to, buf, 32 * sizeof(double));
+}
+
+/**
+ * tm_cvsx_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks the active number of available
+ * regisers in the transactional memory SPR category.
+ */
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ membuf_write(&to, &target->thread.tm_tfhar, sizeof(u64));
+ /* TEXASR register */
+ membuf_write(&to, &target->thread.tm_texasr, sizeof(u64));
+ /* TFIAR register */
+ return membuf_write(&to, &target->thread.tm_tfiar, sizeof(u64));
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ return membuf_write(&to, &target->thread.tm_tar, sizeof(u64));
+}
+
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ return membuf_write(&to, &target->thread.tm_ppr, sizeof(u64));
+}
+
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ return membuf_write(&to, &target->thread.tm_dscr, sizeof(u64));
+}
+
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ gpr32_get_common(target, regset, to,
+ &target->thread.ckpt_regs.gpr[0]);
+ return membuf_zero(&to, ELF_NGREG * sizeof(u32));
+}
+
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
new file mode 100644
index 000000000000..584cf5c3df50
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/nospec.h>
+#include <linux/pkeys.h>
+
+#include "ptrace-decl.h"
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define STR(s) #s /* convert to string */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define GPR_OFFSET_NAME(num) \
+ {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
+ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ GPR_OFFSET_NAME(31),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+static unsigned long get_user_msr(struct task_struct *task)
+{
+ return task->thread.regs->msr | task->thread.fpexc_mode;
+}
+
+static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+ unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) |
+ (msr & MSR_DEBUGCHANGE);
+ regs_set_return_msr(task->thread.regs, newmsr);
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ *data = task->thread.dscr;
+ return 0;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ task->thread.dscr = dscr;
+ task->thread.dscr_inherit = 1;
+ return 0;
+}
+#else
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ return -EIO;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ return -EIO;
+}
+#endif
+
+/*
+ * We prevent mucking around with the reserved area of trap
+ * which are used internally by the kernel.
+ */
+static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+ set_trap(task->thread.regs, trap);
+ return 0;
+}
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
+{
+ unsigned int regs_max;
+
+ if (task->thread.regs == NULL || !data)
+ return -EIO;
+
+ if (regno == PT_MSR) {
+ *data = get_user_msr(task);
+ return 0;
+ }
+
+ if (regno == PT_DSCR)
+ return get_user_dscr(task, data);
+
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to always see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
+ *data = ((unsigned long *)task->thread.regs)[regno];
+ return 0;
+ }
+
+ return -EIO;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+ if (task->thread.regs == NULL)
+ return -EIO;
+
+ if (regno == PT_MSR)
+ return set_user_msr(task, data);
+ if (regno == PT_TRAP)
+ return set_user_trap(task, data);
+ if (regno == PT_DSCR)
+ return set_user_dscr(task, data);
+
+ if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+}
+
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs));
+
+ membuf_store(&to_msr, get_user_msr(target));
+#ifdef CONFIG_PPC64
+ membuf_store(&to_softe, 0x1ul);
+#endif
+ return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+ sizeof(struct user_pt_regs));
+}
+
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_trap(target, reg);
+ }
+
+ if (!ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!target->thread.regs)
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
+}
+
+static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ if (!target->thread.regs)
+ return -EINVAL;
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+}
+
+static int dscr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ return membuf_write(&to, &target->thread.dscr, sizeof(u64));
+}
+static int dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+static int tar_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ return membuf_write(&to, &target->thread.tar, sizeof(u64));
+}
+static int tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+}
+
+static int ebb_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return regset->n;
+
+ return 0;
+}
+
+static int ebb_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (!target->thread.used_ebb)
+ return -ENODATA;
+
+ return membuf_write(&to, &target->thread.ebbrr, 3 * sizeof(unsigned long));
+}
+
+static int ebb_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbhr, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.bescr, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ return ret;
+}
+static int pmu_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pmu_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return membuf_write(&to, &target->thread.siar, 5 * sizeof(unsigned long));
+}
+
+static int pmu_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sdar, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sier, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr2, 3 * sizeof(unsigned long),
+ 4 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr0, 4 * sizeof(unsigned long),
+ 5 * sizeof(unsigned long));
+ return ret;
+}
+
+static int dexcr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int dexcr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ /*
+ * The DEXCR is currently static across all CPUs, so we don't
+ * store the target's value anywhere, but the static value
+ * will also be correct.
+ */
+ membuf_store(&to, (u64)lower_32_bits(DEXCR_INIT));
+
+ /*
+ * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably
+ * change it between CPUs of the same guest.
+ */
+ return membuf_store(&to, (u64)lower_32_bits(mfspr(SPRN_HDEXCR_RO)));
+}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int hashkeyr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int hashkeyr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ return membuf_store(&to, target->thread.hashkeyr);
+}
+
+static int hashkeyr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.hashkeyr,
+ 0, sizeof(unsigned long));
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ membuf_store(&to, target->thread.regs->amr);
+ membuf_store(&to, target->thread.regs->iamr);
+ return membuf_store(&to, default_uamor);
+}
+
+static int pkey_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /*
+ * UAMOR determines which bits of the AMR can be set from userspace.
+ * UAMOR value 0b11 indicates that the AMR value can be modified
+ * from userspace. If the kernel is using a specific key, we avoid
+ * userspace modifying the AMR value for that key by masking them
+ * via UAMOR 0b00.
+ *
+ * Pick the AMR values for the keys that kernel is using. This
+ * will be indicated by the ~default_uamor bits.
+ */
+ target->thread.regs->amr = (new_amr & default_uamor) |
+ (target->thread.regs->amr & ~default_uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .regset_get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .regset_get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .regset_get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .regset_get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .regset_get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+ },
+ [REGSET_PMR] = {
+ .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pmu_active, .regset_get = pmu_get, .set = pmu_set
+ },
+ [REGSET_DEXCR] = {
+ .core_note_type = NT_PPC_DEXCR, .n = ELF_NDEXCR,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = dexcr_active, .regset_get = dexcr_get
+ },
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ [REGSET_HASHKEYR] = {
+ .core_note_type = NT_PPC_HASHKEYR, .n = ELF_NHASHKEYR,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = hashkeyr_active, .regset_get = hashkeyr_get, .set = hashkeyr_set
+ },
+#endif
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .regset_get = pkey_get, .set = pkey_set
+ },
+#endif
+};
+
+const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#include <linux/compat.h>
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to, unsigned long *regs)
+{
+ int i;
+
+ for (i = 0; i < PT_MSR; i++)
+ membuf_store(&to, (u32)regs[i]);
+ membuf_store(&to, (u32)get_user_msr(target));
+ for (i++ ; i < PT_REGS_COUNT; i++)
+ membuf_store(&to, (u32)regs[i]);
+ return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32));
+}
+
+static int gpr32_set_common_kernel(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, unsigned long *regs)
+{
+ const compat_ulong_t *k = kbuf;
+
+ pos /= sizeof(compat_ulong_t);
+ count /= sizeof(compat_ulong_t);
+
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+
+ if (count > 0 && pos == PT_MSR) {
+ set_user_msr(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+
+ if (count > 0 && pos == PT_TRAP) {
+ set_user_trap(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ kbuf = k;
+ pos *= sizeof(compat_ulong_t);
+ count *= sizeof(compat_ulong_t);
+ user_regset_copyin_ignore(&pos, &count, &kbuf, NULL,
+ (PT_TRAP + 1) * sizeof(compat_ulong_t), -1);
+ return 0;
+}
+
+static int gpr32_set_common_user(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void __user *ubuf, unsigned long *regs)
+{
+ const compat_ulong_t __user *u = ubuf;
+ const void *kbuf = NULL;
+ compat_ulong_t reg;
+
+ if (!user_read_access_begin(u, count))
+ return -EFAULT;
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ for (; count > 0 && pos < PT_MSR; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
+ }
+
+ if (count > 0 && pos == PT_MSR) {
+ unsafe_get_user(reg, u++, Efault);
+ set_user_msr(target, reg);
+ ++pos;
+ --count;
+ }
+
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
+ }
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ unsafe_get_user(reg, u++, Efault);
+
+ if (count > 0 && pos == PT_TRAP) {
+ unsafe_get_user(reg, u++, Efault);
+ set_user_trap(target, reg);
+ ++pos;
+ --count;
+ }
+ user_read_access_end();
+
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+ return 0;
+
+Efault:
+ user_read_access_end();
+ return -EFAULT;
+}
+
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs)
+{
+ if (kbuf)
+ return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs);
+ else
+ return gpr32_set_common_user(target, regset, pos, count, ubuf, regs);
+}
+
+static int gpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ return gpr32_get_common(target, regset, to,
+ &target->thread.regs->gpr[0]);
+}
+
+static int gpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
+}
+
+/*
+ * These are the regset flavors matching the CONFIG_PPC32 native set.
+ */
+static const struct user_regset compat_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+ .regset_get = gpr32_get, .set = gpr32_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .regset_get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .regset_get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .regset_get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active,
+ .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_compat_view = {
+ .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+ .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task))
+ return &user_ppc_compat_view;
+ return &user_ppc_native_view;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000000..7df08004c47d
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ return membuf_write(&to, buf, 33 * sizeof(u64));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ return 0;
+}
+
+/*
+ * Currently to set and get all the vsx state, you need to call
+ * the fp and VMX calls as well. This only get/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[32];
+ int i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ return membuf_write(&to, buf, 32 * sizeof(double));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
new file mode 100644
index 000000000000..727ed4a14545
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ */
+
+#include <linux/regset.h>
+#include <linux/ptrace.h>
+#include <linux/audit.h>
+#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
+
+#include <asm/switch_to.h>
+#include <asm/debug.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* make sure the single step bit is not set. */
+ user_disable_single_step(child);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret = -EPERM;
+ void __user *datavp = (void __user *) data;
+ unsigned long __user *datalp = datavp;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long index, tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = addr / sizeof(long);
+ if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+ break;
+
+ if (index < PT_FPR0)
+ ret = ptrace_get_reg(child, (int) index, &tmp);
+ else
+ ret = ptrace_get_fpr(child, index, &tmp);
+
+ if (ret)
+ break;
+ ret = put_user(tmp, datalp);
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = addr / sizeof(long);
+ if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+ break;
+
+ if (index < PT_FPR0)
+ ret = ptrace_put_reg(child, index, data);
+ else
+ ret = ptrace_put_fpr(child, index, data);
+ break;
+ }
+
+ case PPC_PTRACE_GETHWDBGINFO: {
+ struct ppc_debug_info dbginfo;
+
+ ppc_gethwdinfo(&dbginfo);
+
+ if (copy_to_user(datavp, &dbginfo,
+ sizeof(struct ppc_debug_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ case PPC_PTRACE_SETHWDEBUG: {
+ struct ppc_hw_breakpoint bp_info;
+
+ if (copy_from_user(&bp_info, datavp,
+ sizeof(struct ppc_hw_breakpoint)))
+ return -EFAULT;
+ return ppc_set_hwdebug(child, &bp_info);
+ }
+
+ case PPC_PTRACE_DELHWDEBUG: {
+ ret = ppc_del_hwdebug(child, data);
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG:
+ ret = ptrace_get_debugreg(child, addr, datalp);
+ break;
+
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+
+#ifdef CONFIG_PPC64
+ case PTRACE_GETREGS64:
+#endif
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+#ifdef CONFIG_PPC64
+ case PTRACE_SETREGS64:
+#endif
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+#ifdef CONFIG_ALTIVEC
+ case PTRACE_GETVRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+
+ case PTRACE_SETVRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+#endif
+#ifdef CONFIG_SPE
+ case PTRACE_GETEVRREGS:
+ /* Get the child spe register state. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+
+ case PTRACE_SETEVRREGS:
+ /* Set the child spe register state. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+#endif
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_SECCOMP
+static int do_seccomp(struct pt_regs *regs)
+{
+ if (!test_thread_flag(TIF_SECCOMP))
+ return 0;
+
+ /*
+ * The ABI we present to seccomp tracers is that r3 contains
+ * the syscall return value and orig_gpr3 contains the first
+ * syscall parameter. This is different to the ptrace ABI where
+ * both r3 and orig_gpr3 contain the first syscall parameter.
+ */
+ regs->gpr[3] = -ENOSYS;
+
+ /*
+ * We use the __ version here because we have already checked
+ * TIF_SECCOMP. If this fails, there is nothing left to do, we
+ * have already loaded -ENOSYS into r3, or seccomp has put
+ * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (__secure_computing(NULL))
+ return -1;
+
+ /*
+ * The syscall was allowed by seccomp, restore the register
+ * state to what audit expects.
+ * Note that we use orig_gpr3, which means a seccomp tracer can
+ * modify the first syscall parameter (in orig_gpr3) and also
+ * allow the syscall to proceed.
+ */
+ regs->gpr[3] = regs->orig_gpr3;
+
+ return 0;
+}
+#else
+static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_SECCOMP */
+
+/**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+ *
+ * Performs various types of tracing on syscall entry. This includes seccomp,
+ * ptrace, syscall tracepoints and audit.
+ *
+ * The pt_regs are potentially visible to userspace via ptrace, so their
+ * contents is ABI.
+ *
+ * One or more of the tracers may modify the contents of pt_regs, in particular
+ * to modify arguments or even the syscall number itself.
+ *
+ * It's also possible that a tracer can choose to reject the system call. In
+ * that case this function will return an illegal syscall number, and will put
+ * an appropriate return value in regs->r3.
+ *
+ * Return: the (possibly changed) syscall number.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ u32 flags;
+
+ flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+ if (flags) {
+ int rc = ptrace_report_syscall_entry(regs);
+
+ if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+ /*
+ * A nonzero return code from
+ * ptrace_report_syscall_entry() tells us to prevent
+ * the syscall execution, but we are not going to
+ * execute it anyway.
+ *
+ * Returning -1 will skip the syscall execution. We want
+ * to avoid clobbering any registers, so we don't goto
+ * the skip label below.
+ */
+ return -1;
+ }
+
+ if (rc) {
+ /*
+ * The tracer decided to abort the syscall. Note that
+ * the tracer may also just change regs->gpr[0] to an
+ * invalid syscall number, that is handled below on the
+ * exit path.
+ */
+ goto skip;
+ }
+ }
+
+ /* Run seccomp after ptrace; allow it to set gpr[3]. */
+ if (do_seccomp(regs))
+ return -1;
+
+ /* Avoid trace and audit when syscall is invalid. */
+ if (regs->gpr[0] >= NR_syscalls)
+ goto skip;
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+ if (!is_32bit_task())
+ audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
+ audit_syscall_entry(regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
+
+ /* Return the possibly modified but valid syscall number */
+ return regs->gpr[0];
+
+skip:
+ /*
+ * If we are aborting explicitly, or if the syscall number is
+ * now invalid, set the return value to -ENOSYS.
+ */
+ regs->gpr[3] = -ENOSYS;
+ return -1;
+}
+
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->result);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ ptrace_report_syscall_exit(regs, step);
+}
+
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dear) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, esr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
+}
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index f37eb53de1a1..19c224808982 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -17,23 +17,14 @@
* this archive for more details.
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
#include <linux/compat.h>
-#include <linux/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/switch_to.h>
+#include "ptrace-decl.h"
+
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -92,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || (index > PT_FPSCR32))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (index < PT_FPR0) {
ret = ptrace_get_reg(child, index, &tmp);
if (ret)
@@ -142,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || numReg > PT_FPSCR)
break;
- CHECK_FULL_REGS(child->thread.regs);
if (numReg >= PT_FPR0) {
flush_fp_to_thread(child);
/* get 64 bit FPR */
@@ -196,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || (index > PT_FPSCR32))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (index < PT_FPR0) {
ret = ptrace_put_reg(child, index, data);
} else {
@@ -235,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
*/
if ((addr & 3) || (numReg > PT_FPSCR))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (numReg < PT_FPR0) {
unsigned long freg;
ret = ptrace_get_reg(child, numReg, &freg);
@@ -270,8 +257,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
#else
dabr_fake = (
- (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
+ (child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) |
+ (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR));
ret = put_user(dabr_fake, (u32 __user *)data);
#endif
break;
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index 10e96f3e22fe..0508c14b4c28 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -30,7 +30,7 @@ R_PPC_RELATIVE = 22
_GLOBAL(relocate)
mflr r0 /* Save our LR */
- bl 0f /* Find our current runtime address */
+ bcl 20,31,$+4 /* Find our current runtime address */
0: mflr r12 /* Make it accessible */
mtlr r0
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index 02d4719bf43a..efd52f2e7033 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -8,8 +8,10 @@
#include <asm/ppc_asm.h>
RELA = 7
-RELACOUNT = 0x6ffffff9
+RELASZ = 8
+RELAENT = 9
R_PPC64_RELATIVE = 22
+R_PPC64_UADDR64 = 43
/*
* r3 = desired final address of kernel
@@ -25,29 +27,38 @@ _GLOBAL(relocate)
add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */
ld r10,(p_st - 0b)(r12)
add r10,r10,r12 /* r10 has runtime addr of _stext */
+ ld r4,(p_sym - 0b)(r12)
+ add r4,r4,r12 /* r4 has runtime addr of .dynsym */
/*
- * Scan the dynamic section for the RELA and RELACOUNT entries.
+ * Scan the dynamic section for the RELA, RELASZ and RELAENT entries.
*/
li r7,0
li r8,0
-1: ld r6,0(r11) /* get tag */
+.Ltags:
+ ld r6,0(r11) /* get tag */
cmpdi r6,0
- beq 4f /* end of list */
+ beq .Lend_of_list /* end of list */
cmpdi r6,RELA
bne 2f
ld r7,8(r11) /* get RELA pointer in r7 */
- b 3f
-2: addis r6,r6,(-RELACOUNT)@ha
- cmpdi r6,RELACOUNT@l
+ b 4f
+2: cmpdi r6,RELASZ
bne 3f
- ld r8,8(r11) /* get RELACOUNT value in r8 */
-3: addi r11,r11,16
- b 1b
-4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */
+ ld r8,8(r11) /* get RELASZ value in r8 */
+ b 4f
+3: cmpdi r6,RELAENT
+ bne 4f
+ ld r12,8(r11) /* get RELAENT value in r12 */
+4: addi r11,r11,16
+ b .Ltags
+.Lend_of_list:
+ cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */
cmpdi cr1,r8,0
- beq 6f
- beq cr1,6f
+ beq .Lout
+ beq cr1,.Lout
+ cmpdi r12,0
+ beq .Lout
/*
* Work out linktime address of _stext and hence the
@@ -62,23 +73,39 @@ _GLOBAL(relocate)
/*
* Run through the list of relocations and process the
- * R_PPC64_RELATIVE ones.
+ * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones.
*/
+ divd r8,r8,r12 /* RELASZ / RELAENT */
mtctr r8
-5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */
+.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */
cmpdi r0,R_PPC64_RELATIVE
- bne 6f
+ bne .Luaddr64
ld r6,0(r9) /* reloc->r_offset */
ld r0,16(r9) /* reloc->r_addend */
+ b .Lstore
+.Luaddr64:
+ srdi r5,r0,32 /* ELF64_R_SYM(reloc->r_info) */
+ clrldi r0,r0,32
+ cmpdi r0,R_PPC64_UADDR64
+ bne .Lnext
+ ld r6,0(r9)
+ ld r0,16(r9)
+ mulli r5,r5,24 /* 24 == sizeof(elf64_sym) */
+ add r5,r5,r4 /* elf64_sym[ELF64_R_SYM] */
+ ld r5,8(r5)
+ add r0,r0,r5
+.Lstore:
add r0,r0,r3
stdx r0,r7,r6
- addi r9,r9,24
- bdnz 5b
-
-6: blr
+.Lnext:
+ add r9,r9,r12
+ bdnz .Lrels
+.Lout:
+ blr
.balign 8
p_dyn: .8byte __dynamic_start - 0b
p_rela: .8byte __rela_dyn_start - 0b
+p_sym: .8byte __dynamic_symtab - 0b
p_st: .8byte _stext - 0b
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 487dcd8da4de..f38df72e64b8 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -24,11 +24,11 @@
#include <linux/seq_file.h>
#include <linux/bitops.h>
#include <linux/rtc.h>
+#include <linux/of.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
@@ -159,12 +159,12 @@ static int poweron_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_poweron_show, NULL);
}
-static const struct file_operations ppc_rtas_poweron_operations = {
- .open = poweron_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_poweron_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_poweron_proc_ops = {
+ .proc_open = poweron_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_poweron_write,
+ .proc_release = single_release,
};
static int progress_open(struct inode *inode, struct file *file)
@@ -172,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_progress_show, NULL);
}
-static const struct file_operations ppc_rtas_progress_operations = {
- .open = progress_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_progress_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_progress_proc_ops = {
+ .proc_open = progress_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_progress_write,
+ .proc_release = single_release,
};
static int clock_open(struct inode *inode, struct file *file)
@@ -185,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_clock_show, NULL);
}
-static const struct file_operations ppc_rtas_clock_operations = {
- .open = clock_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_clock_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_clock_proc_ops = {
+ .proc_open = clock_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_clock_write,
+ .proc_release = single_release,
};
static int tone_freq_open(struct inode *inode, struct file *file)
@@ -198,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_freq_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_freq_operations = {
- .open = tone_freq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_freq_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_freq_proc_ops = {
+ .proc_open = tone_freq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_freq_write,
+ .proc_release = single_release,
};
static int tone_volume_open(struct inode *inode, struct file *file)
@@ -211,12 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_volume_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_volume_operations = {
- .open = tone_volume_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_volume_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_volume_proc_ops = {
+ .proc_open = tone_volume_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_volume_write,
+ .proc_release = single_release,
};
static int ppc_rtas_find_all_sensors(void);
@@ -238,17 +238,17 @@ static int __init proc_rtas_init(void)
return -ENODEV;
proc_create("powerpc/rtas/progress", 0644, NULL,
- &ppc_rtas_progress_operations);
+ &ppc_rtas_progress_proc_ops);
proc_create("powerpc/rtas/clock", 0644, NULL,
- &ppc_rtas_clock_operations);
+ &ppc_rtas_clock_proc_ops);
proc_create("powerpc/rtas/poweron", 0644, NULL,
- &ppc_rtas_poweron_operations);
+ &ppc_rtas_poweron_proc_ops);
proc_create_single("powerpc/rtas/sensors", 0444, NULL,
ppc_rtas_sensors_show);
proc_create("powerpc/rtas/frequency", 0644, NULL,
- &ppc_rtas_tone_freq_operations);
+ &ppc_rtas_tone_freq_proc_ops);
proc_create("powerpc/rtas/volume", 0644, NULL,
- &ppc_rtas_tone_volume_operations);
+ &ppc_rtas_tone_volume_proc_ops);
proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
ppc_rtas_rmo_buf_show);
return 0;
@@ -259,7 +259,6 @@ __initcall(proc_rtas_init);
static int parse_number(const char __user *p, size_t count, u64 *val)
{
char buf[40];
- char *end;
if (count > 39)
return -EINVAL;
@@ -269,11 +268,7 @@ static int parse_number(const char __user *p, size_t count, u64 *val)
buf[count] = 0;
- *val = simple_strtoull(buf, &end, 10);
- if (*end && *end != '\n')
- return -EINVAL;
-
- return 0;
+ return kstrtoull(buf, 10, val);
}
/* ****************************************************************** */
@@ -292,9 +287,9 @@ static ssize_t ppc_rtas_poweron_write(struct file *file,
rtc_time64_to_tm(nowtime, &tm);
- error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL,
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL,
+ tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
if (error)
printk(KERN_WARNING "error: setting poweron time returned: %s\n",
ppc_rtas_process_error(error));
@@ -355,9 +350,9 @@ static ssize_t ppc_rtas_clock_write(struct file *file,
return error;
rtc_time64_to_tm(nowtime, &tm);
- error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
+ tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
if (error)
printk(KERN_WARNING "error: setting the clock returned: %s\n",
ppc_rtas_process_error(error));
@@ -367,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file,
static int ppc_rtas_clock_show(struct seq_file *m, void *v)
{
int ret[8];
- int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
if (error) {
printk(KERN_WARNING "error: reading the clock returned: %s\n",
@@ -390,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
{
int i,j;
int state, error;
- int get_sensor_state = rtas_token("get-sensor-state");
+ int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n");
seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n");
@@ -713,8 +708,8 @@ static ssize_t ppc_rtas_tone_freq_write(struct file *file,
return error;
rtas_tone_frequency = freq; /* save it for later */
- error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
- TONE_FREQUENCY, 0, freq);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+ TONE_FREQUENCY, 0, freq);
if (error)
printk(KERN_WARNING "error: setting tone frequency returned: %s\n",
ppc_rtas_process_error(error));
@@ -741,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
volume = 100;
rtas_tone_volume = volume; /* save it for later */
- error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
- TONE_VOLUME, 0, volume);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+ TONE_VOLUME, 0, volume);
if (error)
printk(KERN_WARNING "error: setting tone volume returned: %s\n",
ppc_rtas_process_error(error));
@@ -755,11 +750,20 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
return 0;
}
-#define RMO_READ_BUF_MAX 30
-
-/* RTAS Userspace access */
+/**
+ * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ * @m: seq_file output target.
+ * @v: Unused.
+ *
+ * Base + size description of a range of RTAS-addressable memory set
+ * aside for user space to use as work area(s) for certain RTAS
+ * functions. User space accesses this region via /dev/mem. Apart from
+ * security policies, the kernel does not arbitrate or serialize
+ * access to this region, and user space must ensure that concurrent
+ * users do not interfere with each other.
+ */
static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
{
- seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
+ seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
index a28239b8b0c0..6996214532bd 100644
--- a/arch/powerpc/kernel/rtas-rtc.c
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -6,13 +6,12 @@
#include <linux/rtc.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/time.h>
#define MAX_RTC_WAIT 5000 /* 5 sec */
-#define RTAS_CLOCK_BUSY (-2)
+
time64_t __init rtas_get_boot_time(void)
{
int ret[8];
@@ -22,7 +21,7 @@ time64_t __init rtas_get_boot_time(void)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
wait_time = rtas_busy_delay_time(error);
if (wait_time) {
@@ -54,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
wait_time = rtas_busy_delay_time(error);
if (wait_time) {
@@ -91,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
tm->tm_year + 1900, tm->tm_mon + 1,
tm->tm_mday, tm->tm_hour, tm->tm_min,
tm->tm_sec, 0);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c5fa251b8950..7e793b503e29 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -7,86 +7,745 @@
* Copyright (C) 2001 IBM.
*/
-#include <stdarg.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/init.h>
+#define pr_fmt(fmt) "rtas: " fmt
+
+#include <linux/bsearch.h>
#include <linux/capability.h>
#include <linux/delay.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/memblock.h>
-#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stdarg.h>
#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/xarray.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
+#include <asm/delay.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
#include <asm/page.h>
-#include <asm/param.h>
-#include <asm/delay.h>
-#include <linux/uaccess.h>
-#include <asm/udbg.h>
-#include <asm/syscalls.h>
-#include <asm/smp.h>
-#include <linux/atomic.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
#include <asm/time.h>
-#include <asm/mmu.h>
-#include <asm/topology.h>
+#include <asm/trace.h>
+#include <asm/udbg.h>
-/* This is here deliberately so it's only used in this file */
-void enter_rtas(unsigned long);
+struct rtas_filter {
+ /* Indexes into the args buffer, -1 if not used */
+ const int buf_idx1;
+ const int size_idx1;
+ const int buf_idx2;
+ const int size_idx2;
+ /*
+ * Assumed buffer size per the spec if the function does not
+ * have a size parameter, e.g. ibm,errinjct. 0 if unused.
+ */
+ const int fixed_size;
+};
-struct rtas_t rtas = {
- .lock = __ARCH_SPIN_LOCK_UNLOCKED
+/**
+ * struct rtas_function - Descriptor for RTAS functions.
+ *
+ * @token: Value of @name if it exists under the /rtas node.
+ * @name: Function name.
+ * @filter: If non-NULL, invoking this function via the rtas syscall is
+ * generally allowed, and @filter describes constraints on the
+ * arguments. See also @banned_for_syscall_on_le.
+ * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed
+ * but specifically restricted on ppc64le. Such
+ * functions are believed to have no users on
+ * ppc64le, and we want to keep it that way. It does
+ * not make sense for this to be set when @filter
+ * is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ * should be set for functions that require multiple calls in
+ * sequence to complete a single operation, and such sequences
+ * will disrupt each other if allowed to interleave. Users of
+ * this function are required to hold the associated lock for
+ * the duration of the call sequence. Add an explanatory
+ * comment to the function table entry if setting this member.
+ */
+struct rtas_function {
+ s32 token;
+ const bool banned_for_syscall_on_le:1;
+ const char * const name;
+ const struct rtas_filter *filter;
+ struct mutex *lock;
};
-EXPORT_SYMBOL(rtas);
-DEFINE_SPINLOCK(rtas_data_buf_lock);
-EXPORT_SYMBOL(rtas_data_buf_lock);
+/*
+ * Per-function locks for sequence-based RTAS functions.
+ */
+static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
+static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
+static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
+static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
+static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+
+static struct rtas_function rtas_function_table[] __ro_after_init = {
+ [RTAS_FNIDX__CHECK_EXCEPTION] = {
+ .name = "check-exception",
+ },
+ [RTAS_FNIDX__DISPLAY_CHARACTER] = {
+ .name = "display-character",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__EVENT_SCAN] = {
+ .name = "event-scan",
+ },
+ [RTAS_FNIDX__FREEZE_TIME_BASE] = {
+ .name = "freeze-time-base",
+ },
+ [RTAS_FNIDX__GET_POWER_LEVEL] = {
+ .name = "get-power-level",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__GET_SENSOR_STATE] = {
+ .name = "get-sensor-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__GET_TERM_CHAR] = {
+ .name = "get-term-char",
+ },
+ [RTAS_FNIDX__GET_TIME_OF_DAY] = {
+ .name = "get-time-of-day",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = {
+ .name = "ibm,activate-firmware",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ as of v2.13 doesn't explicitly impose any
+ * restriction, but this typically requires multiple
+ * calls before success, and there's no reason to
+ * allow sequences to interleave.
+ */
+ .lock = &rtas_ibm_activate_firmware_lock,
+ },
+ [RTAS_FNIDX__IBM_CBE_START_PTCAL] = {
+ .name = "ibm,cbe-start-ptcal",
+ },
+ [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = {
+ .name = "ibm,cbe-stop-ptcal",
+ },
+ [RTAS_FNIDX__IBM_CHANGE_MSI] = {
+ .name = "ibm,change-msi",
+ },
+ [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = {
+ .name = "ibm,close-errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = {
+ .name = "ibm,configure-bridge",
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = {
+ .name = "ibm,configure-connector",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = 1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = {
+ .name = "ibm,configure-kernel-dump",
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_PE] = {
+ .name = "ibm,configure-pe",
+ },
+ [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = {
+ .name = "ibm,create-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = {
+ .name = "ibm,display-message",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_ERRINJCT] = {
+ .name = "ibm,errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 1024,
+ },
+ },
+ [RTAS_FNIDX__IBM_EXTI2C] = {
+ .name = "ibm,exti2c",
+ },
+ [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = {
+ .name = "ibm,get-config-addr-info",
+ },
+ [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = {
+ .name = "ibm,get-config-addr-info2",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = {
+ .name = "ibm,get-dynamic-sensor-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS
+ * must not call ibm,get-dynamic-sensor-state with
+ * different inputs until a non-retry status has been
+ * returned.
+ */
+ .lock = &rtas_ibm_get_dynamic_sensor_state_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_INDICES] = {
+ .name = "ibm,get-indices",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = 3,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not
+ * interleave ibm,get-indices call sequences with
+ * different inputs.
+ */
+ .lock = &rtas_ibm_get_indices_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = {
+ .name = "ibm,get-rio-topology",
+ },
+ [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = {
+ .name = "ibm,get-system-parameter",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = 2,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_GET_VPD] = {
+ .name = "ibm,get-vpd",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = 1, .size_idx2 = 2,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences
+ * should not be allowed to interleave.
+ */
+ .lock = &rtas_ibm_get_vpd_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_XIVE] = {
+ .name = "ibm,get-xive",
+ },
+ [RTAS_FNIDX__IBM_INT_OFF] = {
+ .name = "ibm,int-off",
+ },
+ [RTAS_FNIDX__IBM_INT_ON] = {
+ .name = "ibm,int-on",
+ },
+ [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = {
+ .name = "ibm,io-quiesce-ack",
+ },
+ [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = {
+ .name = "ibm,lpar-perftools",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = 3,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow
+ * only one call sequence in progress at a time.
+ */
+ .lock = &rtas_ibm_lpar_perftools_lock,
+ },
+ [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = {
+ .name = "ibm,manage-flash-image",
+ },
+ [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = {
+ .name = "ibm,manage-storage-preservation",
+ },
+ [RTAS_FNIDX__IBM_NMI_INTERLOCK] = {
+ .name = "ibm,nmi-interlock",
+ },
+ [RTAS_FNIDX__IBM_NMI_REGISTER] = {
+ .name = "ibm,nmi-register",
+ },
+ [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = {
+ .name = "ibm,open-errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = {
+ .name = "ibm,open-sriov-allow-unfreeze",
+ },
+ [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = {
+ .name = "ibm,open-sriov-map-pe-number",
+ },
+ [RTAS_FNIDX__IBM_OS_TERM] = {
+ .name = "ibm,os-term",
+ },
+ [RTAS_FNIDX__IBM_PARTNER_CONTROL] = {
+ .name = "ibm,partner-control",
+ },
+ [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = {
+ .name = "ibm,physical-attestation",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = 1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * This follows a sequence-based pattern similar to
+ * ibm,get-vpd et al. Since PAPR+ restricts
+ * interleaving call sequences for other functions of
+ * this style, assume the restriction applies here,
+ * even though it's not explicit in the spec.
+ */
+ .lock = &rtas_ibm_physical_attestation_lock,
+ },
+ [RTAS_FNIDX__IBM_PLATFORM_DUMP] = {
+ .name = "ibm,platform-dump",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 4, .size_idx1 = 5,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent
+ * sequences of ibm,platform-dump are allowed if they
+ * are operating on different dump tags. So leave the
+ * lock pointer unset for now. This may need
+ * reconsideration if kernel-internal users appear.
+ */
+ },
+ [RTAS_FNIDX__IBM_POWER_OFF_UPS] = {
+ .name = "ibm,power-off-ups",
+ },
+ [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = {
+ .name = "ibm,query-interrupt-source-number",
+ },
+ [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = {
+ .name = "ibm,query-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = {
+ .name = "ibm,read-pci-config",
+ },
+ [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = {
+ .name = "ibm,read-slot-reset-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = {
+ .name = "ibm,read-slot-reset-state2",
+ },
+ [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
+ .name = "ibm,remove-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
+ .name = "ibm,reset-pe-dma-windows",
+ },
+ [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
+ .name = "ibm,scan-log-dump",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = 1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = {
+ .name = "ibm,set-dynamic-indicator",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call
+ * this function with different inputs until a
+ * non-retry status has been returned.
+ */
+ .lock = &rtas_ibm_set_dynamic_indicator_lock,
+ },
+ [RTAS_FNIDX__IBM_SET_EEH_OPTION] = {
+ .name = "ibm,set-eeh-option",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SET_SLOT_RESET] = {
+ .name = "ibm,set-slot-reset",
+ },
+ [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = {
+ .name = "ibm,set-system-parameter",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SET_XIVE] = {
+ .name = "ibm,set-xive",
+ },
+ [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = {
+ .name = "ibm,slot-error-detail",
+ },
+ [RTAS_FNIDX__IBM_SUSPEND_ME] = {
+ .name = "ibm,suspend-me",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = {
+ .name = "ibm,tune-dma-parms",
+ },
+ [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = {
+ .name = "ibm,update-flash-64-and-reboot",
+ },
+ [RTAS_FNIDX__IBM_UPDATE_NODES] = {
+ .name = "ibm,update-nodes",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = {
+ .name = "ibm,update-properties",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = {
+ .name = "ibm,validate-flash-image",
+ },
+ [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = {
+ .name = "ibm,write-pci-config",
+ },
+ [RTAS_FNIDX__NVRAM_FETCH] = {
+ .name = "nvram-fetch",
+ },
+ [RTAS_FNIDX__NVRAM_STORE] = {
+ .name = "nvram-store",
+ },
+ [RTAS_FNIDX__POWER_OFF] = {
+ .name = "power-off",
+ },
+ [RTAS_FNIDX__PUT_TERM_CHAR] = {
+ .name = "put-term-char",
+ },
+ [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = {
+ .name = "query-cpu-stopped-state",
+ },
+ [RTAS_FNIDX__READ_PCI_CONFIG] = {
+ .name = "read-pci-config",
+ },
+ [RTAS_FNIDX__RTAS_LAST_ERROR] = {
+ .name = "rtas-last-error",
+ },
+ [RTAS_FNIDX__SET_INDICATOR] = {
+ .name = "set-indicator",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_POWER_LEVEL] = {
+ .name = "set-power-level",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = {
+ .name = "set-time-for-power-on",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_TIME_OF_DAY] = {
+ .name = "set-time-of-day",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__START_CPU] = {
+ .name = "start-cpu",
+ },
+ [RTAS_FNIDX__STOP_SELF] = {
+ .name = "stop-self",
+ },
+ [RTAS_FNIDX__SYSTEM_REBOOT] = {
+ .name = "system-reboot",
+ },
+ [RTAS_FNIDX__THAW_TIME_BASE] = {
+ .name = "thaw-time-base",
+ },
+ [RTAS_FNIDX__WRITE_PCI_CONFIG] = {
+ .name = "write-pci-config",
+ },
+};
-char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
-EXPORT_SYMBOL(rtas_data_buf);
+#define for_each_rtas_function(funcp) \
+ for (funcp = &rtas_function_table[0]; \
+ funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+ ++funcp)
-unsigned long rtas_rmo_buf;
+/*
+ * Nearly all RTAS calls need to be serialized. All uses of the
+ * default rtas_args block must hold rtas_lock.
+ *
+ * Exceptions to the RTAS serialization requirement (e.g. stop-self)
+ * must use a separate rtas_args structure.
+ */
+static DEFINE_RAW_SPINLOCK(rtas_lock);
+static struct rtas_args rtas_args;
+
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ * otherwise RTAS_UNKNOWN_SERVICE.
+ */
+s32 rtas_function_token(const rtas_fn_handle_t handle)
+{
+ const size_t index = handle.index;
+ const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
+
+ if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
+ return RTAS_UNKNOWN_SERVICE;
+ /*
+ * Various drivers attempt token lookups on non-RTAS
+ * platforms.
+ */
+ if (!rtas.dev)
+ return RTAS_UNKNOWN_SERVICE;
+
+ return rtas_function_table[index].token;
+}
+EXPORT_SYMBOL_GPL(rtas_function_token);
+
+static int rtas_function_cmp(const void *a, const void *b)
+{
+ const struct rtas_function *f1 = a;
+ const struct rtas_function *f2 = b;
+
+ return strcmp(f1->name, f2->name);
+}
/*
- * If non-NULL, this gets called when the kernel terminates.
- * This is done like this so rtas_flash can be a module.
+ * Boot-time initialization of the function table needs the lookup to
+ * return a non-const-qualified object. Use rtas_name_to_function()
+ * in all other contexts.
*/
-void (*rtas_flash_term_hook)(int);
-EXPORT_SYMBOL(rtas_flash_term_hook);
+static struct rtas_function *__rtas_name_to_function(const char *name)
+{
+ const struct rtas_function key = {
+ .name = name,
+ };
+ struct rtas_function *found;
+
+ found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table),
+ sizeof(rtas_function_table[0]), rtas_function_cmp);
-/* RTAS use home made raw locking instead of spin_lock_irqsave
- * because those can be called from within really nasty contexts
- * such as having the timebase stopped which would lockup with
- * normal locks and spinlock debugging enabled
+ return found;
+}
+
+static const struct rtas_function *rtas_name_to_function(const char *name)
+{
+ return __rtas_name_to_function(name);
+}
+
+static DEFINE_XARRAY(rtas_token_to_function_xarray);
+
+static int __init rtas_token_to_function_xarray_init(void)
+{
+ const struct rtas_function *func;
+ int err = 0;
+
+ for_each_rtas_function(func) {
+ const s32 token = func->token;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ continue;
+
+ err = xa_err(xa_store(&rtas_token_to_function_xarray,
+ token, (void *)func, GFP_KERNEL));
+ if (err)
+ break;
+ }
+
+ return err;
+}
+arch_initcall(rtas_token_to_function_xarray_init);
+
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
*/
-static unsigned long lock_rtas(void)
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
{
- unsigned long flags;
+ return xa_load(&rtas_token_to_function_xarray, token);
+}
+
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
+static const struct rtas_function *rtas_token_to_function(s32 token)
+{
+ const struct rtas_function *func;
+
+ if (WARN_ONCE(token < 0, "invalid token %d", token))
+ return NULL;
+
+ func = rtas_token_to_function_untrusted(token);
+ if (func)
+ return func;
+ /*
+ * Fall back to linear scan in case the reverse mapping hasn't
+ * been initialized yet.
+ */
+ if (xa_empty(&rtas_token_to_function_xarray)) {
+ for_each_rtas_function(func) {
+ if (func->token == token)
+ return func;
+ }
+ }
- local_irq_save(flags);
- preempt_disable();
- arch_spin_lock(&rtas.lock);
- return flags;
+ WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+ return NULL;
}
-static void unlock_rtas(unsigned long flags)
+/* This is here deliberately so it's only used in this file */
+void enter_rtas(unsigned long);
+
+static void __do_enter_rtas(struct rtas_args *args)
{
- arch_spin_unlock(&rtas.lock);
- local_irq_restore(flags);
- preempt_enable();
+ enter_rtas(__pa(args));
+ srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
+}
+
+static void __do_enter_rtas_trace(struct rtas_args *args)
+{
+ const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token));
+
+ /*
+ * If there is a per-function lock, it must be held by the
+ * caller.
+ */
+ if (func->lock)
+ lockdep_assert_held(func->lock);
+
+ if (args == &rtas_args)
+ lockdep_assert_held(&rtas_lock);
+
+ trace_rtas_input(args, func->name);
+ trace_rtas_ll_entry(args);
+
+ __do_enter_rtas(args);
+
+ trace_rtas_ll_exit(args);
+ trace_rtas_output(args, func->name);
}
+static void do_enter_rtas(struct rtas_args *args)
+{
+ const unsigned long msr = mfmsr();
+ /*
+ * Situations where we want to skip any active tracepoints for
+ * safety reasons:
+ *
+ * 1. The last code executed on an offline CPU as it stops,
+ * i.e. we're about to call stop-self. The tracepoints'
+ * function name lookup uses xarray, which uses RCU, which
+ * isn't valid to call on an offline CPU. Any events
+ * emitted on an offline CPU will be discarded anyway.
+ *
+ * 2. In real mode, as when invoking ibm,nmi-interlock from
+ * the pseries MCE handler. We cannot count on trace
+ * buffers or the entries in rtas_token_to_function_xarray
+ * to be contained in the RMO.
+ */
+ const unsigned long mask = MSR_IR | MSR_DR;
+ const bool can_trace = likely(cpu_online(raw_smp_processor_id()) &&
+ (msr & mask) == mask);
+ /*
+ * Make sure MSR[RI] is currently enabled as it will be forced later
+ * in enter_rtas.
+ */
+ BUG_ON(!(msr & MSR_RI));
+
+ BUG_ON(!irqs_disabled());
+
+ hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */
+
+ if (can_trace)
+ __do_enter_rtas_trace(args);
+ else
+ __do_enter_rtas(args);
+}
+
+struct rtas_t rtas;
+
+DEFINE_SPINLOCK(rtas_data_buf_lock);
+EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
+
+char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K);
+EXPORT_SYMBOL_GPL(rtas_data_buf);
+
+unsigned long rtas_rmo_buf;
+
+/*
+ * If non-NULL, this gets called when the kernel terminates.
+ * This is done like this so rtas_flash can be a module.
+ */
+void (*rtas_flash_term_hook)(int);
+EXPORT_SYMBOL_GPL(rtas_flash_term_hook);
+
/*
* call_rtas_display_status and call_rtas_display_status_delay
* are designed only for very early low-level debugging, which
@@ -94,14 +753,14 @@ static void unlock_rtas(unsigned long flags)
*/
static void call_rtas_display_status(unsigned char c)
{
- unsigned long s;
+ unsigned long flags;
if (!rtas.base)
return;
- s = lock_rtas();
- rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c);
- unlock_rtas(s);
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
}
static void call_rtas_display_status_delay(char c)
@@ -109,7 +768,7 @@ static void call_rtas_display_status_delay(char c)
static int pending_newline = 0; /* did last write end with unprinted newline? */
static int width = 16;
- if (c == '\n') {
+ if (c == '\n') {
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
@@ -119,7 +778,7 @@ static void call_rtas_display_status_delay(char c)
if (pending_newline) {
call_rtas_display_status('\r');
call_rtas_display_status('\n');
- }
+ }
pending_newline = 0;
if (width--) {
call_rtas_display_status(c);
@@ -225,8 +884,8 @@ void rtas_progress(char *s, unsigned short hex)
"ibm,display-truncation-length", NULL);
of_node_put(root);
}
- display_character = rtas_token("display-character");
- set_indicator = rtas_token("set-indicator");
+ display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER);
+ set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR);
}
if (display_character == RTAS_UNKNOWN_SERVICE) {
@@ -259,7 +918,7 @@ void rtas_progress(char *s, unsigned short hex)
else
rtas_call(display_character, 1, 1, NULL, '\r');
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -279,9 +938,9 @@ void rtas_progress(char *s, unsigned short hex)
spin_unlock(&progress_lock);
return;
}
-
+
/* RTAS wants CR-LF, not just LF */
-
+
if (*os == '\n') {
rtas_call(display_character, 1, 1, NULL, '\r');
rtas_call(display_character, 1, 1, NULL, '\n');
@@ -291,7 +950,7 @@ void rtas_progress(char *s, unsigned short hex)
*/
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -300,36 +959,49 @@ void rtas_progress(char *s, unsigned short hex)
width--;
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
os++;
-
+
/* if we overwrite the screen length */
if (width <= 0)
while ((*os != 0) && (*os != '\n') && (*os != '\r'))
os++;
}
-
+
spin_unlock(&progress_lock);
}
-EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */
+EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */
int rtas_token(const char *service)
{
+ const struct rtas_function *func;
const __be32 *tokp;
+
if (rtas.dev == NULL)
return RTAS_UNKNOWN_SERVICE;
+
+ func = rtas_name_to_function(service);
+ if (func)
+ return func->token;
+ /*
+ * The caller is looking up a name that is not known to be an
+ * RTAS function. Either it's a function that needs to be
+ * added to the table, or they're misusing rtas_token() to
+ * access non-function properties of the /rtas node. Warn and
+ * fall back to the legacy behavior.
+ */
+ WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n",
+ service);
+
tokp = of_get_property(rtas.dev, service, NULL);
return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE;
}
-EXPORT_SYMBOL(rtas_token);
-
-int rtas_service_present(const char *service)
-{
- return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-EXPORT_SYMBOL(rtas_service_present);
+EXPORT_SYMBOL_GPL(rtas_token);
#ifdef CONFIG_RTAS_ERROR_LOGGING
+
+static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
+
/*
* Return the firmware-specified size of the error log buffer
* for all rtas calls that require an error buffer argument.
@@ -337,56 +1009,66 @@ EXPORT_SYMBOL(rtas_service_present);
*/
int rtas_get_error_log_max(void)
{
- static int rtas_error_log_max;
- if (rtas_error_log_max)
- return rtas_error_log_max;
-
- rtas_error_log_max = rtas_token ("rtas-error-log-max");
- if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
- (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
- printk (KERN_WARNING "RTAS: bad log buffer size %d\n",
- rtas_error_log_max);
- rtas_error_log_max = RTAS_ERROR_LOG_MAX;
- }
return rtas_error_log_max;
}
-EXPORT_SYMBOL(rtas_get_error_log_max);
+
+static void __init init_error_log_max(void)
+{
+ static const char propname[] __initconst = "rtas-error-log-max";
+ u32 max;
+
+ if (of_property_read_u32(rtas.dev, propname, &max)) {
+ pr_warn("%s not found, using default of %u\n",
+ propname, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ if (max > RTAS_ERROR_LOG_MAX) {
+ pr_warn("%s = %u, clamping max error log size to %u\n",
+ propname, max, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ rtas_error_log_max = max;
+}
static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
-static int rtas_last_error_token;
/** Return a copy of the detailed error text associated with the
* most recent failed call to rtas. Because the error text
* might go stale if there are any other intervening rtas calls,
* this routine must be called atomically with whatever produced
- * the error (i.e. with rtas.lock still held from the previous call).
+ * the error (i.e. with rtas_lock still held from the previous call).
*/
static char *__fetch_rtas_last_error(char *altbuf)
{
+ const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR);
struct rtas_args err_args, save_args;
u32 bufsz;
char *buf = NULL;
- if (rtas_last_error_token == -1)
+ lockdep_assert_held(&rtas_lock);
+
+ if (token == -1)
return NULL;
bufsz = rtas_get_error_log_max();
- err_args.token = cpu_to_be32(rtas_last_error_token);
+ err_args.token = cpu_to_be32(token);
err_args.nargs = cpu_to_be32(2);
err_args.nret = cpu_to_be32(1);
err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf));
err_args.args[1] = cpu_to_be32(bufsz);
err_args.args[2] = 0;
- save_args = rtas.args;
- rtas.args = err_args;
+ save_args = rtas_args;
+ rtas_args = err_args;
- enter_rtas(__pa(&rtas.args));
+ do_enter_rtas(&rtas_args);
- err_args = rtas.args;
- rtas.args = save_args;
+ err_args = rtas_args;
+ rtas_args = save_args;
/* Log the error in the unlikely case that there was one. */
if (unlikely(err_args.args[2] == 0)) {
@@ -398,7 +1080,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
}
if (buf)
- memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
}
return buf;
@@ -409,6 +1091,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
#else /* CONFIG_RTAS_ERROR_LOGGING */
#define __fetch_rtas_last_error(x) NULL
#define get_errorlog_buffer() NULL
+static void __init init_error_log_max(void) {}
#endif
@@ -429,9 +1112,26 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
for (i = 0; i < nret; ++i)
args->rets[i] = 0;
- enter_rtas(__pa(args));
+ do_enter_rtas(args);
}
+/**
+ * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
+ * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
+ * constraints.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * This function is similar to rtas_call(), but must be used with a
+ * limited set of RTAS calls specifically exempted from the general
+ * requirement that only one RTAS call may be in progress at any
+ * time. Examples include stop-self and ibm,nmi-interlock.
+ */
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
{
va_list list;
@@ -441,38 +1141,120 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
va_end(list);
}
+static bool token_is_restricted_errinjct(s32 token)
+{
+ return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) ||
+ token == rtas_function_token(RTAS_FN_IBM_ERRINJCT);
+}
+
+/**
+ * rtas_call() - Invoke an RTAS firmware function.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @outputs: Array of @nret output words.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * The @nargs and @nret arguments must match the number of input and
+ * output parameters specified for the RTAS function.
+ *
+ * rtas_call() returns RTAS status codes, not conventional Linux errno
+ * values. Callers must translate any failure to an appropriate errno
+ * in syscall context. Most callers of RTAS functions that can return
+ * -2 or 990x should use rtas_busy_delay() to correctly handle those
+ * statuses before calling again.
+ *
+ * The return value descriptions are adapted from 7.2.8 [RTAS] Return
+ * Codes of the PAPR and CHRP specifications.
+ *
+ * Context: Process context preferably, interrupt context if
+ * necessary. Acquires an internal spinlock and may perform
+ * GFP_ATOMIC slab allocation in error path. Unsafe for NMI
+ * context.
+ * Return:
+ * * 0 - RTAS function call succeeded.
+ * * -1 - RTAS function encountered a hardware or
+ * platform error, or the token is invalid,
+ * or the function is restricted by kernel policy.
+ * * -2 - Specs say "A necessary hardware device was busy,
+ * and the requested function could not be
+ * performed. The operation should be retried at
+ * a later time." This is misleading, at least with
+ * respect to current RTAS implementations. What it
+ * usually means in practice is that the function
+ * could not be completed while meeting RTAS's
+ * deadline for returning control to the OS (250us
+ * for PAPR/PowerVM, typically), but the call may be
+ * immediately reattempted to resume work on it.
+ * * -3 - Parameter error.
+ * * -7 - Unexpected state change.
+ * * 9000...9899 - Vendor-specific success codes.
+ * * 9900...9905 - Advisory extended delay. Caller should try
+ * again after ~10^x ms has elapsed, where x is
+ * the last digit of the status [0-5]. Again going
+ * beyond the PAPR text, 990x on PowerVM indicates
+ * contention for RTAS-internal resources. Other
+ * RTAS call sequences in progress should be
+ * allowed to complete before reattempting the
+ * call.
+ * * -9000 - Multi-level isolation error.
+ * * -9999...-9004 - Vendor-specific error codes.
+ * * Additional negative values - Function-specific error.
+ * * Additional positive values - Function-specific success.
+ */
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
+ struct pin_cookie cookie;
va_list list;
int i;
- unsigned long s;
- struct rtas_args *rtas_args;
+ unsigned long flags;
+ struct rtas_args *args;
char *buff_copy = NULL;
int ret;
if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
return -1;
- s = lock_rtas();
+ if (token_is_restricted_errinjct(token)) {
+ /*
+ * It would be nicer to not discard the error value
+ * from security_locked_down(), but callers expect an
+ * RTAS status, not an errno.
+ */
+ if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION))
+ return -1;
+ }
+
+ if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
/* We use the global rtas args buffer */
- rtas_args = &rtas.args;
+ args = &rtas_args;
va_start(list, outputs);
- va_rtas_call_unlocked(rtas_args, token, nargs, nret, list);
+ va_rtas_call_unlocked(args, token, nargs, nret, list);
va_end(list);
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
- if (be32_to_cpu(rtas_args->rets[0]) == -1)
+ if (be32_to_cpu(args->rets[0]) == -1)
buff_copy = __fetch_rtas_last_error(NULL);
if (nret > 1 && outputs != NULL)
for (i = 0; i < nret-1; ++i)
- outputs[i] = be32_to_cpu(rtas_args->rets[i+1]);
- ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0;
+ outputs[i] = be32_to_cpu(args->rets[i + 1]);
+ ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
- unlock_rtas(s);
+ lockdep_unpin_lock(&rtas_lock, cookie);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
@@ -481,10 +1263,27 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
return ret;
}
-EXPORT_SYMBOL(rtas_call);
+EXPORT_SYMBOL_GPL(rtas_call);
-/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
- * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ * suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000 - If @status is 9904.
+ * * 1000 - If @status is 9903.
+ * * 100 - If @status is 9902.
+ * * 10 - If @status is 9901.
+ * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
+ * some callers depend on this behavior, and the worst outcome
+ * is that they will delay for longer than necessary.
+ * * 0 - If @status is not a busy or extended delay value.
*/
unsigned int rtas_busy_delay_time(int status)
{
@@ -502,54 +1301,160 @@ unsigned int rtas_busy_delay_time(int status)
return ms;
}
-EXPORT_SYMBOL(rtas_busy_delay_time);
-/* For an RTAS busy status code, perform the hinted delay. */
-unsigned int rtas_busy_delay(int status)
+/*
+ * Early boot fallback for rtas_busy_delay().
+ */
+static bool __init rtas_busy_delay_early(int status)
+{
+ static size_t successive_ext_delays __initdata;
+ bool retry;
+
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ /*
+ * In the unlikely case that we receive an extended
+ * delay status in early boot, the OS is probably not
+ * the cause, and there's nothing we can do to clear
+ * the condition. Best we can do is delay for a bit
+ * and hope it's transient. Lie to the caller if it
+ * seems like we're stuck in a retry loop.
+ */
+ mdelay(1);
+ retry = true;
+ successive_ext_delays += 1;
+ if (successive_ext_delays > 1000) {
+ pr_err("too many extended delays, giving up\n");
+ dump_stack();
+ retry = false;
+ successive_ext_delays = 0;
+ }
+ break;
+ case RTAS_BUSY:
+ retry = true;
+ successive_ext_delays = 0;
+ break;
+ default:
+ retry = false;
+ successive_ext_delays = 0;
+ break;
+ }
+
+ return retry;
+}
+
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true - @status is RTAS_BUSY or an extended delay hint. The
+ * caller may assume that the CPU has been yielded if necessary,
+ * and that an appropriate delay for @status has elapsed.
+ * Generally the caller should reattempt the RTAS call which
+ * yielded @status.
+ *
+ * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
+ * caller is responsible for handling @status.
+ */
+bool __ref rtas_busy_delay(int status)
{
unsigned int ms;
+ bool ret;
- might_sleep();
- ms = rtas_busy_delay_time(status);
- if (ms && need_resched())
- msleep(ms);
+ /*
+ * Can't do timed sleeps before timekeeping is up.
+ */
+ if (system_state < SYSTEM_SCHEDULING)
+ return rtas_busy_delay_early(status);
- return ms;
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ ret = true;
+ ms = rtas_busy_delay_time(status);
+ /*
+ * The extended delay hint can be as high as 100 seconds.
+ * Surely any function returning such a status is either
+ * buggy or isn't going to be significantly slowed by us
+ * polling at 1HZ. Clamp the sleep time to one second.
+ */
+ ms = clamp(ms, 1U, 1000U);
+ /*
+ * The delay hint is an order-of-magnitude suggestion, not
+ * a minimum. It is fine, possibly even advantageous, for
+ * us to pause for less time than hinted. For small values,
+ * use usleep_range() to ensure we don't sleep much longer
+ * than actually needed.
+ *
+ * See Documentation/timers/timers-howto.rst for
+ * explanation of the threshold used here. In effect we use
+ * usleep_range() for 9900 and 9901, msleep() for
+ * 9902-9905.
+ */
+ if (ms <= 20)
+ usleep_range(ms * 100, ms * 1000);
+ else
+ msleep(ms);
+ break;
+ case RTAS_BUSY:
+ ret = true;
+ /*
+ * We should call again immediately if there's no other
+ * work to do.
+ */
+ cond_resched();
+ break;
+ default:
+ ret = false;
+ /*
+ * Not a busy or extended delay status; the caller should
+ * handle @status itself. Ensure we warn on misuses in
+ * atomic context regardless.
+ */
+ might_sleep();
+ break;
+ }
+
+ return ret;
}
-EXPORT_SYMBOL(rtas_busy_delay);
+EXPORT_SYMBOL_GPL(rtas_busy_delay);
-static int rtas_error_rc(int rtas_rc)
+int rtas_error_rc(int rtas_rc)
{
int rc;
switch (rtas_rc) {
- case -1: /* Hardware Error */
- rc = -EIO;
- break;
- case -3: /* Bad indicator/domain/etc */
- rc = -EINVAL;
- break;
- case -9000: /* Isolation error */
- rc = -EFAULT;
- break;
- case -9001: /* Outstanding TCE/PTE */
- rc = -EEXIST;
- break;
- case -9002: /* No usable slot */
- rc = -ENODEV;
- break;
- default:
- printk(KERN_ERR "%s: unexpected RTAS error %d\n",
- __func__, rtas_rc);
- rc = -ERANGE;
- break;
+ case RTAS_HARDWARE_ERROR: /* Hardware Error */
+ rc = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */
+ rc = -EINVAL;
+ break;
+ case -9000: /* Isolation error */
+ rc = -EFAULT;
+ break;
+ case -9001: /* Outstanding TCE/PTE */
+ rc = -EEXIST;
+ break;
+ case -9002: /* No usable slot */
+ rc = -ENODEV;
+ break;
+ default:
+ pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
+ rc = -ERANGE;
+ break;
}
return rc;
}
+EXPORT_SYMBOL_GPL(rtas_error_rc);
int rtas_get_power_level(int powerdomain, int *level)
{
- int token = rtas_token("get-power-level");
+ int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -562,11 +1467,11 @@ int rtas_get_power_level(int powerdomain, int *level)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_get_power_level);
+EXPORT_SYMBOL_GPL(rtas_get_power_level);
int rtas_set_power_level(int powerdomain, int level, int *setlevel)
{
- int token = rtas_token("set-power-level");
+ int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -580,11 +1485,11 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_set_power_level);
+EXPORT_SYMBOL_GPL(rtas_set_power_level);
int rtas_get_sensor(int sensor, int index, int *state)
{
- int token = rtas_token("get-sensor-state");
+ int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -598,11 +1503,11 @@ int rtas_get_sensor(int sensor, int index, int *state)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_get_sensor);
+EXPORT_SYMBOL_GPL(rtas_get_sensor);
int rtas_get_sensor_fast(int sensor, int index, int *state)
{
- int token = rtas_token("get-sensor-state");
+ int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -641,11 +1546,10 @@ bool rtas_indicator_present(int token, int *maxindex)
return false;
}
-EXPORT_SYMBOL(rtas_indicator_present);
int rtas_set_indicator(int indicator, int index, int new_value)
{
- int token = rtas_token("set-indicator");
+ int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -659,15 +1563,15 @@ int rtas_set_indicator(int indicator, int index, int new_value)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_set_indicator);
+EXPORT_SYMBOL_GPL(rtas_set_indicator);
/*
* Ignoring RTAS extended delay
*/
int rtas_set_indicator_fast(int indicator, int index, int new_value)
{
+ int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
int rc;
- int token = rtas_token("set-indicator");
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
@@ -683,12 +1587,70 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value)
return rc;
}
+/**
+ * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
+ *
+ * @fw_status: RTAS call status will be placed here if not NULL.
+ *
+ * rtas_ibm_suspend_me() should be called only on a CPU which has
+ * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
+ * should be waiting to return from H_JOIN.
+ *
+ * rtas_ibm_suspend_me() may suspend execution of the OS
+ * indefinitely. Callers should take appropriate measures upon return, such as
+ * resetting watchdog facilities.
+ *
+ * Callers may choose to retry this call if @fw_status is
+ * %RTAS_THREADS_ACTIVE.
+ *
+ * Return:
+ * 0 - The partition has resumed from suspend, possibly after
+ * migration to a different host.
+ * -ECANCELED - The operation was aborted.
+ * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call.
+ * -EBUSY - Some other condition prevented the suspend from succeeding.
+ * -EIO - Hardware/platform error.
+ */
+int rtas_ibm_suspend_me(int *fw_status)
+{
+ int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME);
+ int fwrc;
+ int ret;
+
+ fwrc = rtas_call(token, 0, 1, NULL);
+
+ switch (fwrc) {
+ case 0:
+ ret = 0;
+ break;
+ case RTAS_SUSPEND_ABORTED:
+ ret = -ECANCELED;
+ break;
+ case RTAS_THREADS_ACTIVE:
+ ret = -EAGAIN;
+ break;
+ case RTAS_NOT_SUSPENDABLE:
+ case RTAS_OUTSTANDING_COPROC:
+ ret = -EBUSY;
+ break;
+ case -1:
+ default:
+ ret = -EIO;
+ break;
+ }
+
+ if (fw_status)
+ *fw_status = fwrc;
+
+ return ret;
+}
+
void __noreturn rtas_restart(char *cmd)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_RESTART);
- printk("RTAS system-reboot returned %d\n",
- rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
+ pr_emerg("system-reboot returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL));
for (;;);
}
@@ -697,8 +1659,8 @@ void rtas_power_off(void)
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_POWER_OFF);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1));
for (;;);
}
@@ -707,16 +1669,19 @@ void __noreturn rtas_halt(void)
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_HALT);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1));
for (;;);
}
/* Must be in the RMO region, so we place it here */
static char rtas_os_term_buf[2048];
+static bool ibm_extended_os_term;
void rtas_os_term(char *str)
{
+ s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
+ static struct rtas_args args;
int status;
/*
@@ -725,311 +1690,69 @@ void rtas_os_term(char *str)
* this property may terminate the partition which we want to avoid
* since it interferes with panic_timeout.
*/
- if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
- RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
+
+ if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term)
return;
snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
+ /*
+ * Keep calling as long as RTAS returns a "try again" status,
+ * but don't use rtas_busy_delay(), which potentially
+ * schedules.
+ */
do {
- status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
- __pa(rtas_os_term_buf));
- } while (rtas_busy_delay(status));
+ rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf));
+ status = be32_to_cpu(args.rets[0]);
+ } while (rtas_busy_delay_time(status));
if (status != 0)
- printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
+ pr_emerg("ibm,os-term call failed %d\n", status);
}
-static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
-#ifdef CONFIG_PPC_PSERIES
-static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
-{
- u16 slb_size = mmu_slb_size;
- int rc = H_MULTI_THREADS_ACTIVE;
- int cpu;
-
- slb_set_size(SLB_MIN_SIZE);
- printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
-
- while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
- !atomic_read(&data->error))
- rc = rtas_call(data->token, 0, 1, NULL);
-
- if (rc || atomic_read(&data->error)) {
- printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
- slb_set_size(slb_size);
- }
-
- if (atomic_read(&data->error))
- rc = atomic_read(&data->error);
-
- atomic_set(&data->error, rc);
- pSeries_coalesce_init();
-
- if (wake_when_done) {
- atomic_set(&data->done, 1);
-
- for_each_online_cpu(cpu)
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
- }
-
- if (atomic_dec_return(&data->working) == 0)
- complete(data->complete);
-
- return rc;
-}
-
-int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
-{
- atomic_inc(&data->working);
- return __rtas_suspend_last_cpu(data, 0);
-}
-
-static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
-{
- long rc = H_SUCCESS;
- unsigned long msr_save;
- int cpu;
-
- atomic_inc(&data->working);
-
- /* really need to ensure MSR.EE is off for H_JOIN */
- msr_save = mfmsr();
- mtmsr(msr_save & ~(MSR_EE));
-
- while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))
- rc = plpar_hcall_norets(H_JOIN);
-
- mtmsr(msr_save);
-
- if (rc == H_SUCCESS) {
- /* This cpu was prodded and the suspend is complete. */
- goto out;
- } else if (rc == H_CONTINUE) {
- /* All other cpus are in H_JOIN, this cpu does
- * the suspend.
- */
- return __rtas_suspend_last_cpu(data, wake_when_done);
- } else {
- printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
- smp_processor_id(), rc);
- atomic_set(&data->error, rc);
- }
-
- if (wake_when_done) {
- atomic_set(&data->done, 1);
-
- /* This cpu did the suspend or got an error; in either case,
- * we need to prod all other other cpus out of join state.
- * Extra prods are harmless.
- */
- for_each_online_cpu(cpu)
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
- }
-out:
- if (atomic_dec_return(&data->working) == 0)
- complete(data->complete);
- return rc;
-}
-
-int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
-{
- return __rtas_suspend_cpu(data, 0);
-}
-
-static void rtas_percpu_suspend_me(void *info)
-{
- __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
-}
-
-enum rtas_cpu_state {
- DOWN,
- UP,
-};
-
-#ifndef CONFIG_SMP
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- if (!cpumask_empty(cpus)) {
- cpumask_clear(cpus);
- return -EINVAL;
- } else
- return 0;
-}
-#else
-/* On return cpumask will be altered to indicate CPUs changed.
- * CPUs with states changed will be set in the mask,
- * CPUs with status unchanged will be unset in the mask. */
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- int cpu;
- int cpuret = 0;
- int ret = 0;
-
- if (cpumask_empty(cpus))
- return 0;
-
- for_each_cpu(cpu, cpus) {
- struct device *dev = get_cpu_device(cpu);
-
- switch (state) {
- case DOWN:
- cpuret = device_offline(dev);
- break;
- case UP:
- cpuret = device_online(dev);
- break;
- }
- if (cpuret < 0) {
- pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
- __func__,
- ((state == UP) ? "up" : "down"),
- cpu, cpuret);
- if (!ret)
- ret = cpuret;
- if (state == UP) {
- /* clear bits for unchanged cpus, return */
- cpumask_shift_right(cpus, cpus, cpu);
- cpumask_shift_left(cpus, cpus, cpu);
- break;
- } else {
- /* clear bit for unchanged cpu, continue */
- cpumask_clear_cpu(cpu, cpus);
- }
- }
- cond_resched();
- }
-
- return ret;
-}
-#endif
-
-int rtas_online_cpus_mask(cpumask_var_t cpus)
-{
- int ret;
-
- ret = rtas_cpu_state_change_mask(UP, cpus);
-
- if (ret) {
- cpumask_var_t tmp_mask;
-
- if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
- return ret;
-
- /* Use tmp_mask to preserve cpus mask from first failure */
- cpumask_copy(tmp_mask, cpus);
- rtas_offline_cpus_mask(tmp_mask);
- free_cpumask_var(tmp_mask);
- }
-
- return ret;
-}
-
-int rtas_offline_cpus_mask(cpumask_var_t cpus)
-{
- return rtas_cpu_state_change_mask(DOWN, cpus);
-}
-
-int rtas_ibm_suspend_me(u64 handle)
+/**
+ * rtas_activate_firmware() - Activate a new version of firmware.
+ *
+ * Context: This function may sleep.
+ *
+ * Activate a new version of partition firmware. The OS must call this
+ * after resuming from a partition hibernation or migration in order
+ * to maintain the ability to perform live firmware updates. It's not
+ * catastrophic for this method to be absent or to fail; just log the
+ * condition in that case.
+ */
+void rtas_activate_firmware(void)
{
- long state;
- long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
- struct rtas_suspend_me_data data;
- DECLARE_COMPLETION_ONSTACK(done);
- cpumask_var_t offline_mask;
- int cpuret;
-
- if (!rtas_service_present("ibm,suspend-me"))
- return -ENOSYS;
-
- /* Make sure the state is valid */
- rc = plpar_hcall(H_VASI_STATE, retbuf, handle);
-
- state = retbuf[0];
-
- if (rc) {
- printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
- return rc;
- } else if (state == H_VASI_ENABLED) {
- return -EAGAIN;
- } else if (state != H_VASI_SUSPENDING) {
- printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n",
- state);
- return -EIO;
- }
-
- if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
- return -ENOMEM;
-
- atomic_set(&data.working, 0);
- atomic_set(&data.done, 0);
- atomic_set(&data.error, 0);
- data.token = rtas_token("ibm,suspend-me");
- data.complete = &done;
-
- lock_device_hotplug();
+ int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+ int fwrc;
- /* All present CPUs must be online */
- cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
- cpuret = rtas_online_cpus_mask(offline_mask);
- if (cpuret) {
- pr_err("%s: Could not bring present CPUs online.\n", __func__);
- atomic_set(&data.error, cpuret);
- goto out;
- }
-
- cpu_hotplug_disable();
-
- /* Check if we raced with a CPU-Offline Operation */
- if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
- pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
- atomic_set(&data.error, -EAGAIN);
- goto out_hotplug_enable;
+ if (token == RTAS_UNKNOWN_SERVICE) {
+ pr_notice("ibm,activate-firmware method unavailable\n");
+ return;
}
- /* Call function on all CPUs. One of us will make the
- * rtas call
- */
- on_each_cpu(rtas_percpu_suspend_me, &data, 0);
-
- wait_for_completion(&done);
-
- if (atomic_read(&data.error) != 0)
- printk(KERN_ERR "Error doing global join\n");
+ mutex_lock(&rtas_ibm_activate_firmware_lock);
-out_hotplug_enable:
- cpu_hotplug_enable();
+ do {
+ fwrc = rtas_call(token, 0, 1, NULL);
+ } while (rtas_busy_delay(fwrc));
- /* Take down CPUs not online prior to suspend */
- cpuret = rtas_offline_cpus_mask(offline_mask);
- if (cpuret)
- pr_warn("%s: Could not restore CPUs to offline state.\n",
- __func__);
+ mutex_unlock(&rtas_ibm_activate_firmware_lock);
-out:
- unlock_device_hotplug();
- free_cpumask_var(offline_mask);
- return atomic_read(&data.error);
-}
-#else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me(u64 handle)
-{
- return -ENOSYS;
+ if (fwrc)
+ pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
}
-#endif
/**
- * Find a specific pseries error log in an RTAS extended event log.
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ * extended event log.
* @log: RTAS error/event log
* @section_id: two character section identifier
*
- * Returns a pointer to the specified errorlog or NULL if not found.
+ * Return: A pointer to the specified errorlog or NULL if not found.
*/
-struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
- uint16_t section_id)
+noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id)
{
struct rtas_ext_event_log_v6 *ext_log =
(struct rtas_ext_event_log_v6 *)log->buffer;
@@ -1058,9 +1781,113 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
return NULL;
}
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * If a function is allowed to be invoked via the syscall, then its
+ * entry in the rtas_functions table points to a rtas_filter that
+ * describes its constraints, with the indexes of the parameters which
+ * are expected to contain addresses and sizes of buffers allocated
+ * inside the RMO buffer.
+ */
+
+static bool in_rmo_buf(u32 base, u32 end)
+{
+ return base >= rtas_rmo_buf &&
+ base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) &&
+ base <= end &&
+ end >= rtas_rmo_buf &&
+ end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
+}
+
+static bool block_rtas_call(const struct rtas_function *func, int nargs,
+ struct rtas_args *args)
+{
+ const struct rtas_filter *f;
+ const bool is_platform_dump =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
+ const bool is_config_conn =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
+ u32 base, size, end;
+
+ /*
+ * Only functions with filters attached are allowed.
+ */
+ f = func->filter;
+ if (!f)
+ goto err;
+ /*
+ * And some functions aren't allowed on LE.
+ */
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le)
+ goto err;
+
+ if (f->buf_idx1 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx1]);
+ if (f->size_idx1 != -1)
+ size = be32_to_cpu(args->args[f->size_idx1]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,platform-dump - NULL buffer
+ * address is used to indicate end of dump processing
+ */
+ if (is_platform_dump && base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ if (f->buf_idx2 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx2]);
+ if (f->size_idx2 != -1)
+ size = be32_to_cpu(args->args[f->size_idx2]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,configure-connector where the
+ * address can be 0
+ */
+ if (is_config_conn && base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ return false;
+err:
+ pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+ pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
+ func->name, nargs, current->comm);
+ return true;
+}
+
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
+ const struct rtas_function *func;
+ struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
char *buff_copy, *errbuf = NULL;
@@ -1089,14 +1916,30 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
- if (token == RTAS_UNKNOWN_SERVICE)
+ /*
+ * If this token doesn't correspond to a function the kernel
+ * understands, you're not allowed to call it.
+ */
+ func = rtas_token_to_function_untrusted(token);
+ if (!func)
return -EINVAL;
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+ if (block_rtas_call(func, nargs, &args))
+ return -EINVAL;
+
+ if (token_is_restricted_errinjct(token)) {
+ int err;
+
+ err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION);
+ if (err)
+ return err;
+ }
+
/* Need to handle ibm,suspend_me call specially */
- if (token == ibm_suspend_me_token) {
+ if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) {
/*
* rtas_ibm_suspend_me assumes the streamid handle is in cpu
@@ -1105,7 +1948,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
int rc = 0;
u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
| be32_to_cpu(args.args[1]);
- rc = rtas_ibm_suspend_me(handle);
+ rc = rtas_syscall_dispatch_ibm_suspend_me(handle);
if (rc == -EAGAIN)
args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
else if (rc == -EIO)
@@ -1117,18 +1960,32 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
buff_copy = get_errorlog_buffer();
- flags = lock_rtas();
+ /*
+ * If this function has a mutex assigned to it, we must
+ * acquire it to avoid interleaving with any kernel-based uses
+ * of the same function. Kernel-based sequences acquire the
+ * appropriate mutex explicitly.
+ */
+ if (func->lock)
+ mutex_lock(func->lock);
+
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
- rtas.args = args;
- enter_rtas(__pa(&rtas.args));
- args = rtas.args;
+ rtas_args = args;
+ do_enter_rtas(&rtas_args);
+ args = rtas_args;
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
if (be32_to_cpu(args.rets[0]) == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
- unlock_rtas(flags);
+ lockdep_unpin_lock(&rtas_lock, cookie);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
+
+ if (func->lock)
+ mutex_unlock(func->lock);
if (buff_copy) {
if (errbuf)
@@ -1146,6 +2003,54 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
return 0;
}
+static void __init rtas_function_table_init(void)
+{
+ struct property *prop;
+
+ for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
+ struct rtas_function *curr = &rtas_function_table[i];
+ struct rtas_function *prior;
+ int cmp;
+
+ curr->token = RTAS_UNKNOWN_SERVICE;
+
+ if (i == 0)
+ continue;
+ /*
+ * Ensure table is sorted correctly for binary search
+ * on function names.
+ */
+ prior = &rtas_function_table[i - 1];
+
+ cmp = strcmp(prior->name, curr->name);
+ if (cmp < 0)
+ continue;
+
+ if (cmp == 0) {
+ pr_err("'%s' has duplicate function table entries\n",
+ curr->name);
+ } else {
+ pr_err("function table unsorted: '%s' wrongly precedes '%s'\n",
+ prior->name, curr->name);
+ }
+ }
+
+ for_each_property_of_node(rtas.dev, prop) {
+ struct rtas_function *func;
+
+ if (prop->length != sizeof(u32))
+ continue;
+
+ func = __rtas_name_to_function(prop->name);
+ if (!func)
+ continue;
+
+ func->token = be32_to_cpup((__be32 *)prop->value);
+
+ pr_debug("function %s has token %u\n", func->name, func->token);
+ }
+}
+
/*
* Call early during boot, before mem init, to retrieve the RTAS
* information from the device-tree and allocate the RMO buffer for userland
@@ -1177,24 +2082,31 @@ void __init rtas_initialize(void)
no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
rtas.entry = no_entry ? rtas.base : entry;
+ init_error_log_max();
+
+ /* Must be called before any function token lookups */
+ rtas_function_table_init();
+
+ /*
+ * Discover this now to avoid a device tree lookup in the
+ * panic path.
+ */
+ ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term");
+
/* If RTAS was found, allocate the RMO buffer for it and look for
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
- ibm_suspend_me_token = rtas_token("ibm,suspend-me");
- }
#endif
- rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+ rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
0, rtas_region);
if (!rtas_rmo_buf)
panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
PAGE_SIZE, &rtas_region);
-#ifdef CONFIG_RTAS_ERROR_LOGGING
- rtas_last_error_token = rtas_token("rtas-last-error");
-#endif
+ rtas_work_area_reserve_arena(rtas_region);
}
int __init early_init_dt_scan_rtas(unsigned long node,
@@ -1209,6 +2121,12 @@ int __init early_init_dt_scan_rtas(unsigned long node,
entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+#ifdef CONFIG_PPC64
+ /* need this feature to decide the crashkernel offset */
+ if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
+ powerpc_firmware_features |= FW_FEATURE_LPAR;
+#endif
+
if (basep && entryp && sizep) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1234,23 +2152,22 @@ int __init early_init_dt_scan_rtas(unsigned long node,
return 1;
}
-static arch_spinlock_t timebase_lock;
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static u64 timebase = 0;
void rtas_give_timebase(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_spin_lock_irqsave(&timebase_lock, flags);
hard_irq_disable();
- arch_spin_lock(&timebase_lock);
- rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+ rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL);
timebase = get_tb();
- arch_spin_unlock(&timebase_lock);
+ raw_spin_unlock(&timebase_lock);
while (timebase)
barrier();
- rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+ rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL);
local_irq_restore(flags);
}
@@ -1258,8 +2175,8 @@ void rtas_take_timebase(void)
{
while (!timebase)
barrier();
- arch_spin_lock(&timebase_lock);
+ raw_spin_lock(&timebase_lock);
set_tb(timebase >> 32, timebase & 0xffffffff);
timebase = 0;
- arch_spin_unlock(&timebase_lock);
+ raw_spin_unlock(&timebase_lock);
}
diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S
new file mode 100644
index 000000000000..6ce95ddadbcd
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_entry.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * RTAS is called with MSR IR, DR, EE disabled, and LR in the return address.
+ *
+ * Note: r3 is an input parameter to rtas, so don't trash it...
+ */
+
+#ifdef CONFIG_PPC32
+_GLOBAL(enter_rtas)
+ stwu r1,-INT_FRAME_SIZE(r1)
+ mflr r0
+ stw r0,INT_FRAME_SIZE+4(r1)
+ LOAD_REG_ADDR(r4, rtas)
+ lis r6,1f@ha /* physical return address for rtas */
+ addi r6,r6,1f@l
+ tophys(r6,r6)
+ lwz r8,RTASENTRY(r4)
+ lwz r4,RTASBASE(r4)
+ mfmsr r9
+ stw r9,8(r1)
+ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+ mtlr r6
+ stw r1, THREAD + RTAS_SP(r2)
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ rfi
+1:
+ lis r8, 1f@h
+ ori r8, r8, 1f@l
+ LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ rfi /* Reactivate MMU translation */
+1:
+ lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */
+ lwz r9,8(r1) /* original msr value */
+ addi r1,r1,INT_FRAME_SIZE
+ li r0,0
+ stw r0, THREAD + RTAS_SP(r2)
+ mtlr r8
+ mtmsr r9
+ blr /* return to caller */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+
+#else /* CONFIG_PPC32 */
+#include <asm/exception-64s.h>
+
+/*
+ * 32-bit rtas on 64-bit machines has the additional problem that RTAS may
+ * not preserve the upper parts of registers it uses.
+ */
+_GLOBAL(enter_rtas)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
+
+ /* Because RTAS is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * RTAS might touch to the stack. (r0, r3-r12 are caller saved)
+ */
+ SAVE_GPR(2, r1) /* Save the TOC */
+ SAVE_NVGPRS(r1) /* Save the non-volatiles */
+
+ mfcr r4
+ std r4,_CCR(r1)
+ mfctr r5
+ std r5,_CTR(r1)
+ mfspr r6,SPRN_XER
+ std r6,_XER(r1)
+ mfdar r7
+ std r7,_DAR(r1)
+ mfdsisr r8
+ std r8,_DSISR(r1)
+
+ /* Temporary workaround to clear CR until RTAS can be modified to
+ * ignore all bits.
+ */
+ li r0,0
+ mtcr r0
+
+ mfmsr r6
+
+ /* Unfortunately, the stack pointer and the MSR are also clobbered,
+ * so they are saved in the PACA which allows us to restore
+ * our original state after RTAS returns.
+ */
+ std r1,PACAR1(r13)
+ std r6,PACASAVEDMSR(r13)
+
+ /* Setup our real return addr */
+ LOAD_REG_ADDR(r4,rtas_return_loc)
+ clrldi r4,r4,2 /* convert to realmode address */
+ mtlr r4
+
+__enter_rtas:
+ LOAD_REG_ADDR(r4, rtas)
+ ld r5,RTASENTRY(r4) /* get the rtas->entry value */
+ ld r4,RTASBASE(r4) /* get the rtas->base value */
+
+ /*
+ * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we
+ * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in
+ * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
+ * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
+ * MSR[S] is set, it will remain when entering RTAS.
+ * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV
+ * from the saved MSR value and insert into the value RTAS will use.
+ */
+ extrdi r0, r6, 1, 63 - MSR_HV_LG
+ LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
+ insrdi r6, r0, 1, 63 - MSR_HV_LG
+
+ li r0,0
+ mtmsrd r0,1 /* disable RI before using SRR0/1 */
+
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r6
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+rtas_return_loc:
+ FIXUP_ENDIAN
+
+ /* Set SF before anything. */
+ LOAD_REG_IMMEDIATE(r6, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+ mtmsrd r6
+
+ /* relocation is off at this point */
+ GET_PACA(r13)
+
+ bcl 20,31,$+4
+0: mflr r3
+ ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
+
+ ld r1,PACAR1(r13) /* Restore our SP */
+ ld r4,PACASAVEDMSR(r13) /* Restore our MSR */
+
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+_ASM_NOKPROBE_SYMBOL(__enter_rtas)
+_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
+
+ .align 3
+1: .8byte rtas_restore_regs
+
+rtas_restore_regs:
+ /* relocation is on at this point */
+ REST_GPR(2, r1) /* Restore the TOC */
+ REST_NVGPRS(r1) /* Restore the non-volatiles */
+
+ ld r4,_CCR(r1)
+ mtcr r4
+ ld r5,_CTR(r1)
+ mtctr r5
+ ld r6,_XER(r1)
+ mtspr SPRN_XER,r6
+ ld r7,_DAR(r1)
+ mtdar r7
+ ld r8,_DSISR(r1)
+ mtdsisr r8
+
+ addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
+ ld r0,16(r1) /* get return address */
+
+ mtlr r0
+ blr /* return to caller */
+
+#endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 84f794782c62..359577ec1680 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -120,7 +120,7 @@ static struct kmem_cache *flash_block_cache = NULL;
/*
* Local copy of the flash block list.
*
- * The rtas_firmware_flash_list varable will be
+ * The rtas_firmware_flash_list variable will be
* set once the data is fully read.
*
* For convenience as we build the list we use virtual addrs,
@@ -376,7 +376,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op)
s32 rc;
do {
- rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1,
+ rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1,
NULL, op);
} while (rtas_busy_delay(rc));
@@ -444,7 +444,7 @@ error:
*/
static void validate_flash(struct rtas_validate_flash_t *args_buf)
{
- int token = rtas_token("ibm,validate-flash-image");
+ int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE);
int update_results;
s32 rc;
@@ -570,7 +570,7 @@ static void rtas_flash_firmware(int reboot_type)
return;
}
- update_token = rtas_token("ibm,update-flash-64-and-reboot");
+ update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT);
if (update_token == RTAS_UNKNOWN_SERVICE) {
printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot "
"is not available -- not a service partition?\n");
@@ -653,46 +653,46 @@ static void rtas_flash_firmware(int reboot_type)
*/
struct rtas_flash_file {
const char *filename;
- const char *rtas_call_name;
+ const rtas_fn_handle_t handle;
int *status;
- const struct file_operations fops;
+ const struct proc_ops ops;
};
static const struct rtas_flash_file rtas_flash_files[] = {
{
.filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME,
- .rtas_call_name = "ibm,update-flash-64-and-reboot",
+ .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_msg,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_msg,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME,
- .rtas_call_name = "ibm,update-flash-64-and-reboot",
+ .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_num,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_num,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" VALIDATE_FLASH_NAME,
- .rtas_call_name = "ibm,validate-flash-image",
+ .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE,
.status = &rtas_validate_flash_data.status,
- .fops.read = validate_flash_read,
- .fops.write = validate_flash_write,
- .fops.release = validate_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = validate_flash_read,
+ .ops.proc_write = validate_flash_write,
+ .ops.proc_release = validate_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" MANAGE_FLASH_NAME,
- .rtas_call_name = "ibm,manage-flash-image",
+ .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE,
.status = &rtas_manage_flash_data.status,
- .fops.read = manage_flash_read,
- .fops.write = manage_flash_write,
- .fops.llseek = default_llseek,
+ .ops.proc_read = manage_flash_read,
+ .ops.proc_write = manage_flash_write,
+ .ops.proc_lseek = default_llseek,
}
};
@@ -700,8 +700,7 @@ static int __init rtas_flash_init(void)
{
int i;
- if (rtas_token("ibm,update-flash-64-and-reboot") ==
- RTAS_UNKNOWN_SERVICE) {
+ if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) {
pr_info("rtas_flash: no firmware flash support\n");
return -EINVAL;
}
@@ -710,9 +709,9 @@ static int __init rtas_flash_init(void)
if (!rtas_validate_flash_data.buf)
return -ENOMEM;
- flash_block_cache = kmem_cache_create("rtas_flash_cache",
- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
- NULL);
+ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+ 0, 0, RTAS_BLK_SIZE, NULL);
if (!flash_block_cache) {
printk(KERN_ERR "%s: failed to create block cache\n",
__func__);
@@ -723,14 +722,14 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, 0600, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->ops))
goto enomem;
/*
* This code assumes that the status int is the first member of the
* struct
*/
- token = rtas_token(f->rtas_call_name);
+ token = rtas_function_token(f->handle);
if (token == RTAS_UNKNOWN_SERVICE)
*f->status = FLASH_AUTH;
else
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index ae5e43eaca48..fccf96e897f6 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -13,11 +13,12 @@
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/irq.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
@@ -42,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where)
return 0;
}
-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
{
int returnval = -1;
unsigned long buid, addr;
@@ -86,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
/* Validity of pdn is checked in here */
- ret = rtas_read_config(pdn, where, size, val);
+ ret = rtas_pci_dn_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -94,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
return ret;
}
-int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
{
unsigned long buid, addr;
int ret;
@@ -133,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
/* Validity of pdn is checked in here. */
- return rtas_write_config(pdn, where, size, val);
+ return rtas_pci_dn_write_config(pdn, where, size, val);
}
static struct pci_ops rtas_pci_ops = {
@@ -190,10 +191,10 @@ static void python_countermeasures(struct device_node *dev)
void __init init_pci_config_tokens(void)
{
- read_pci_config = rtas_token("read-pci-config");
- write_pci_config = rtas_token("write-pci-config");
- ibm_read_pci_config = rtas_token("ibm,read-pci-config");
- ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+ read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG);
+ write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG);
+ ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG);
+ ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG);
}
unsigned long get_phb_buid(struct device_node *phb)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 8d02e047f96a..9bba469239fc 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
@@ -22,7 +23,6 @@
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/rtas.h>
-#include <asm/prom.h>
#include <asm/nvram.h>
#include <linux/atomic.h>
#include <asm/machdep.h>
@@ -273,37 +273,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
}
}
-#ifdef CONFIG_PPC_PSERIES
-static void handle_prrn_event(s32 scope)
-{
- /*
- * For PRRN, we must pass the negative of the scope value in
- * the RTAS event.
- */
- pseries_devicetree_update(-scope);
- numa_update_cpu_topology(false);
-}
-
static void handle_rtas_event(const struct rtas_error_log *log)
{
- if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
+ if (!machine_is(pseries))
return;
- /* For PRRN Events the extended log length is used to denote
- * the scope for calling rtas update-nodes.
- */
- handle_prrn_event(rtas_error_extended_log_length(log));
-}
-
-#else
-
-static void handle_rtas_event(const struct rtas_error_log *log)
-{
- return;
+ if (rtas_error_type(log) == RTAS_TYPE_PRRN)
+ pr_info_ratelimited("Platform resource reassignment ignored.\n");
}
-#endif
-
static int rtas_log_open(struct inode * inode, struct file * file)
{
return 0;
@@ -385,12 +363,12 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
return 0;
}
-static const struct file_operations proc_rtas_log_operations = {
- .read = rtas_log_read,
- .poll = rtas_log_poll,
- .open = rtas_log_open,
- .release = rtas_log_release,
- .llseek = noop_llseek,
+static const struct proc_ops rtas_log_proc_ops = {
+ .proc_read = rtas_log_read,
+ .proc_poll = rtas_log_poll,
+ .proc_open = rtas_log_open,
+ .proc_release = rtas_log_release,
+ .proc_lseek = noop_llseek,
};
static int enable_surveillance(int timeout)
@@ -451,7 +429,7 @@ static void rtas_event_scan(struct work_struct *w)
do_event_scan();
- get_online_cpus();
+ cpus_read_lock();
/* raw_ OK because just using CPU as starting point. */
cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
@@ -473,11 +451,11 @@ static void rtas_event_scan(struct work_struct *w)
schedule_delayed_work_on(cpu, &event_scan_work,
__round_jiffies_relative(event_scan_delay, cpu));
- put_online_cpus();
+ cpus_read_unlock();
}
#ifdef CONFIG_PPC64
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
unsigned int err_type ;
int rc ;
@@ -495,12 +473,12 @@ static void retrieve_nvram_error_log(void)
}
}
#else /* CONFIG_PPC64 */
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
}
#endif /* CONFIG_PPC64 */
-static void start_event_scan(void)
+static void __init start_event_scan(void)
{
printk(KERN_DEBUG "RTAS daemon started\n");
pr_debug("rtasd: will sleep for %d milliseconds\n",
@@ -522,18 +500,20 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
static int __init rtas_event_scan_init(void)
{
+ int err;
+
if (!machine_is(pseries) && !machine_is(chrp))
return 0;
/* No RTAS */
- event_scan = rtas_token("event-scan");
+ event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN);
if (event_scan == RTAS_UNKNOWN_SERVICE) {
printk(KERN_INFO "rtasd: No event-scan on system\n");
return -ENODEV;
}
- rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
- if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
+ err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate);
+ if (err) {
printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
return -ENODEV;
}
@@ -572,7 +552,7 @@ static int __init rtas_init(void)
return -ENODEV;
entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
- &proc_rtas_log_operations);
+ &rtas_log_proc_ops);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
index 4b982324d368..f9af305d9579 100644
--- a/arch/powerpc/kernel/secure_boot.c
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -23,12 +23,19 @@ bool is_ppc_secureboot_enabled(void)
{
struct device_node *node;
bool enabled = false;
+ u32 secureboot;
node = get_ppc_fw_sb_node();
enabled = of_property_read_bool(node, "os-secureboot-enforcing");
-
of_node_put(node);
+ if (enabled)
+ goto out;
+
+ if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot))
+ enabled = (secureboot > 1);
+
+out:
pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
return enabled;
@@ -38,12 +45,19 @@ bool is_ppc_trustedboot_enabled(void)
{
struct device_node *node;
bool enabled = false;
+ u32 trustedboot;
node = get_ppc_fw_sb_node();
enabled = of_property_read_bool(node, "trusted-enabled");
-
of_node_put(node);
+ if (enabled)
+ goto out;
+
+ if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot))
+ enabled = (trustedboot > 0);
+
+out:
pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
return enabled;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index bd70f5be1c27..4856e1a5161c 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -7,29 +7,35 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/device.h>
+#include <linux/memblock.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
#include <linux/seq_buf.h>
+#include <linux/debugfs.h>
#include <asm/asm-prototypes.h>
#include <asm/code-patching.h>
-#include <asm/debugfs.h>
#include <asm/security_features.h>
+#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/inst.h>
+#include "setup.h"
u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
-enum count_cache_flush_type {
- COUNT_CACHE_FLUSH_NONE = 0x1,
- COUNT_CACHE_FLUSH_SW = 0x2,
- COUNT_CACHE_FLUSH_HW = 0x4,
+enum branch_cache_flush_type {
+ BRANCH_CACHE_FLUSH_NONE = 0x1,
+ BRANCH_CACHE_FLUSH_SW = 0x2,
+ BRANCH_CACHE_FLUSH_HW = 0x4,
};
-static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
-static bool link_stack_flush_enabled;
+static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -39,7 +45,7 @@ static void enable_barrier_nospec(bool enable)
do_barrier_nospec_fixups(enable);
}
-void setup_barrier_nospec(void)
+void __init setup_barrier_nospec(void)
{
bool enable;
@@ -101,7 +107,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
static __init int barrier_nospec_debugfs_init(void)
{
debugfs_create_file_unsafe("barrier_nospec", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&fops_barrier_nospec);
return 0;
}
@@ -109,14 +115,14 @@ device_initcall(barrier_nospec_debugfs_init);
static __init int security_feature_debugfs_init(void)
{
- debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
+ debugfs_create_x64("security_features", 0400, arch_debugfs_dir,
&powerpc_security_features);
return 0;
}
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -124,17 +130,17 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
-#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-void setup_spectre_v2(void)
+#ifdef CONFIG_PPC_E500
+void __init setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
else
btb_flush_enabled = true;
}
-#endif /* CONFIG_PPC_FSL_BOOK3E */
+#endif /* CONFIG_PPC_E500 */
#ifdef CONFIG_PPC_BOOK3S_64
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
@@ -216,24 +222,25 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (ccd)
seq_buf_printf(&s, "Indirect branch cache disabled");
- if (link_stack_flush_enabled)
- seq_buf_printf(&s, ", Software link stack flush");
-
- } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+ } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
seq_buf_printf(&s, "Mitigation: Software count cache flush");
- if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW)
seq_buf_printf(&s, " (hardware accelerated)");
- if (link_stack_flush_enabled)
- seq_buf_printf(&s, ", Software link stack flush");
-
} else if (btb_flush_enabled) {
seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
} else {
seq_buf_printf(&s, "Vulnerable");
}
+ if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+ if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+ seq_buf_printf(&s, ", Software link stack flush");
+ if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW)
+ seq_buf_printf(&s, " (hardware accelerated)");
+ }
+
seq_buf_printf(&s, "\n");
return s.len;
@@ -246,7 +253,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
static enum stf_barrier_type stf_enabled_flush_types;
static bool no_stf_barrier;
-bool stf_barrier;
+static bool stf_barrier;
static int __init handle_no_stf_barrier(char *p)
{
@@ -257,6 +264,11 @@ static int __init handle_no_stf_barrier(char *p)
early_param("no_stf_barrier", handle_no_stf_barrier);
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+ return stf_enabled_flush_types;
+}
+
/* This is the generic flag used by other architectures */
static int __init handle_ssbd(char *p)
{
@@ -294,9 +306,7 @@ static void stf_barrier_enable(bool enable)
void setup_stf_barrier(void)
{
enum stf_barrier_type type;
- bool enable, hv;
-
- hv = cpu_has_feature(CPU_FTR_HVMODE);
+ bool enable;
/* Default to fallback in case fw-features are not available */
if (cpu_has_feature(CPU_FTR_ARCH_300))
@@ -309,8 +319,7 @@ void setup_stf_barrier(void)
type = STF_BARRIER_NONE;
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
- (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
- (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+ security_ftr_enabled(SEC_FTR_STF_BARRIER);
if (type == STF_BARRIER_FALLBACK) {
pr_info("stf-barrier: fallback barrier available\n");
@@ -353,6 +362,41 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
return sprintf(buf, "Vulnerable\n");
}
+static int ssb_prctl_get(struct task_struct *task)
+{
+ /*
+ * The STF_BARRIER feature is on by default, so if it's off that means
+ * firmware has explicitly said the CPU is not vulnerable via either
+ * the hypercall or device tree.
+ */
+ if (!security_ftr_enabled(SEC_FTR_STF_BARRIER))
+ return PR_SPEC_NOT_AFFECTED;
+
+ /*
+ * If the system's CPU has no known barrier (see setup_stf_barrier())
+ * then assume that the CPU is not vulnerable.
+ */
+ if (stf_enabled_flush_types == STF_BARRIER_NONE)
+ return PR_SPEC_NOT_AFFECTED;
+
+ /*
+ * Otherwise the CPU is vulnerable. The barrier is not a global or
+ * per-process mitigation, so the only value that can be reported here
+ * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc.
+ */
+ return PR_SPEC_ENABLE;
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
#ifdef CONFIG_DEBUG_FS
static int stf_barrier_set(void *data, u64 val)
{
@@ -383,65 +427,103 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
static __init int stf_barrier_debugfs_init(void)
{
- debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+ debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir,
NULL, &fops_stf_barrier);
return 0;
}
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-static void no_count_cache_flush(void)
+static void update_branch_cache_flush(void)
{
- count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
- pr_info("count-cache-flush: software flush disabled.\n");
-}
+ u32 *site, __maybe_unused *site2;
-static void toggle_count_cache_flush(bool enable)
-{
- if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
- !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
- enable = false;
-
- if (!enable) {
- patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
-#endif
- pr_info("link-stack-flush: software flush disabled.\n");
- link_stack_flush_enabled = false;
- no_count_cache_flush();
- return;
+ site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_p9;
+ // This controls the branch from guest_exit_cont to kvm_flush_link_stack
+ if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
+ } else {
+ // Could use HW flush, but that could also flush count cache
+ patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
+#endif
- // This enables the branch from _switch to flush_count_cache
- patch_branch_site(&patch__call_flush_count_cache,
- (u64)&flush_count_cache, BRANCH_SET_LINK);
+ // Patch out the bcctr first, then nop the rest
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+
+ // This controls the branch from _switch to flush_branch_caches
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
+ link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ // Nothing to be done
+
+ } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW &&
+ link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) {
+ // Patch in the bcctr last
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH));
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- // This enables the branch from guest_exit_cont to kvm_flush_link_stack
- patch_branch_site(&patch__call_kvm_flush_link_stack,
- (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
-#endif
+ } else {
+ patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK);
- pr_info("link-stack-flush: software flush enabled.\n");
- link_stack_flush_enabled = true;
+ // If we just need to flush the link stack, early return
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ patch_instruction_site(&patch__flush_link_stack_return,
+ ppc_inst(PPC_RAW_BLR()));
- // If we just need to flush the link stack, patch an early return
- if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
- patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
- no_count_cache_flush();
- return;
+ // If we have flush instruction, early return
+ } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
+ patch_instruction_site(&patch__flush_count_cache_return,
+ ppc_inst(PPC_RAW_BLR()));
+ }
}
+}
- if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
- count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
- pr_info("count-cache-flush: full software flush sequence enabled.\n");
- return;
+static void toggle_branch_cache_flush(bool enable)
+{
+ if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE)
+ count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+ pr_info("count-cache-flush: flush disabled.\n");
+ } else {
+ if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
+ count_cache_flush_type = BRANCH_CACHE_FLUSH_HW;
+ pr_info("count-cache-flush: hardware flush enabled.\n");
+ } else {
+ count_cache_flush_type = BRANCH_CACHE_FLUSH_SW;
+ pr_info("count-cache-flush: software flush enabled.\n");
+ }
}
- patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR);
- count_cache_flush_type = COUNT_CACHE_FLUSH_HW;
- pr_info("count-cache-flush: hardware assisted flush sequence enabled\n");
+ if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) {
+ if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+ link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+ pr_info("link-stack-flush: flush disabled.\n");
+ } else {
+ if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) {
+ link_stack_flush_type = BRANCH_CACHE_FLUSH_HW;
+ pr_info("link-stack-flush: hardware flush enabled.\n");
+ } else {
+ link_stack_flush_type = BRANCH_CACHE_FLUSH_SW;
+ pr_info("link-stack-flush: software flush enabled.\n");
+ }
+ }
+
+ update_branch_cache_flush();
}
void setup_count_cache_flush(void)
@@ -465,7 +547,179 @@ void setup_count_cache_flush(void)
security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
- toggle_count_cache_flush(enable);
+ toggle_branch_cache_flush(enable);
+}
+
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
+bool rfi_flush;
+static bool entry_flush;
+static bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
+
+static int __init handle_no_rfi_flush(char *p)
+{
+ pr_info("rfi-flush: disabled on command line.");
+ no_rfi_flush = true;
+ return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+static int __init handle_no_entry_flush(char *p)
+{
+ pr_info("entry-flush: disabled on command line.");
+ no_entry_flush = true;
+ return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+ pr_info("uaccess-flush: disabled on command line.");
+ no_uaccess_flush = true;
+ return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+void rfi_flush_enable(bool enable)
+{
+ if (enable) {
+ do_rfi_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+ rfi_flush = enable;
+}
+
+static void entry_flush_enable(bool enable)
+{
+ if (enable) {
+ do_entry_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ do_entry_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ entry_flush = enable;
+}
+
+static void uaccess_flush_enable(bool enable)
+{
+ if (enable) {
+ do_uaccess_flush_fixups(enabled_flush_types);
+ static_branch_enable(&uaccess_flush_key);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ static_branch_disable(&uaccess_flush_key);
+ do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ uaccess_flush = enable;
+}
+
+static void __ref init_fallback_flush(void)
+{
+ u64 l1d_size, limit;
+ int cpu;
+
+ /* Only allocate the fallback flush area once (at boot time). */
+ if (l1d_flush_fallback_area)
+ return;
+
+ l1d_size = ppc64_caches.l1d.size;
+
+ /*
+ * If there is no d-cache-size property in the device tree, l1d_size
+ * could be zero. That leads to the loop in the asm wrapping around to
+ * 2^64-1, and then walking off the end of the fallback area and
+ * eventually causing a page fault which is fatal. Just default to
+ * something vaguely sane.
+ */
+ if (!l1d_size)
+ l1d_size = (64 * 1024);
+
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+ /*
+ * Align to L1d size, and size it at 2x L1d size, to catch possible
+ * hardware prefetch runoff. We don't have a recipe for load patterns to
+ * reliably avoid the prefetcher.
+ */
+ l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+ l1d_size, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!l1d_flush_fallback_area)
+ panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+ __func__, l1d_size * 2, l1d_size, &limit);
+
+
+ for_each_possible_cpu(cpu) {
+ struct paca_struct *paca = paca_ptrs[cpu];
+ paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca->l1d_flush_size = l1d_size;
+ }
+}
+
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+ if (types & L1D_FLUSH_FALLBACK) {
+ pr_info("rfi-flush: fallback displacement flush available\n");
+ init_fallback_flush();
+ }
+
+ if (types & L1D_FLUSH_ORI)
+ pr_info("rfi-flush: ori type flush available\n");
+
+ if (types & L1D_FLUSH_MTTRIG)
+ pr_info("rfi-flush: mttrig type flush available\n");
+
+ enabled_flush_types = types;
+
+ if (!cpu_mitigations_off() && !no_rfi_flush)
+ rfi_flush_enable(enable);
+}
+
+void setup_entry_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_entry_flush)
+ entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_uaccess_flush)
+ uaccess_flush_enable(enable);
}
#ifdef CONFIG_DEBUG_FS
@@ -480,14 +734,24 @@ static int count_cache_flush_set(void *data, u64 val)
else
return -EINVAL;
- toggle_count_cache_flush(enable);
+ toggle_branch_cache_flush(enable);
return 0;
}
static int count_cache_flush_get(void *data, u64 *val)
{
- if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE)
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE)
+ *val = 0;
+ else
+ *val = 1;
+
+ return 0;
+}
+
+static int link_stack_flush_get(void *data, u64 *val)
+{
+ if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE)
*val = 0;
else
*val = 1;
@@ -497,14 +761,106 @@ static int count_cache_flush_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get,
+ count_cache_flush_set, "%llu\n");
static __init int count_cache_flush_debugfs_init(void)
{
debugfs_create_file_unsafe("count_cache_flush", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&fops_count_cache_flush);
+ debugfs_create_file_unsafe("link_stack_flush", 0600,
+ arch_debugfs_dir, NULL,
+ &fops_link_stack_flush);
return 0;
}
device_initcall(count_cache_flush_debugfs_init);
+
+static int rfi_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != rfi_flush)
+ rfi_flush_enable(enable);
+
+ return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+ *val = rfi_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static int entry_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != entry_flush)
+ entry_flush_enable(enable);
+
+ return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+ *val = entry_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != uaccess_flush)
+ uaccess_flush_enable(enable);
+
+ return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+ *val = uaccess_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush);
+ debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush);
+ debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
index 6a29777d6a2d..19172a2804f0 100644
--- a/arch/powerpc/kernel/secvar-ops.c
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -8,10 +8,16 @@
#include <linux/cache.h>
#include <asm/secvar.h>
+#include <asm/bug.h>
-const struct secvar_operations *secvar_ops __ro_after_init;
+const struct secvar_operations *secvar_ops __ro_after_init = NULL;
-void set_secvar_ops(const struct secvar_operations *ops)
+int set_secvar_ops(const struct secvar_operations *ops)
{
+ if (WARN_ON_ONCE(secvar_ops))
+ return -EBUSY;
+
secvar_ops = ops;
+
+ return 0;
}
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
index a0a78aba2083..eb3c053f323f 100644
--- a/arch/powerpc/kernel/secvar-sysfs.c
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -21,53 +21,48 @@ static struct kset *secvar_kset;
static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- ssize_t rc = 0;
- struct device_node *node;
- const char *format;
+ char tmp[32];
+ ssize_t len = secvar_ops->format(tmp, sizeof(tmp));
- node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
- if (!of_device_is_available(node))
- return -ENODEV;
-
- rc = of_property_read_string(node, "format", &format);
- if (rc)
- return rc;
+ if (len > 0)
+ return sysfs_emit(buf, "%s\n", tmp);
+ else if (len < 0)
+ pr_err("Error %zd reading format string\n", len);
+ else
+ pr_err("Got empty format string from backend\n");
- rc = sprintf(buf, "%s\n", format);
-
- of_node_put(node);
-
- return rc;
+ return -EIO;
}
static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- uint64_t dsize;
+ u64 dsize;
int rc;
rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
if (rc) {
- pr_err("Error retrieving %s variable size %d\n", kobj->name,
- rc);
+ if (rc != -ENOENT)
+ pr_err("Error retrieving %s variable size %d\n", kobj->name, rc);
return rc;
}
- return sprintf(buf, "%llu\n", dsize);
+ return sysfs_emit(buf, "%llu\n", dsize);
}
static ssize_t data_read(struct file *filep, struct kobject *kobj,
struct bin_attribute *attr, char *buf, loff_t off,
size_t count)
{
- uint64_t dsize;
char *data;
+ u64 dsize;
int rc;
rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
if (rc) {
- pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+ if (rc != -ENOENT)
+ pr_err("Error getting %s variable size %d\n", kobj->name, rc);
return rc;
}
pr_debug("dsize is %llu\n", dsize);
@@ -138,34 +133,58 @@ static struct kobj_type secvar_ktype = {
static int update_kobj_size(void)
{
- struct device_node *node;
u64 varsize;
- int rc = 0;
+ int rc = secvar_ops->max_size(&varsize);
- node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
- if (!of_device_is_available(node)) {
- rc = -ENODEV;
- goto out;
- }
-
- rc = of_property_read_u64(node, "max-var-size", &varsize);
if (rc)
- goto out;
+ return rc;
data_attr.size = varsize;
update_attr.size = varsize;
-out:
- of_node_put(node);
+ return 0;
+}
- return rc;
+static int secvar_sysfs_config(struct kobject *kobj)
+{
+ struct attribute_group config_group = {
+ .name = "config",
+ .attrs = (struct attribute **)secvar_ops->config_attrs,
+ };
+
+ if (secvar_ops->config_attrs)
+ return sysfs_create_group(kobj, &config_group);
+
+ return 0;
+}
+
+static int add_var(const char *name)
+{
+ struct kobject *kobj;
+ int rc;
+
+ kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+ if (!kobj)
+ return -ENOMEM;
+
+ kobject_init(kobj, &secvar_ktype);
+
+ rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+ if (rc) {
+ pr_warn("kobject_add error %d for attribute: %s\n", rc,
+ name);
+ kobject_put(kobj);
+ return rc;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+ return 0;
}
static int secvar_sysfs_load(void)
{
+ u64 namesize = 0;
char *name;
- uint64_t namesize = 0;
- struct kobject *kobj;
int rc;
name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
@@ -176,73 +195,99 @@ static int secvar_sysfs_load(void)
rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
if (rc) {
if (rc != -ENOENT)
- pr_err("error getting secvar from firmware %d\n",
- rc);
- break;
- }
+ pr_err("error getting secvar from firmware %d\n", rc);
+ else
+ rc = 0;
- kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
- if (!kobj) {
- rc = -ENOMEM;
break;
}
- kobject_init(kobj, &secvar_ktype);
-
- rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
- if (rc) {
- pr_warn("kobject_add error %d for attribute: %s\n", rc,
- name);
- kobject_put(kobj);
- kobj = NULL;
- }
-
- if (kobj)
- kobject_uevent(kobj, KOBJ_ADD);
-
+ rc = add_var(name);
} while (!rc);
kfree(name);
return rc;
}
+static int secvar_sysfs_load_static(void)
+{
+ const char * const *name_ptr = secvar_ops->var_names;
+ int rc;
+
+ while (*name_ptr) {
+ rc = add_var(*name_ptr);
+ if (rc)
+ return rc;
+ name_ptr++;
+ }
+
+ return 0;
+}
+
static int secvar_sysfs_init(void)
{
+ u64 max_size;
int rc;
if (!secvar_ops) {
- pr_warn("secvar: failed to retrieve secvar operations.\n");
+ pr_warn("Failed to retrieve secvar operations\n");
return -ENODEV;
}
secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
if (!secvar_kobj) {
- pr_err("secvar: Failed to create firmware kobj\n");
+ pr_err("Failed to create firmware kobj\n");
return -ENOMEM;
}
rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
if (rc) {
- kobject_put(secvar_kobj);
- return -ENOMEM;
+ pr_err("Failed to create format object\n");
+ rc = -ENOMEM;
+ goto err;
}
secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
if (!secvar_kset) {
- pr_err("secvar: sysfs kobject registration failed.\n");
- kobject_put(secvar_kobj);
- return -ENOMEM;
+ pr_err("sysfs kobject registration failed\n");
+ rc = -ENOMEM;
+ goto err;
}
rc = update_kobj_size();
if (rc) {
pr_err("Cannot read the size of the attribute\n");
- return rc;
+ goto err;
+ }
+
+ rc = secvar_sysfs_config(secvar_kobj);
+ if (rc) {
+ pr_err("Failed to create config directory\n");
+ goto err;
}
- secvar_sysfs_load();
+ if (secvar_ops->get_next)
+ rc = secvar_sysfs_load();
+ else
+ rc = secvar_sysfs_load_static();
+
+ if (rc) {
+ pr_err("Failed to create variable attributes\n");
+ goto err;
+ }
+
+ // Due to sysfs limitations, we will only ever get a write buffer of
+ // up to 1 page in size. Print a warning if this is potentially going
+ // to cause problems, so that the user is aware.
+ secvar_ops->max_size(&max_size);
+ if (max_size > PAGE_SIZE)
+ pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n",
+ PAGE_SIZE, max_size);
return 0;
+err:
+ kobject_put(secvar_kobj);
+ return rc;
}
late_initcall(secvar_sysfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 488f1eecc0de..9b142b9d5187 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -9,6 +9,7 @@
#undef DEBUG
#include <linux/export.h>
+#include <linux/panic_notifier.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
@@ -17,27 +18,27 @@
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/platform_device.h>
+#include <linux/printk.h>
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
-#include <linux/screen_info.h>
#include <linux/root_dev.h>
-#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/unistd.h>
+#include <linux/seq_buf.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
#include <linux/hugetlb.h>
-#include <asm/debugfs.h>
+#include <linux/pgtable.h>
#include <asm/io.h>
#include <asm/paca.h>
-#include <asm/prom.h>
#include <asm/processor.h>
#include <asm/vdso_datapage.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
@@ -57,6 +58,7 @@
#include <asm/xmon.h>
#include <asm/cputhreads.h>
#include <mm/mmu_decl.h>
+#include <asm/archrandom.h>
#include <asm/fadump.h>
#include <asm/udbg.h>
#include <asm/hugetlb.h>
@@ -64,11 +66,11 @@
#include <asm/mmu_context.h>
#include <asm/cpu_has_feature.h>
#include <asm/kasan.h>
+#include <asm/mce.h>
#include "setup.h"
#ifdef DEBUG
-#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
@@ -84,31 +86,16 @@ EXPORT_SYMBOL(machine_id);
int boot_cpuid = -1;
EXPORT_SYMBOL_GPL(boot_cpuid);
+#ifdef CONFIG_PPC64
+int boot_cpu_hwid = -1;
+#endif
+
/*
* These are used in binfmt_elf.c to put aux entries on the stack
* for each elf executable being started.
*/
int dcache_bsize;
int icache_bsize;
-int ucache_bsize;
-
-
-unsigned long klimit = (unsigned long) _end;
-
-/*
- * This still seems to be needed... -- paulus
- */
-struct screen_info screen_info = {
- .orig_x = 0,
- .orig_y = 25,
- .orig_video_cols = 80,
- .orig_video_lines = 25,
- .orig_video_isVGA = 1,
- .orig_video_points = 16
-};
-#if defined(CONFIG_FB_VGA16_MODULE)
-EXPORT_SYMBOL(screen_info);
-#endif
/* Variables required to store legacy IO irq routing */
int of_i8042_kbd_irq;
@@ -165,9 +152,7 @@ void machine_restart(char *cmd)
void machine_power_off(void)
{
machine_shutdown();
- if (pm_power_off)
- pm_power_off();
-
+ do_kernel_power_off();
smp_send_stop();
machine_hang();
}
@@ -177,6 +162,14 @@ EXPORT_SYMBOL_GPL(machine_power_off);
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
+size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+ if (max_longs && ppc_md.get_random_seed && ppc_md.get_random_seed(v))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(arch_get_random_seed_longs);
+
void machine_halt(void)
{
machine_shutdown();
@@ -239,18 +232,17 @@ static int show_cpuinfo(struct seq_file *m, void *v)
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
- seq_printf(m, "processor\t: %lu\n", cpu_id);
- seq_printf(m, "cpu\t\t: ");
+ seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id);
if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
- seq_printf(m, "%s", cur_cpu_spec->cpu_name);
+ seq_puts(m, cur_cpu_spec->cpu_name);
else
seq_printf(m, "unknown (%08x)", pvr);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- seq_printf(m, ", altivec supported");
+ seq_puts(m, ", altivec supported");
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
#ifdef CONFIG_TAU
if (cpu_has_feature(CPU_FTR_TAU)) {
@@ -283,11 +275,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
proc_freq / 1000000, proc_freq % 1000000);
- if (ppc_md.show_percpuinfo != NULL)
- ppc_md.show_percpuinfo(m, cpu_id);
-
/* If we are a Freescale core do a simple check so
- * we dont have to keep adding cases in the future */
+ * we don't have to keep adding cases in the future */
if (PVR_VER(pvr) & 0x8000) {
switch (PVR_VER(pvr)) {
case 0x8000: /* 7441/7450/7451, Voyager */
@@ -306,15 +295,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
}
} else {
switch (PVR_VER(pvr)) {
- case 0x0020: /* 403 family */
- maj = PVR_MAJ(pvr) + 1;
- min = PVR_MIN(pvr);
- break;
case 0x1008: /* 740P/750P ?? */
maj = ((pvr >> 8) & 0xFF) - 1;
min = pvr & 0xFF;
break;
case 0x004e: /* POWER9 bits 12-15 give chip type */
+ case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
maj = (pvr >> 8) & 0x0F;
min = pvr & 0xFF;
break;
@@ -332,7 +318,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
@@ -467,8 +453,8 @@ void __init smp_setup_cpu_maps(void)
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
if (intserv) {
- DBG(" ibm,ppc-interrupt-server#s -> %d threads\n",
- nthreads);
+ DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n",
+ (len / sizeof(int)));
} else {
DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n");
intserv = of_get_property(dn, "reg", &len);
@@ -593,7 +579,15 @@ static __init int add_pcspkr(void)
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
-void probe_machine(void)
+static char ppc_hw_desc_buf[128] __initdata;
+
+struct seq_buf ppc_hw_desc __initdata = {
+ .buffer = ppc_hw_desc_buf,
+ .size = sizeof(ppc_hw_desc_buf),
+ .len = 0,
+};
+
+static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
@@ -619,13 +613,14 @@ void probe_machine(void)
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
- DBG(" %s ...", machine_id->name);
+ DBG(" %s ...\n", machine_id->name);
+ if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
+ continue;
memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
- if (ppc_md.probe()) {
- DBG(" match !\n");
- break;
- }
- DBG("\n");
+ if (ppc_md.probe && !ppc_md.probe())
+ continue;
+ DBG(" %s match !\n", machine_id->name);
+ break;
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
@@ -633,7 +628,13 @@ void probe_machine(void)
for (;;);
}
- printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
+ // Append the machine name to other info we've gathered
+ seq_buf_puts(&ppc_hw_desc, ppc_md.name);
+
+ // Set the generic hardware description shown in oopses
+ dump_stack_set_arch_desc(ppc_hw_desc.buffer);
+
+ pr_info("Hardware name: %s\n", ppc_hw_desc.buffer);
}
/* Match a class of boards, not a specific device configuration. */
@@ -691,8 +692,25 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-static int ppc_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+/*
+ * Panic notifiers setup
+ *
+ * We have 3 notifiers for powerpc, each one from a different "nature":
+ *
+ * - ppc_panic_fadump_handler() is a hypervisor notifier, which hard-disables
+ * IRQs and deal with the Firmware-Assisted dump, when it is configured;
+ * should run early in the panic path.
+ *
+ * - dump_kernel_offset() is an informative notifier, just showing the KASLR
+ * offset if we have RANDOMIZE_BASE set.
+ *
+ * - ppc_panic_platform_handler() is a low-level handler that's registered
+ * only if the platform wishes to perform final actions in the panic path,
+ * hence it should run late and might not even return. Currently, only
+ * pseries and ps3 platforms register callbacks.
+ */
+static int ppc_panic_fadump_handler(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
/*
* panic does a local_irq_disable, but we really
@@ -702,45 +720,63 @@ static int ppc_panic_event(struct notifier_block *this,
/*
* If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything else.
+ * its callback and let the firmware handles everything else.
*/
crash_fadump(NULL, ptr);
- if (ppc_md.panic)
- ppc_md.panic(ptr); /* May not return */
+
return NOTIFY_DONE;
}
-static struct notifier_block ppc_panic_block = {
- .notifier_call = ppc_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
-/*
- * Dump out kernel offset information on panic.
- */
static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
void *p)
{
pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
kaslr_offset(), KERNELBASE);
- return 0;
+ return NOTIFY_DONE;
}
+static int ppc_panic_platform_handler(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * This handler is only registered if we have a panic callback
+ * on ppc_md, hence NULL check is not needed.
+ * Also, it may not return, so it runs really late on panic path.
+ */
+ ppc_md.panic(ptr);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_fadump_block = {
+ .notifier_call = ppc_panic_fadump_handler,
+ .priority = INT_MAX, /* run early, to notify the firmware ASAP */
+};
+
static struct notifier_block kernel_offset_notifier = {
- .notifier_call = dump_kernel_offset
+ .notifier_call = dump_kernel_offset,
+};
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_platform_handler,
+ .priority = INT_MIN, /* may not return; must be done last */
};
void __init setup_panic(void)
{
+ /* Hard-disables IRQs + deal with FW-assisted dump (fadump) */
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &ppc_fadump_block);
+
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
atomic_notifier_chain_register(&panic_notifier_list,
&kernel_offset_notifier);
- /* PPC64 always does a hard irq disable in its panic handler */
- if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
- return;
- atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+ /* Low-level platform-specific routines that should run on panic */
+ if (ppc_md.panic)
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &ppc_panic_block);
}
#ifdef CONFIG_CHECK_CACHE_COHERENCY
@@ -780,19 +816,6 @@ static int __init check_cache_coherency(void)
late_initcall(check_cache_coherency);
#endif /* CONFIG_CHECK_CACHE_COHERENCY */
-#ifdef CONFIG_DEBUG_FS
-struct dentry *powerpc_debugfs_root;
-EXPORT_SYMBOL(powerpc_debugfs_root);
-
-static int powerpc_debugfs_init(void)
-{
- powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
-
- return powerpc_debugfs_root == NULL;
-}
-arch_initcall(powerpc_debugfs_init);
-#endif
-
void ppc_printk_progress(char *s, unsigned short hex)
{
pr_info("%s\n", s);
@@ -806,8 +829,6 @@ static __init void print_system_info(void)
pr_info("dcache_bsize = 0x%x\n", dcache_bsize);
pr_info("icache_bsize = 0x%x\n", icache_bsize);
- if (ucache_bsize != 0)
- pr_info("ucache_bsize = 0x%x\n", ucache_bsize);
pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
pr_info(" possible = 0x%016lx\n",
@@ -837,7 +858,7 @@ static __init void print_system_info(void)
}
#ifdef CONFIG_SMP
-static void smp_setup_pacas(void)
+static void __init smp_setup_pacas(void)
{
int cpu;
@@ -848,7 +869,7 @@ static void smp_setup_pacas(void)
set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
}
- memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+ memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32));
cpu_to_phys_id = NULL;
}
#endif
@@ -910,6 +931,8 @@ void __init setup_arch(char **cmdline_p)
/* Parse memory topology */
mem_topology_setup();
+ /* Set max_mapnr before paging_init() */
+ set_max_mapnr(max_pfn);
/*
* Release secondary cpus out of their spinloops at 0x60 now that
@@ -923,34 +946,37 @@ void __init setup_arch(char **cmdline_p)
/* On BookE, setup per-core TLB data structures. */
setup_tlb_core_data();
-
- smp_release_cpus();
#endif
/* Print various info about the machine that has been gathered so far. */
print_system_info();
- /* Reserve large chunks of memory for use by CMA for KVM. */
- kvm_cma_reserve();
-
klp_init_thread_info(&init_task);
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
-
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
+ /* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
mm_iommu_init(&init_mm);
+
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
+ mce_init();
+ smp_release_cpus();
+
initmem_init();
- early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+ /*
+ * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must
+ * be called after initmem_init(), so that pageblock_order is initialised.
+ */
+ kvm_cma_reserve();
+ gigantic_hugetlb_cma_reserve();
- if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
- conswitchp = &dummy_con;
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
if (ppc_md.setup_arch)
ppc_md.setup_arch();
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index c82577c4b15d..7912bb50a7cb 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -14,31 +14,31 @@ void irqstack_early_init(void);
#ifdef CONFIG_PPC32
void setup_power_save(void);
#else
-static inline void setup_power_save(void) { };
+static inline void setup_power_save(void) { }
#endif
#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
void check_smt_enabled(void);
#else
-static inline void check_smt_enabled(void) { };
+static inline void check_smt_enabled(void) { }
#endif
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
void setup_tlb_core_data(void);
#else
-static inline void setup_tlb_core_data(void) { };
+static inline void setup_tlb_core_data(void) { }
#endif
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void exc_lvl_early_init(void);
#else
-static inline void exc_lvl_early_init(void) { };
+static inline void exc_lvl_early_init(void) { }
#endif
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK)
void emergency_stack_init(void);
#else
-static inline void emergency_stack_init(void) { };
+static inline void emergency_stack_init(void) { }
#endif
#ifdef CONFIG_PPC64
@@ -55,7 +55,7 @@ extern unsigned long spr_default_dscr;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void kvm_cma_reserve(void);
#else
-static inline void kvm_cma_reserve(void) { };
+static inline void kvm_cma_reserve(void) { }
#endif
#ifdef CONFIG_TAU
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index dcffe927f5b9..b761cc1a403c 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -19,11 +19,12 @@
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/nvram.h>
+#include <linux/pgtable.h>
+#include <linux/of_fdt.h>
+#include <linux/irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -58,7 +59,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid_phys);
int smp_hw_index[NR_CPUS];
EXPORT_SYMBOL(smp_hw_index);
-unsigned long ISA_DMA_THRESHOLD;
unsigned int DMA_MODE_READ;
unsigned int DMA_MODE_WRITE;
@@ -75,20 +75,20 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
*/
notrace void __init machine_init(u64 dt_ptr)
{
- unsigned int *addr = (unsigned int *)patch_site_addr(&patch__memset_nocache);
- unsigned long insn;
+ u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
+ ppc_inst_t insn;
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
- early_ioremap_setup();
+ early_ioremap_init();
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP);
+ patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP()));
- insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+ create_cond_branch(&insn, addr, branch_target(addr), 0x820000);
patch_instruction(addr, insn); /* replace b by bne cr0 */
/* Do some early initialization based on the flat device tree */
@@ -140,7 +140,7 @@ arch_initcall(ppc_init);
static void *__init alloc_stack(void)
{
- void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN);
if (!ptr)
panic("cannot allocate %d bytes for stack at %pS\n",
@@ -153,6 +153,9 @@ void __init irqstack_early_init(void)
{
unsigned int i;
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ return;
+
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
@@ -161,7 +164,19 @@ void __init irqstack_early_init(void)
}
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_VMAP_STACK
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
+
+void __init emergency_stack_init(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ emergency_ctx[i] = alloc_stack();
+}
+#endif
+
+#ifdef CONFIG_BOOKE_OR_40x
void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
@@ -192,7 +207,7 @@ void __init setup_power_save(void)
ppc_md.power_save = ppc6xx_idle;
#endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
cpu_has_feature(CPU_FTR_CAN_NAP))
ppc_md.power_save = e500_idle;
@@ -208,7 +223,4 @@ __init void initialize_cache_info(void)
*/
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
- ucache_bsize = 0;
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
- ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6104917a282d..2f19d5e94485 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -30,13 +30,15 @@
#include <linux/lockdep.h>
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
-#include <asm/debugfs.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kvm_guest.h>
#include <asm/io.h>
#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
@@ -59,13 +61,14 @@
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/code-patching.h>
-#include <asm/livepatch.h>
+#include <asm/ftrace.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
#include <asm/kup.h>
#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
#include "setup.h"
@@ -84,7 +87,7 @@ struct ppc64_caches ppc64_caches = {
};
EXPORT_SYMBOL_GPL(ppc64_caches);
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
void __init setup_tlb_core_data(void)
{
int cpu;
@@ -111,7 +114,6 @@ void __init setup_tlb_core_data(void)
* Should we panic instead?
*/
WARN_ONCE(smt_enabled_at_boot >= 2 &&
- !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
book3e_htw_mode != PPC_HTW_E6500,
"%s: unsupported MMU configuration\n", __func__);
}
@@ -175,14 +177,26 @@ early_param("smt-enabled", early_smt_enabled);
#endif /* CONFIG_SMP */
/** Fix up paca fields required for the boot cpu */
-static void __init fixup_boot_paca(void)
+static void __init fixup_boot_paca(struct paca_struct *boot_paca)
{
/* The boot cpu is started */
- get_paca()->cpu_start = 1;
+ boot_paca->cpu_start = 1;
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Give the early boot machine check stack somewhere to use, use
+ * half of the init stack. This is a bit hacky but there should not be
+ * deep stack usage in early init so shouldn't overflow it or overwrite
+ * things.
+ */
+ boot_paca->mc_emergency_sp = (void *)&init_thread_union +
+ (THREAD_SIZE/2);
+#endif
/* Allow percpu accesses to work until we setup percpu data */
- get_paca()->data_offset = 0;
- /* Mark interrupts disabled in PACA */
- irq_soft_mask_set(IRQS_DISABLED);
+ boot_paca->data_offset = 0;
+ /* Mark interrupts soft and hard disabled in PACA */
+ boot_paca->irq_soft_mask = IRQS_DISABLED;
+ boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
+ WARN_ON(mfmsr() & MSR_EE);
}
static void __init configure_exceptions(void)
@@ -195,8 +209,39 @@ static void __init configure_exceptions(void)
/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ /*
+ * - PR KVM does not support AIL mode interrupts in the host
+ * while a PR guest is running.
+ *
+ * - SCV system call interrupt vectors are only implemented for
+ * AIL mode interrupts.
+ *
+ * - On pseries, AIL mode can only be enabled and disabled
+ * system-wide so when a PR VM is created on a pseries host,
+ * all CPUs of the host are set to AIL=0 mode.
+ *
+ * - Therefore host CPUs must not execute scv while a PR VM
+ * exists.
+ *
+ * - SCV support can not be disabled dynamically because the
+ * feature is advertised to host userspace. Disabling the
+ * facility and emulating it would be possible but is not
+ * implemented.
+ *
+ * - So SCV support is blanket disabled if PR KVM could possibly
+ * run. That is, PR support compiled in, booting on pseries
+ * with hash MMU.
+ */
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
+
/* Enable AIL if possible */
- pseries_enable_reloc_on_exc();
+ if (!pseries_enable_reloc_on_exc()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
/*
* Tell the hypervisor that we want our exceptions to
@@ -227,10 +272,23 @@ static void cpu_ready_for_interrupts(void)
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ unsigned long new_lpcr = lpcr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /* P10 DD1 does not have HAIL */
+ if (pvr_version_is(PVR_POWER10) &&
+ (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+ new_lpcr |= LPCR_AIL_3;
+ else
+ new_lpcr |= LPCR_HAIL;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ new_lpcr |= LPCR_AIL_3;
+ }
+
+ if (new_lpcr != lpcr)
+ mtspr(SPRN_LPCR, new_lpcr);
}
/*
@@ -254,7 +312,7 @@ static void cpu_ready_for_interrupts(void)
unsigned long spr_default_dscr = 0;
-void __init record_spr_defaults(void)
+static void __init record_spr_defaults(void)
{
if (early_cpu_has_feature(CPU_FTR_DSCR))
spr_default_dscr = mfspr(SPRN_DSCR);
@@ -285,18 +343,40 @@ void __init early_setup(unsigned long dt_ptr)
/* -------- printk is _NOT_ safe to use here ! ------- */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
+ initialise_paca(&boot_paca, 0);
+ fixup_boot_paca(&boot_paca);
+ WARN_ON(local_paca);
+ setup_paca(&boot_paca); /* install the paca into registers */
+
+ /* -------- printk is now safe to use ------- */
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
+ enable_machine_check();
+
/* Try new device tree based feature discovery ... */
if (!dt_cpu_ftrs_init(__va(dt_ptr)))
/* Otherwise use the old style CPU table */
identify_cpu(0, mfspr(SPRN_PVR));
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
- initialise_paca(&boot_paca, 0);
- setup_paca(&boot_paca);
- fixup_boot_paca();
-
- /* -------- printk is now safe to use ------- */
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -305,17 +385,21 @@ void __init early_setup(unsigned long dt_ptr)
/*
* Do early initialization using the flattened device
* tree, such as retrieving the physical memory map or
- * calculating/retrieving the hash table size.
+ * calculating/retrieving the hash table size, discover
+ * boot_cpuid and boot_cpu_hwid.
*/
early_init_devtree(__va(dt_ptr));
- /* Now we know the logical id of our boot cpu, setup the paca. */
- if (boot_cpuid != 0) {
- /* Poison paca_ptrs[0] again if it's not the boot cpu */
- memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
- }
- setup_paca(paca_ptrs[boot_cpuid]);
- fixup_boot_paca();
+ allocate_paca_ptrs();
+ allocate_paca(boot_cpuid);
+ set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid);
+ fixup_boot_paca(paca_ptrs[boot_cpuid]);
+ setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
+ // smp_processor_id() now reports boot_cpuid
+
+#ifdef CONFIG_SMP
+ task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current)
+#endif
/*
* Configure exception handlers. This include setting up trampolines
@@ -333,11 +417,11 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
- early_ioremap_setup();
-
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ early_ioremap_setup();
+
/*
* After firmware and early platform setup code has set things up,
* we note the SPR values for configurable control/performance
@@ -396,7 +480,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-void panic_smp_self_stop(void)
+void __noreturn panic_smp_self_stop(void)
{
hard_irq_disable();
spin_begin();
@@ -463,7 +547,7 @@ void smp_release_cpus(void)
* routines and/or provided to userland
*/
-static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
u32 bsize, u32 sets)
{
info->size = size;
@@ -516,6 +600,8 @@ static bool __init parse_cache_info(struct device_node *np,
lsizep = of_get_property(np, propnames[3], NULL);
if (bsizep == NULL)
bsizep = lsizep;
+ if (lsizep == NULL)
+ lsizep = bsizep;
if (lsizep != NULL)
lsize = be32_to_cpu(*lsizep);
if (bsizep != NULL)
@@ -608,7 +694,7 @@ void __init initialize_cache_info(void)
*/
__init u64 ppc64_bolted_size(void)
{
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* Freescale BookE bolts the entire linear mapping */
/* XXX: BookE ppc64_rma_limit setup seems to disagree? */
if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
@@ -633,7 +719,7 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
MEMBLOCK_LOW_LIMIT, limit,
early_cpu_to_node(cpu));
if (!ptr)
@@ -658,7 +744,7 @@ void __init irqstack_early_init(void)
}
}
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
void __init exc_lvl_early_init(void)
{
unsigned int i;
@@ -691,7 +777,7 @@ void __init exc_lvl_early_init(void)
*/
void __init emergency_stack_init(void)
{
- u64 limit;
+ u64 limit, mce_limit;
unsigned int i;
/*
@@ -708,7 +794,16 @@ void __init emergency_stack_init(void)
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(ppc64_bolted_size(), ppc64_rma_size);
+ limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+ /*
+ * Machine check on pseries calls rtas, but can't use the static
+ * rtas_args due to a machine check hitting while the lock is held.
+ * rtas args have to be under 4GB, so the machine check stack is
+ * limited to 4GB so args can be put on stack.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+ mce_limit = SZ_4G;
for_each_possible_cpu(i) {
paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
@@ -718,27 +813,12 @@ void __init emergency_stack_init(void)
paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
#endif
}
}
#ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE ()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- early_cpu_to_node(cpu));
-
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(__pa(ptr), size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -747,6 +827,11 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
+static __init int pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
@@ -756,20 +841,38 @@ void __init setup_per_cpu_areas(void)
size_t atom_size;
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
+
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_cpu_to_node);
+ if (rc)
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
- rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
@@ -781,7 +884,7 @@ void __init setup_per_cpu_areas(void)
}
#endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
unsigned long memory_block_size_bytes(void)
{
if (ppc_md.memory_block_size)
@@ -808,161 +911,22 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
* disable it by default. Book3S has a soft-nmi hardlockup detector based
* on the decrementer interrupt, so it does not suffer from this problem.
*
- * It is likely to get false positives in VM guests, so disable it there
- * by default too.
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, assume PowerVM.
*/
static int __init disable_hardlockup_detector(void)
{
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
hardlockup_detector_disable();
#else
- if (firmware_has_feature(FW_FEATURE_LPAR))
- hardlockup_detector_disable();
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (is_kvm_guest())
+ hardlockup_detector_disable();
+ }
#endif
return 0;
}
early_initcall(disable_hardlockup_detector);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-static enum l1d_flush_type enabled_flush_types;
-static void *l1d_flush_fallback_area;
-static bool no_rfi_flush;
-bool rfi_flush;
-
-static int __init handle_no_rfi_flush(char *p)
-{
- pr_info("rfi-flush: disabled on command line.");
- no_rfi_flush = true;
- return 0;
-}
-early_param("no_rfi_flush", handle_no_rfi_flush);
-
-/*
- * The RFI flush is not KPTI, but because users will see doco that says to use
- * nopti we hijack that option here to also disable the RFI flush.
- */
-static int __init handle_no_pti(char *p)
-{
- pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
- handle_no_rfi_flush(NULL);
- return 0;
-}
-early_param("nopti", handle_no_pti);
-
-static void do_nothing(void *unused)
-{
- /*
- * We don't need to do the flush explicitly, just enter+exit kernel is
- * sufficient, the RFI exit handlers will do the right thing.
- */
-}
-
-void rfi_flush_enable(bool enable)
-{
- if (enable) {
- do_rfi_flush_fixups(enabled_flush_types);
- on_each_cpu(do_nothing, NULL, 1);
- } else
- do_rfi_flush_fixups(L1D_FLUSH_NONE);
-
- rfi_flush = enable;
-}
-
-static void __ref init_fallback_flush(void)
-{
- u64 l1d_size, limit;
- int cpu;
-
- /* Only allocate the fallback flush area once (at boot time). */
- if (l1d_flush_fallback_area)
- return;
-
- l1d_size = ppc64_caches.l1d.size;
-
- /*
- * If there is no d-cache-size property in the device tree, l1d_size
- * could be zero. That leads to the loop in the asm wrapping around to
- * 2^64-1, and then walking off the end of the fallback area and
- * eventually causing a page fault which is fatal. Just default to
- * something vaguely sane.
- */
- if (!l1d_size)
- l1d_size = (64 * 1024);
-
- limit = min(ppc64_bolted_size(), ppc64_rma_size);
-
- /*
- * Align to L1d size, and size it at 2x L1d size, to catch possible
- * hardware prefetch runoff. We don't have a recipe for load patterns to
- * reliably avoid the prefetcher.
- */
- l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
- l1d_size, MEMBLOCK_LOW_LIMIT,
- limit, NUMA_NO_NODE);
- if (!l1d_flush_fallback_area)
- panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
- __func__, l1d_size * 2, l1d_size, &limit);
-
-
- for_each_possible_cpu(cpu) {
- struct paca_struct *paca = paca_ptrs[cpu];
- paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca->l1d_flush_size = l1d_size;
- }
-}
-
-void setup_rfi_flush(enum l1d_flush_type types, bool enable)
-{
- if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: fallback displacement flush available\n");
- init_fallback_flush();
- }
-
- if (types & L1D_FLUSH_ORI)
- pr_info("rfi-flush: ori type flush available\n");
-
- if (types & L1D_FLUSH_MTTRIG)
- pr_info("rfi-flush: mttrig type flush available\n");
-
- enabled_flush_types = types;
-
- if (!no_rfi_flush && !cpu_mitigations_off())
- rfi_flush_enable(enable);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int rfi_flush_set(void *data, u64 val)
-{
- bool enable;
-
- if (val == 1)
- enable = true;
- else if (val == 0)
- enable = false;
- else
- return -EINVAL;
-
- /* Only do anything if we're changing state */
- if (enable != rfi_flush)
- rfi_flush_enable(enable);
-
- return 0;
-}
-
-static int rfi_flush_get(void *data, u64 *val)
-{
- *val = rfi_flush ? 1 : 0;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
-
-static __init int rfi_flush_debugfs_init(void)
-{
- debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
- return 0;
-}
-device_initcall(rfi_flush_debugfs_init);
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e6c30cee6abf..aa17e62f3754 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common signal handling code for both 32 and 64 bits
*
* Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
*/
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/signal.h>
#include <linux/uprobes.h>
#include <linux/key.h>
@@ -18,35 +15,163 @@
#include <linux/syscalls.h>
#include <asm/hw_breakpoint.h>
#include <linux/uaccess.h>
+#include <asm/switch_to.h>
#include <asm/unistd.h>
#include <asm/debug.h>
#include <asm/tm.h>
#include "signal.h"
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_FPR(i);
+ buf[i] = task->thread.fp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_FPR(i) = buf[i];
+ task->thread.fp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+unsigned long copy_ckfpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_CKFPR(i);
+ buf[i] = task->thread.ckfp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_CKFPR(i) = buf[i];
+ task->thread.ckfp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+unsigned long copy_ckvsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#endif
+
/* Log an error when sending an unhandled signal to a process. Controlled
* through debug.exception-trace sysctl.
*/
int show_unhandled_signals = 1;
+unsigned long get_min_sigframe_size(void)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ return get_min_sigframe_size_64();
+ else
+ return get_min_sigframe_size_32();
+}
+
+#ifdef CONFIG_COMPAT
+unsigned long get_min_sigframe_size_compat(void)
+{
+ return get_min_sigframe_size_32();
+}
+#endif
+
/*
* Allocate space for the signal frame
*/
-void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
- size_t frame_size, int is_32)
+static unsigned long get_tm_stackpointer(struct task_struct *tsk);
+
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+ size_t frame_size, int is_32)
{
unsigned long oldsp, newsp;
+ unsigned long sp = get_tm_stackpointer(tsk);
/* Default to using normal stack */
- oldsp = get_clean_sp(sp, is_32);
+ if (is_32)
+ oldsp = sp & 0x0ffffffffUL;
+ else
+ oldsp = sp;
oldsp = sigsp(oldsp, ksig);
newsp = (oldsp - frame_size) & ~0xFUL;
- /* Check access */
- if (!access_ok((void __user *)newsp, oldsp - newsp))
- return NULL;
-
return (void __user *)newsp;
}
@@ -57,12 +182,21 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
int restart = 1;
/* syscall ? */
- if (TRAP(regs) != 0x0C00)
+ if (!trap_is_syscall(regs))
+ return;
+
+ if (trap_norestart(regs))
return;
/* error signalled ? */
- if (!(regs->ccr & 0x10000000))
+ if (trap_is_scv(regs)) {
+ /* 32-bit compat mode sign extend? */
+ if (!IS_ERR_VALUE(ret))
+ return;
+ ret = -ret;
+ } else if (!(regs->ccr & 0x10000000)) {
return;
+ }
switch (ret) {
case ERESTART_RESTARTBLOCK:
@@ -92,12 +226,17 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
regs->gpr[0] = __NR_restart_syscall;
else
regs->gpr[3] = regs->orig_gpr3;
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
regs->result = 0;
} else {
- regs->result = -EINTR;
- regs->gpr[3] = EINTR;
- regs->ccr |= 0x10000000;
+ if (trap_is_scv(regs)) {
+ regs->result = -EINTR;
+ regs->gpr[3] = -EINTR;
+ } else {
+ regs->result = -EINTR;
+ regs->gpr[3] = EINTR;
+ regs->ccr |= 0x10000000;
+ }
}
}
@@ -106,7 +245,6 @@ static void do_signal(struct task_struct *tsk)
sigset_t *oldset = sigmask_to_save();
struct ksignal ksig = { .sig = 0 };
int ret;
- int is32 = is_32bit_task();
BUG_ON(tsk != current);
@@ -118,25 +256,30 @@ static void do_signal(struct task_struct *tsk)
if (ksig.sig <= 0) {
/* No signal to deliver -- put the saved sigmask back */
restore_saved_sigmask();
- tsk->thread.regs->trap = 0;
+ set_trap_norestart(tsk->thread.regs);
return; /* no signals delivered */
}
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
/*
* Reenable the DABR before delivering the signal to
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
- if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type)
- __set_breakpoint(&tsk->thread.hw_brk);
-#endif
+ if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type)
+ __set_breakpoint(i, &tsk->thread.hw_brk[i]);
+ }
+ }
+
/* Re-enable the breakpoints for the signal stack */
thread_change_pc(tsk, tsk->thread.regs);
rseq_signal_deliver(&ksig, tsk->thread.regs);
- if (is32) {
+ if (is_32bit_task()) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
ret = handle_rt_signal32(&ksig, oldset, tsk);
else
@@ -145,38 +288,28 @@ static void do_signal(struct task_struct *tsk)
ret = handle_rt_signal64(&ksig, oldset, tsk);
}
- tsk->thread.regs->trap = 0;
+ set_trap_norestart(tsk->thread.regs);
signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
}
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
- user_exit();
-
- /* Check valid addr_limit, TIF check is done there */
- addr_limit_user_check();
-
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
if (thread_info_flags & _TIF_PATCH_PENDING)
klp_update_patch_state(current);
- if (thread_info_flags & _TIF_SIGPENDING) {
+ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
BUG_ON(regs != current->thread.regs);
do_signal(current);
}
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- rseq_handle_notify_resume(NULL, regs);
- }
-
- user_enter();
+ if (thread_info_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
}
-unsigned long get_tm_stackpointer(struct task_struct *tsk)
+static unsigned long get_tm_stackpointer(struct task_struct *tsk)
{
/* When in an active transaction that takes a signal, we need to be
* careful with the stack. It's possible that the stack has moved back
@@ -199,15 +332,39 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk)
* For signals taken in non-TM or suspended mode, we use the
* normal/non-checkpointed stack pointer.
*/
+ struct pt_regs *regs = tsk->thread.regs;
+ unsigned long ret = regs->gpr[1];
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BUG_ON(tsk != current);
- if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
+ if (MSR_TM_ACTIVE(regs->msr)) {
+ preempt_disable();
tm_reclaim_current(TM_CAUSE_SIGNAL);
- if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
- return tsk->thread.ckpt_regs.gpr[1];
+ if (MSR_TM_TRANSACTIONAL(regs->msr))
+ ret = tsk->thread.ckpt_regs.gpr[1];
+
+ /*
+ * If we treclaim, we must clear the current thread's TM bits
+ * before re-enabling preemption. Otherwise we might be
+ * preempted and have the live MSR[TS] changed behind our back
+ * (tm_recheckpoint_new_task() would recheckpoint). Besides, we
+ * enter the signal handler in non-transactional state.
+ */
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+ preempt_enable();
}
#endif
- return tsk->thread.regs->gpr[1];
+ return ret;
+}
+
+static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n";
+static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n";
+
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+ const char *where, void __user *ptr)
+{
+ if (show_unhandled_signals)
+ printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
+ task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 800433685888..58ecea1cdc27 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -1,19 +1,14 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
* Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
*/
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
-extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
- size_t frame_size, int is_32);
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+ size_t frame_size, int is_32);
extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct task_struct *tsk);
@@ -21,15 +16,20 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
struct task_struct *tsk);
-extern unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task);
-extern unsigned long copy_ckfpr_to_user(void __user *to,
- struct task_struct *task);
-extern unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from);
-extern unsigned long copy_ckfpr_from_user(struct task_struct *task,
- void __user *from);
-extern unsigned long get_tm_stackpointer(struct task_struct *tsk);
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+ BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+ return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+#define unsafe_get_user_sigset(dst, src, label) do { \
+ sigset_t *__dst = dst; \
+ const sigset_t __user *__src = src; \
+ int i; \
+ \
+ for (i = 0; i < _NSIG_WORDS; i++) \
+ unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \
+} while (0)
#ifdef CONFIG_VSX
extern unsigned long copy_vsx_to_user(void __user *to,
@@ -40,6 +40,150 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from);
extern unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from);
+unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from);
+unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
+
+#define unsafe_copy_fpr_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \
+ unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \
+} while (0)
+
+#define unsafe_copy_vsx_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label);\
+} while (0)
+
+#define unsafe_copy_fpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1; i++) \
+ unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+ unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label); \
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label); \
+} while (0)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define unsafe_copy_ckfpr_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+ unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \
+} while (0)
+
+#define unsafe_copy_ckvsx_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label);\
+} while (0)
+
+#define unsafe_copy_ckfpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+ unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], failed); \
+} while (0)
+
+#define unsafe_copy_ckvsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label); \
+} while (0)
+#endif
+#elif defined(CONFIG_PPC_FPU_REGS)
+
+#define unsafe_copy_fpr_to_user(to, task, label) \
+ unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \
+ ELF_NFPREG * sizeof(double), label)
+
+#define unsafe_copy_fpr_from_user(task, from, label) \
+ unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \
+ ELF_NFPREG * sizeof(double), label)
+
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.fp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+ return __copy_from_user(task->thread.fp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define unsafe_copy_ckfpr_to_user(to, task, label) \
+ unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \
+ ELF_NFPREG * sizeof(double), label)
+
+inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.ckfp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_ckfpr_from_user(struct task_struct *task, void __user *from)
+{
+ return __copy_from_user(task->thread.ckfp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
+
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
+
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+ return 0;
+}
+
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_PPC64
@@ -49,9 +193,6 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#else /* CONFIG_PPC64 */
-extern long sys_rt_sigreturn(void);
-extern long sys_sigreturn(void);
-
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
struct task_struct *tsk)
{
@@ -60,4 +201,7 @@ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#endif /* !defined(CONFIG_PPC64) */
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+ const char *where, void __user *ptr);
+
#endif /* _POWERPC_ARCH_SIGNAL_H */
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 98600b276f76..7a718ed32b27 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -43,11 +43,10 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#ifdef CONFIG_PPC64
-#include "ppc32.h"
+#include <asm/syscalls_32.h>
#include <asm/unistd.h>
#else
#include <asm/ucontext.h>
-#include <asm/pgtable.h>
#endif
#include "signal.h"
@@ -59,8 +58,6 @@
#define mcontext mcontext32
#define ucontext ucontext32
-#define __save_altstack __compat_save_altstack
-
/*
* Userspace code may pass a ucontext which doesn't include VSX added
* at the end. We need to check for this case.
@@ -85,47 +82,35 @@
* Functions for flipping sigsets (thanks to brain dead generic
* implementation that makes things simple for little endian only)
*/
-static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
-{
- return put_compat_sigset(uset, set, sizeof(*uset));
-}
-
-static inline int get_sigset_t(sigset_t *set,
- const compat_sigset_t __user *uset)
-{
- return get_compat_sigset(set, uset);
-}
+#define unsafe_put_sigset_t unsafe_put_compat_sigset
+#define unsafe_get_sigset_t unsafe_get_compat_sigset
#define to_user_ptr(p) ptr_to_compat(p)
#define from_user_ptr(p) compat_ptr(p)
-static inline int save_general_regs(struct pt_regs *regs,
- struct mcontext __user *frame)
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
- int i;
- /* Force usr to alway see softe as 1 (interrupts enabled) */
- elf_greg_t64 softe = 0x1;
-
- WARN_ON(!FULL_REGS(regs));
+ int val, i;
for (i = 0; i <= PT_RESULT; i ++) {
- if (i == 14 && !FULL_REGS(regs))
- i = 32;
- if ( i == PT_SOFTE) {
- if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
- return -EFAULT;
- else
- continue;
- }
- if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
- return -EFAULT;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ if (i == PT_SOFTE)
+ val = 1;
+ else
+ val = gregs[i];
+
+ unsafe_put_user(val, &frame->mc_gregs[i], failed);
}
return 0;
+
+failed:
+ return 1;
}
-static inline int restore_general_regs(struct pt_regs *regs,
- struct mcontext __user *sr)
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
@@ -133,51 +118,67 @@ static inline int restore_general_regs(struct pt_regs *regs,
for (i = 0; i <= PT_RESULT; i++) {
if ((i == PT_MSR) || (i == PT_SOFTE))
continue;
- if (__get_user(gregs[i], &sr->mc_gregs[i]))
- return -EFAULT;
+ unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed);
}
return 0;
+
+failed:
+ return 1;
}
#else /* CONFIG_PPC64 */
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
-static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
-{
- return copy_to_user(uset, set, sizeof(*uset));
-}
+#define unsafe_put_sigset_t(uset, set, label) do { \
+ sigset_t __user *__us = uset ; \
+ const sigset_t *__s = set; \
+ \
+ unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \
+} while (0)
-static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
-{
- return copy_from_user(set, uset, sizeof(*uset));
-}
+#define unsafe_get_sigset_t unsafe_get_user_sigset
#define to_user_ptr(p) ((unsigned long)(p))
#define from_user_ptr(p) ((void __user *)(p))
-static inline int save_general_regs(struct pt_regs *regs,
- struct mcontext __user *frame)
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
{
- WARN_ON(!FULL_REGS(regs));
- return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE);
+ unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed);
+ return 0;
+
+failed:
+ return 1;
}
-static inline int restore_general_regs(struct pt_regs *regs,
- struct mcontext __user *sr)
+static __always_inline
+int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
{
/* copy up to but not including MSR */
- if (__copy_from_user(regs, &sr->mc_gregs,
- PT_MSR * sizeof(elf_greg_t)))
- return -EFAULT;
+ unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed);
+
/* copy from orig_r3 (the word after the MSR) up to the end */
- if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
- GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
- return -EFAULT;
+ unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+ GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed);
+
return 0;
+
+failed:
+ return 1;
}
#endif
+#define unsafe_save_general_regs(regs, frame, label) do { \
+ if (__unsafe_save_general_regs(regs, frame)) \
+ goto label; \
+} while (0)
+
+#define unsafe_restore_general_regs(regs, frame, label) do { \
+ if (__unsafe_restore_general_regs(regs, frame)) \
+ goto label; \
+} while (0)
+
/*
* When we have signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
@@ -204,9 +205,6 @@ struct sigframe {
int abigap[56];
};
-/* We use the mc_pad field for the signal return trampoline. */
-#define tramp mc_pad
-
/*
* When we have rt signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
@@ -235,171 +233,51 @@ struct rt_sigframe {
int abigap[56];
};
-#ifdef CONFIG_VSX
-unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
+unsigned long get_min_sigframe_size_32(void)
{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_FPR(i);
- buf[i] = task->thread.fp_state.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+ return max(sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE + 16,
+ sizeof(struct sigframe) + __SIGNAL_FRAMESIZE);
}
-unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_FPR(i) = buf[i];
- task->thread.fp_state.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_vsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_vsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-unsigned long copy_ckfpr_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_CKFPR(i);
- buf[i] = task->thread.ckfp_state.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
-}
-
-unsigned long copy_ckfpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_CKFPR(i) = buf[i];
- task->thread.ckfp_state.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_ckvsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_ckvsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#else
-inline unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.fp_state.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- return __copy_from_user(task->thread.fp_state.fpr, from,
- ELF_NFPREG * sizeof(double));
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-inline unsigned long copy_ckfpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.ckfp_state.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
- void __user *from)
-{
- return __copy_from_user(task->thread.ckfp_state.fpr, from,
- ELF_NFPREG * sizeof(double));
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#endif
-
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
* altivec/spe instructions at some point.
*/
-static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret,
- int ctx_has_vsx_region)
+static void prepare_save_user_regs(int ctx_has_vsx_region)
{
- unsigned long msr = regs->msr;
-
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
+#ifdef CONFIG_ALTIVEC
+ if (current->thread.used_vr)
+ flush_altivec_to_thread(current);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_VSX
+ if (current->thread.used_vsr && ctx_has_vsx_region)
+ flush_vsx_to_thread(current);
+#endif
+#ifdef CONFIG_SPE
+ if (current->thread.used_spe)
+ flush_spe_to_thread(current);
+#endif
+}
+
+static __always_inline int
+__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+{
+ unsigned long msr = regs->msr;
/* save general registers */
- if (save_general_regs(regs, frame))
- return 1;
+ unsafe_save_general_regs(regs, frame, failed);
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
+ unsafe_copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
/* set MSR_VEC in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_VEC;
@@ -412,13 +290,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* most significant bits of that same vector. --BenH
* Note that the current VRSAVE value is in the SPR at this point.
*/
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
- return 1;
+ unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32],
+ failed);
#endif /* CONFIG_ALTIVEC */
- if (copy_fpr_to_user(&frame->mc_fregs, current))
- return 1;
+ unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -433,19 +308,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* contains valid data
*/
if (current->thread.used_vsr && ctx_has_vsx_region) {
- flush_vsx_to_thread(current);
- if (copy_vsx_to_user(&frame->mc_vsregs, current))
- return 1;
+ unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed);
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* save spe registers */
if (current->thread.used_spe) {
- flush_spe_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
+ unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
+ ELF_NEVRREG * sizeof(u32), failed);
/* set MSR_SPE in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_SPE;
@@ -453,30 +324,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
/* else assert((regs->msr & MSR_SPE) == 0) */
/* We always copy to/from spefscr */
- if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
- return 1;
+ unsafe_put_user(current->thread.spefscr,
+ (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed);
#endif /* CONFIG_SPE */
- if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
- return 1;
+ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
+
/* We need to write 0 the MSR top 32 bits in the tm frame so that we
* can check it on the restore to see if TM is active
*/
- if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
- return 1;
-
- if (sigret) {
- /* Set up the sigreturn trampoline: li 0,sigret; sc */
- if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
- || __put_user(PPC_INST_SC, &frame->tramp[1]))
- return 1;
- flush_icache_range((unsigned long) &frame->tramp[0],
- (unsigned long) &frame->tramp[2]);
- }
+ if (tm_frame)
+ unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed);
return 0;
+
+failed:
+ return 1;
}
+#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \
+ if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx)) \
+ goto label; \
+} while (0)
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Save the current user registers on the user stack.
@@ -485,27 +355,23 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* We also save the transactional registers to a second ucontext in the
* frame.
*
- * See save_user_regs() and signal_64.c:setup_tm_sigcontexts().
+ * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts().
*/
-static int save_tm_user_regs(struct pt_regs *regs,
- struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret)
+static void prepare_save_tm_user_regs(void)
{
- unsigned long msr = regs->msr;
-
WARN_ON(tm_suspend_disabled);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+}
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, unsigned long msr)
+{
/* Save both sets of general registers */
- if (save_general_regs(&current->thread.ckpt_regs, frame)
- || save_general_regs(regs, tm_frame))
- return 1;
+ unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed);
+ unsafe_save_general_regs(regs, tm_frame, failed);
/* Stash the top half of the 64bit MSR into the 32bit MSR word
* of the transactional mcontext. This way we have a backward-compatible
@@ -513,26 +379,20 @@ static int save_tm_user_regs(struct pt_regs *regs,
* also look at what type of transaction (T or S) was active at the
* time of the signal.
*/
- if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR]))
- return 1;
+ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed);
-#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
- if (__copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- if (msr & MSR_VEC) {
- if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.vr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- } else {
- if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.ckvr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- }
+ unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
+ if (msr & MSR_VEC)
+ unsafe_copy_to_user(&tm_frame->mc_vregs,
+ &current->thread.vr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
+ else
+ unsafe_copy_to_user(&tm_frame->mc_vregs,
+ &current->thread.ckvr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
/* set MSR_VEC in the saved MSR value to indicate that
* frame->mc_vregs contains valid data
@@ -545,33 +405,21 @@ static int save_tm_user_regs(struct pt_regs *regs,
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
*/
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
- if (__put_user(current->thread.ckvrsave,
- (u32 __user *)&frame->mc_vregs[32]))
- return 1;
- if (msr & MSR_VEC) {
- if (__put_user(current->thread.vrsave,
- (u32 __user *)&tm_frame->mc_vregs[32]))
- return 1;
- } else {
- if (__put_user(current->thread.ckvrsave,
- (u32 __user *)&tm_frame->mc_vregs[32]))
- return 1;
- }
-#endif /* CONFIG_ALTIVEC */
+ unsafe_put_user(current->thread.ckvrsave,
+ (u32 __user *)&frame->mc_vregs[32], failed);
+ if (msr & MSR_VEC)
+ unsafe_put_user(current->thread.vrsave,
+ (u32 __user *)&tm_frame->mc_vregs[32], failed);
+ else
+ unsafe_put_user(current->thread.ckvrsave,
+ (u32 __user *)&tm_frame->mc_vregs[32], failed);
- if (copy_ckfpr_to_user(&frame->mc_fregs, current))
- return 1;
- if (msr & MSR_FP) {
- if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
- return 1;
- } else {
- if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current))
- return 1;
- }
+ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed);
+ if (msr & MSR_FP)
+ unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed);
+ else
+ unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed);
-#ifdef CONFIG_VSX
/*
* Copy VSR 0-31 upper half from thread_struct to local
* buffer, then write that to userspace. Also set MSR_VSX in
@@ -579,54 +427,38 @@ static int save_tm_user_regs(struct pt_regs *regs,
* contains valid data
*/
if (current->thread.used_vsr) {
- if (copy_ckvsx_to_user(&frame->mc_vsregs, current))
- return 1;
- if (msr & MSR_VSX) {
- if (copy_vsx_to_user(&tm_frame->mc_vsregs,
- current))
- return 1;
- } else {
- if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current))
- return 1;
- }
+ unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed);
+ if (msr & MSR_VSX)
+ unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed);
+ else
+ unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed);
msr |= MSR_VSX;
}
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in save_user_regs().
- */
- if (current->thread.used_spe) {
- flush_spe_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
- /* set MSR_SPE in the saved MSR value to indicate that
- * frame->mc_vregs contains valid data */
- msr |= MSR_SPE;
- }
- /* We always copy to/from spefscr */
- if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
- return 1;
-#endif /* CONFIG_SPE */
+ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
- if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
- return 1;
- if (sigret) {
- /* Set up the sigreturn trampoline: li 0,sigret; sc */
- if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
- || __put_user(PPC_INST_SC, &frame->tramp[1]))
- return 1;
- flush_icache_range((unsigned long) &frame->tramp[0],
- (unsigned long) &frame->tramp[2]);
- }
+ return 0;
+
+failed:
+ return 1;
+}
+#else
+static void prepare_save_tm_user_regs(void) { }
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, unsigned long msr)
+{
return 0;
}
#endif
+#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \
+ if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \
+ goto label; \
+} while (0)
+
/*
* Restore the current user register values from the user stack,
* (except for MSR).
@@ -634,69 +466,64 @@ static int save_tm_user_regs(struct pt_regs *regs,
static long restore_user_regs(struct pt_regs *regs,
struct mcontext __user *sr, int sig)
{
- long err;
unsigned int save_r2 = 0;
unsigned long msr;
#ifdef CONFIG_VSX
int i;
#endif
+ if (!user_read_access_begin(sr, sizeof(*sr)))
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal
*/
if (!sig)
save_r2 = (unsigned int)regs->gpr[2];
- err = restore_general_regs(regs, sr);
- regs->trap = 0;
- err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+ unsafe_restore_general_regs(regs, sr, failed);
+ set_trap_norestart(regs);
+ unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
if (!sig)
regs->gpr[2] = (unsigned long) save_r2;
- if (err)
- return 1;
/* if doing signal return, restore the previous little-endian mode */
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
#ifdef CONFIG_ALTIVEC
/*
* Force the process to reload the altivec registers from
* current->thread when it next does altivec instructions
*/
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
- if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
- sizeof(sr->mc_vregs)))
- return 1;
+ unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
current->thread.used_vr = true;
} else if (current->thread.used_vr)
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
/* Always get VRSAVE back */
- if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
- return 1;
+ unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
- if (copy_fpr_from_user(current, &sr->mc_fregs))
- return 1;
+ unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
#ifdef CONFIG_VSX
/*
* Force the process to reload the VSX registers from
* current->thread when it next does VSX instruction.
*/
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
- if (copy_vsx_from_user(current, &sr->mc_vsregs))
- return 1;
+ unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++)
@@ -706,27 +533,34 @@ static long restore_user_regs(struct pt_regs *regs,
* force the process to reload the FP registers from
* current->thread when it next does FP instructions
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
- /* force the process to reload the spe registers from
- current->thread when it next does spe instructions */
- regs->msr &= ~MSR_SPE;
+ /*
+ * Force the process to reload the spe registers from
+ * current->thread when it next does spe instructions.
+ * Since this is user ABI, we must enforce the sizing.
+ */
+ BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
+ regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
- if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
+ unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+ sizeof(current->thread.spe), failed);
current->thread.used_spe = true;
} else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ memset(&current->thread.spe, 0, sizeof(current->thread.spe));
/* Always get SPEFSCR back */
- if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
- return 1;
+ unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
#endif /* CONFIG_SPE */
+ user_read_access_end();
return 0;
+
+failed:
+ user_read_access_end();
+ return 1;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -739,11 +573,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *sr,
struct mcontext __user *tm_sr)
{
- long err;
unsigned long msr, msr_hi;
-#ifdef CONFIG_VSX
int i;
-#endif
if (tm_suspend_disabled)
return 1;
@@ -754,28 +585,21 @@ static long restore_tm_user_regs(struct pt_regs *regs,
* TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
* were set by the signal delivery.
*/
- err = restore_general_regs(regs, tm_sr);
- err |= restore_general_regs(&current->thread.ckpt_regs, sr);
-
- err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
-
- err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
- if (err)
+ if (!user_read_access_begin(sr, sizeof(*sr)))
return 1;
+ unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+ unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+ unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+
/* Restore the previous little-endian mode */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
-#ifdef CONFIG_ALTIVEC
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
- if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
- sizeof(sr->mc_vregs)) ||
- __copy_from_user(&current->thread.vr_state,
- &tm_sr->mc_vregs,
- sizeof(sr->mc_vregs)))
- return 1;
+ unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
current->thread.used_vr = true;
} else if (current->thread.used_vr) {
memset(&current->thread.vr_state, 0,
@@ -785,62 +609,62 @@ static long restore_tm_user_regs(struct pt_regs *regs,
}
/* Always get VRSAVE back */
- if (__get_user(current->thread.ckvrsave,
- (u32 __user *)&sr->mc_vregs[32]) ||
- __get_user(current->thread.vrsave,
- (u32 __user *)&tm_sr->mc_vregs[32]))
- return 1;
+ unsafe_get_user(current->thread.ckvrsave,
+ (u32 __user *)&sr->mc_vregs[32], failed);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
-#endif /* CONFIG_ALTIVEC */
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
- if (copy_fpr_from_user(current, &sr->mc_fregs) ||
- copy_ckfpr_from_user(current, &tm_sr->mc_fregs))
- return 1;
+ unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
-#ifdef CONFIG_VSX
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
- if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) ||
- copy_ckvsx_from_user(current, &sr->mc_vsregs))
- return 1;
+ unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++) {
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in restore_user_regs().
- */
- regs->msr &= ~MSR_SPE;
- if (msr & MSR_SPE) {
- if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
- current->thread.used_spe = true;
- } else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ user_read_access_end();
- /* Always get SPEFSCR back */
- if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
- + ELF_NEVRREG))
+ if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
return 1;
-#endif /* CONFIG_SPE */
+
+ unsafe_restore_general_regs(regs, tm_sr, failed);
+
+ /* restore altivec registers from the stack */
+ if (msr & MSR_VEC)
+ unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
+
+ /* Always get VRSAVE back */
+ unsafe_get_user(current->thread.vrsave,
+ (u32 __user *)&tm_sr->mc_vregs[32], failed);
+
+ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+
+ if (msr & MSR_VSX) {
+ /*
+ * Restore altivec registers from the stack to a local
+ * buffer, then write this out to the thread_struct
+ */
+ unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+ current->thread.used_vsr = true;
+ }
/* Get the top half of the MSR from the user context */
- if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
- return 1;
+ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
msr_hi <<= 32;
+
+ user_read_access_end();
+
/* If TM bits are set to the reserved value, it's an invalid context */
if (MSR_TM_RESV(msr_hi))
return 1;
@@ -861,7 +685,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
*
* Pull in the MSR TM bits from the user context
*/
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK));
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
* transactional versions should be loaded.
@@ -876,18 +700,26 @@ static long restore_tm_user_regs(struct pt_regs *regs,
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
load_fp_state(&current->thread.fp_state);
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode));
}
-#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
load_vr_state(&current->thread.vr_state);
- regs->msr |= MSR_VEC;
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
-#endif
preempt_enable();
return 0;
+
+failed:
+ user_read_access_end();
+ return 1;
+}
+#else
+static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr,
+ struct mcontext __user *tm_sr)
+{
+ return 0;
}
#endif
@@ -904,92 +736,183 @@ static long restore_tm_user_regs(struct pt_regs *regs,
int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
struct task_struct *tsk)
{
- struct rt_sigframe __user *rt_sf;
- struct mcontext __user *frame;
- struct mcontext __user *tm_frame = NULL;
- void __user *addr;
+ struct rt_sigframe __user *frame;
+ struct mcontext __user *mctx;
+ struct mcontext __user *tm_mctx = NULL;
unsigned long newsp = 0;
- int sigret;
unsigned long tramp;
struct pt_regs *regs = tsk->thread.regs;
-
- BUG_ON(tsk != current);
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
/* Set up Signal Frame */
- /* Put a Real Time Context onto stack */
- rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1);
- addr = rt_sf;
- if (unlikely(rt_sf == NULL))
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+ mctx = &frame->uc.uc_mcontext;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ tm_mctx = &frame->uc_transact.uc_mcontext;
+#endif
+ if (MSR_TM_ACTIVE(msr))
+ prepare_save_tm_user_regs();
+ else
+ prepare_save_user_regs(1);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
goto badframe;
/* Put the siginfo & fill in most of the ucontext */
- if (copy_siginfo_to_user(&rt_sf->info, &ksig->info)
- || __put_user(0, &rt_sf->uc.uc_flags)
- || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1])
- || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext),
- &rt_sf->uc.uc_regs)
- || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset))
- goto badframe;
+ unsafe_put_user(0, &frame->uc.uc_flags, failed);
+#ifdef CONFIG_PPC64
+ unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#else
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#endif
+ unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed);
+
+ if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ unsafe_put_user((unsigned long)&frame->uc_transact,
+ &frame->uc.uc_link, failed);
+ unsafe_put_user((unsigned long)tm_mctx,
+ &frame->uc_transact.uc_regs, failed);
+#endif
+ unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+ } else {
+ unsafe_put_user(0, &frame->uc.uc_link, failed);
+ unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+ }
/* Save user registers on the stack */
- frame = &rt_sf->uc.uc_mcontext;
- addr = frame;
- if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) {
- sigret = 0;
- tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp;
+ if (tsk->mm->context.vdso) {
+ tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
} else {
- sigret = __NR_rt_sigreturn;
- tramp = (unsigned long) frame->tramp;
+ tramp = (unsigned long)mctx->mc_pad;
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+ asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
+ unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
+
+ user_access_end();
+
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+ regs->link = tramp;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+#endif
+
+ /* create a stack frame for the caller of the handler */
+ newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16);
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
+
+ /* Fill registers for signal handler */
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = ksig->sig;
+ regs->gpr[4] = (unsigned long)&frame->info;
+ regs->gpr[5] = (unsigned long)&frame->uc;
+ regs->gpr[6] = (unsigned long)frame;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
+ /* enter the signal handler in native-endian mode */
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
+ return 0;
+
+failed:
+ user_access_end();
+
+badframe:
+ signal_fault(tsk, regs, "handle_rt_signal32", frame);
+
+ return 1;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct task_struct *tsk)
+{
+ struct sigcontext __user *sc;
+ struct sigframe __user *frame;
+ struct mcontext __user *mctx;
+ struct mcontext __user *tm_mctx = NULL;
+ unsigned long newsp = 0;
+ unsigned long tramp;
+ struct pt_regs *regs = tsk->thread.regs;
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+
+ /* Set up Signal Frame */
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+ mctx = &frame->mctx;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- tm_frame = &rt_sf->uc_transact.uc_mcontext;
- if (MSR_TM_ACTIVE(regs->msr)) {
- if (__put_user((unsigned long)&rt_sf->uc_transact,
- &rt_sf->uc.uc_link) ||
- __put_user((unsigned long)tm_frame,
- &rt_sf->uc_transact.uc_regs))
- goto badframe;
- if (save_tm_user_regs(regs, frame, tm_frame, sigret))
- goto badframe;
- }
+ tm_mctx = &frame->mctx_transact;
+#endif
+ if (MSR_TM_ACTIVE(msr))
+ prepare_save_tm_user_regs();
else
+ prepare_save_user_regs(1);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
+ goto badframe;
+ sc = (struct sigcontext __user *) &frame->sctx;
+
+#if _NSIG != 64
+#error "Please adjust handle_signal()"
#endif
- {
- if (__put_user(0, &rt_sf->uc.uc_link))
- goto badframe;
- if (save_user_regs(regs, frame, tm_frame, sigret, 1))
- goto badframe;
+ unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed);
+ unsafe_put_user(oldset->sig[0], &sc->oldmask, failed);
+#ifdef CONFIG_PPC64
+ unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed);
+#else
+ unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed);
+#endif
+ unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed);
+ unsafe_put_user(ksig->sig, &sc->signal, failed);
+
+ if (MSR_TM_ACTIVE(msr))
+ unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+ else
+ unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+
+ if (tsk->mm->context.vdso) {
+ tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
+ } else {
+ tramp = (unsigned long)mctx->mc_pad;
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+ asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
+ user_access_end();
+
regs->link = tramp;
+#ifdef CONFIG_PPC_FPU_REGS
tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+#endif
/* create a stack frame for the caller of the handler */
- newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
- addr = (void __user *)regs->gpr[1];
+ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
- /* Fill registers for signal handler */
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
- regs->gpr[4] = (unsigned long) &rt_sf->info;
- regs->gpr[5] = (unsigned long) &rt_sf->uc;
- regs->gpr[6] = (unsigned long) rt_sf;
- regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
+ regs->gpr[4] = (unsigned long) sc;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
return 0;
+failed:
+ user_access_end();
+
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in handle_rt_signal32: "
- "%p nip %08lx lr %08lx\n",
- tsk->comm, tsk->pid,
- addr, regs->nip, regs->link);
+ signal_fault(tsk, regs, "handle_signal32", frame);
return 1;
}
@@ -999,28 +922,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
sigset_t set;
struct mcontext __user *mcp;
- if (get_sigset_t(&set, &ucp->uc_sigmask))
+ if (!user_read_access_begin(ucp, sizeof(*ucp)))
return -EFAULT;
+
+ unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
#ifdef CONFIG_PPC64
{
u32 cmcp;
- if (__get_user(cmcp, &ucp->uc_regs))
- return -EFAULT;
+ unsafe_get_user(cmcp, &ucp->uc_regs, failed);
mcp = (struct mcontext __user *)(u64)cmcp;
- /* no need to check access_ok(mcp), since mcp < 4GB */
}
#else
- if (__get_user(mcp, &ucp->uc_regs))
- return -EFAULT;
- if (!access_ok(mcp, sizeof(*mcp)))
- return -EFAULT;
+ unsafe_get_user(mcp, &ucp->uc_regs, failed);
#endif
+ user_read_access_end();
+
set_current_blocked(&set);
if (restore_user_regs(regs, mcp, sig))
return -EFAULT;
return 0;
+
+failed:
+ user_read_access_end();
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1034,11 +960,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
u32 cmcp;
u32 tm_cmcp;
- if (get_sigset_t(&set, &ucp->uc_sigmask))
+ if (!user_read_access_begin(ucp, sizeof(*ucp)))
return -EFAULT;
- if (__get_user(cmcp, &ucp->uc_regs) ||
- __get_user(tm_cmcp, &tm_ucp->uc_regs))
+ unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+ unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+
+ user_read_access_end();
+
+ if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
@@ -1049,6 +979,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
return -EFAULT;
return 0;
+
+failed:
+ user_read_access_end();
+ return -EFAULT;
}
#endif
@@ -1116,16 +1050,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
*/
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
- if (!access_ok(old_ctx, ctx_size)
- || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
- || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
- || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
+ prepare_save_user_regs(ctx_has_vsx_region);
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+ unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
+ unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
+ unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
if (!access_ok(new_ctx, ctx_size) ||
- fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
+ fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;
/*
@@ -1139,11 +1075,17 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
* or if another thread unmaps the region containing the context.
* We kill the task with a SIGSEGV in this situation.
*/
- if (do_setcontext(new_ctx, regs, 0))
- do_exit(SIGSEGV);
+ if (do_setcontext(new_ctx, regs, 0)) {
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
set_thread_flag(TIF_RESTOREALL);
return 0;
+
+failed:
+ user_write_access_end();
+ return -EFAULT;
}
#ifdef CONFIG_PPC64
@@ -1215,7 +1157,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
* set, and recheckpoint was not called. This avoid
* hitting a TM Bad thing at RFID
*/
- regs->msr &= ~MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
}
/* Fall through, for non-TM restore */
#endif
@@ -1241,12 +1183,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
return 0;
bad:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in sys_rt_sigreturn: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- rt_sf, regs->nip, regs->link);
+ signal_fault(current, regs, "sys_rt_sigreturn", rt_sf);
force_sig(SIGSEGV);
return 0;
@@ -1309,13 +1246,13 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
affect the contents of these registers. After this point,
failure is a problem, anyway, and it's very unlikely unless
the user is really doing something wrong. */
- regs->msr = new_msr;
+ regs_set_return_msr(regs, new_msr);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
current->thread.debug.dbcr0 = new_dbcr0;
#endif
if (!access_ok(ctx, sizeof(*ctx)) ||
- fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
+ fault_in_readable((char __user *)ctx, sizeof(*ctx)))
return -EFAULT;
/*
@@ -1330,12 +1267,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(ctx, regs, 1)) {
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO "%s[%d]: bad frame in "
- "sys_debug_setcontext: %p nip %08lx "
- "lr %08lx\n",
- current->comm, current->pid,
- ctx, regs->nip, regs->link);
+ signal_fault(current, regs, "sys_debug_setcontext", ctx);
force_sig(SIGSEGV);
goto out;
@@ -1357,92 +1289,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
#endif
/*
- * OK, we're invoking a handler
- */
-int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct task_struct *tsk)
-{
- struct sigcontext __user *sc;
- struct sigframe __user *frame;
- struct mcontext __user *tm_mctx = NULL;
- unsigned long newsp = 0;
- int sigret;
- unsigned long tramp;
- struct pt_regs *regs = tsk->thread.regs;
-
- BUG_ON(tsk != current);
-
- /* Set up Signal Frame */
- frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1);
- if (unlikely(frame == NULL))
- goto badframe;
- sc = (struct sigcontext __user *) &frame->sctx;
-
-#if _NSIG != 64
-#error "Please adjust handle_signal()"
-#endif
- if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler)
- || __put_user(oldset->sig[0], &sc->oldmask)
-#ifdef CONFIG_PPC64
- || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
-#else
- || __put_user(oldset->sig[1], &sc->_unused[3])
-#endif
- || __put_user(to_user_ptr(&frame->mctx), &sc->regs)
- || __put_user(ksig->sig, &sc->signal))
- goto badframe;
-
- if (vdso32_sigtramp && tsk->mm->context.vdso_base) {
- sigret = 0;
- tramp = tsk->mm->context.vdso_base + vdso32_sigtramp;
- } else {
- sigret = __NR_sigreturn;
- tramp = (unsigned long) frame->mctx.tramp;
- }
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- tm_mctx = &frame->mctx_transact;
- if (MSR_TM_ACTIVE(regs->msr)) {
- if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
- sigret))
- goto badframe;
- }
- else
-#endif
- {
- if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
- goto badframe;
- }
-
- regs->link = tramp;
-
- tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
-
- /* create a stack frame for the caller of the handler */
- newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
- if (put_user(regs->gpr[1], (u32 __user *)newsp))
- goto badframe;
-
- regs->gpr[1] = newsp;
- regs->gpr[3] = ksig->sig;
- regs->gpr[4] = (unsigned long) sc;
- regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler;
- /* enter the signal handler in big-endian mode */
- regs->msr &= ~MSR_LE;
- return 0;
-
-badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in handle_signal32: "
- "%p nip %08lx lr %08lx\n",
- tsk->comm, tsk->pid,
- frame, regs->nip, regs->link);
-
- return 1;
-}
-
-/*
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_PPC64
@@ -1456,19 +1302,16 @@ SYSCALL_DEFINE0(sigreturn)
struct sigcontext __user *sc;
struct sigcontext sigctx;
struct mcontext __user *sr;
- void __user *addr;
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- struct mcontext __user *mcp, *tm_mcp;
- unsigned long msr_hi;
-#endif
+ struct mcontext __user *mcp;
+ struct mcontext __user *tm_mcp = NULL;
+ unsigned long long msr_hi = 0;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
sc = &sf->sctx;
- addr = sc;
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
goto badframe;
@@ -1484,36 +1327,32 @@ SYSCALL_DEFINE0(sigreturn)
#endif
set_current_blocked(&set);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
mcp = (struct mcontext __user *)&sf->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
goto badframe;
+#endif
if (MSR_TM_ACTIVE(msr_hi<<32)) {
if (!cpu_has_feature(CPU_FTR_TM))
goto badframe;
if (restore_tm_user_regs(regs, mcp, tm_mcp))
goto badframe;
- } else
-#endif
- {
+ } else {
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
- addr = sr;
- if (!access_ok(sr, sizeof(*sr))
- || restore_user_regs(regs, sr, 1))
- goto badframe;
+ if (restore_user_regs(regs, sr, 1)) {
+ signal_fault(current, regs, "sys_sigreturn", sr);
+
+ force_sig(SIGSEGV);
+ return 0;
+ }
}
set_thread_flag(TIF_RESTOREALL);
return 0;
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in sys_sigreturn: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- addr, regs->nip, regs->link);
+ signal_fault(current, regs, "sys_sigreturn", sc);
force_sig(SIGSEGV);
return 0;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 117515564ec7..86bb5bb4c143 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -21,11 +21,11 @@
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
#include <linux/syscalls.h>
+#include <linux/pagemap.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm/syscalls.h>
@@ -40,8 +40,8 @@
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
#define FP_REGS_SIZE sizeof(elf_fpregset_t)
-#define TRAMP_TRACEBACK 3
-#define TRAMP_SIZE 6
+#define TRAMP_TRACEBACK 4
+#define TRAMP_SIZE 7
/*
* When we have signals to deliver, we set up on the user stack,
@@ -66,10 +66,10 @@ struct rt_sigframe {
char abigap[USER_REDZONE_SIZE];
} __attribute__ ((aligned (16)));
-static const char fmt32[] = KERN_INFO \
- "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n";
-static const char fmt64[] = KERN_INFO \
- "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
+unsigned long get_min_sigframe_size_64(void)
+{
+ return sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE;
+}
/*
* This computes a quad word aligned pointer inside the vmx_reserve array
@@ -84,13 +84,36 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
}
#endif
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+ /* save altivec registers */
+ if (tsk->thread.used_vr)
+ flush_altivec_to_thread(tsk);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+ flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+ if (tsk->thread.used_vsr)
+ flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
/*
* Set up the sigcontext for the signal frame.
*/
-static long setup_sigcontext(struct sigcontext __user *sc,
- struct task_struct *tsk, int signr, sigset_t *set,
- unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\
+do { \
+ if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\
+ goto label; \
+} while (0)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+ struct task_struct *tsk, int signr, sigset_t *set,
+ unsigned long handler, int ctx_has_vsx_region)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -102,25 +125,22 @@ static long setup_sigcontext(struct sigcontext __user *sc,
*/
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
- unsigned long vrsave;
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
- long err = 0;
- /* Force usr to alway see softe as 1 (interrupts enabled) */
+ /* Force usr to always see softe as 1 (interrupts enabled) */
unsigned long softe = 0x1;
BUG_ON(tsk != current);
#ifdef CONFIG_ALTIVEC
- err |= __put_user(v_regs, &sc->v_regs);
+ unsafe_put_user(v_regs, &sc->v_regs, efault_out);
/* save altivec registers */
if (tsk->thread.used_vr) {
- flush_altivec_to_thread(tsk);
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
- 33 * sizeof(vector128));
+ unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+ 33 * sizeof(vector128), efault_out);
/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
* contains valid data.
*/
@@ -129,19 +149,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- vrsave = 0;
- if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
- vrsave = mfspr(SPRN_VRSAVE);
- tsk->thread.vrsave = vrsave;
- }
-
- err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
#else /* CONFIG_ALTIVEC */
- err |= __put_user(0, &sc->v_regs);
+ unsafe_put_user(0, &sc->v_regs, efault_out);
#endif /* CONFIG_ALTIVEC */
- flush_fp_to_thread(tsk);
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+ unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -155,26 +168,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
* VMX data.
*/
if (tsk->thread.used_vsr && ctx_has_vsx_region) {
- flush_vsx_to_thread(tsk);
v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, tsk);
+ unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_reg) contains valid data.
*/
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
- err |= __put_user(&sc->gp_regs, &sc->regs);
- WARN_ON(!FULL_REGS(regs));
- err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
- err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
- err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
- err |= __put_user(signr, &sc->signal);
- err |= __put_user(handler, &sc->handler);
+ unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
+ unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+ unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+ unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+ unsafe_put_user(signr, &sc->signal, efault_out);
+ unsafe_put_user(handler, &sc->handler, efault_out);
if (set != NULL)
- err |= __put_user(set->sig[0], &sc->oldmask);
+ unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -192,7 +206,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
static long setup_tm_sigcontexts(struct sigcontext __user *sc,
struct sigcontext __user *tm_sc,
struct task_struct *tsk,
- int signr, sigset_t *set, unsigned long handler)
+ int signr, sigset_t *set, unsigned long handler,
+ unsigned long msr)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -207,12 +222,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
- BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ BUG_ON(!MSR_TM_ACTIVE(msr));
WARN_ON(tm_suspend_disabled);
@@ -222,13 +236,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
*/
msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
err |= __put_user(tm_v_regs, &tm_sc->v_regs);
@@ -306,7 +313,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
err |= __put_user(&sc->gp_regs, &sc->regs);
err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
- WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
err |= __copy_to_user(&sc->gp_regs,
&tsk->thread.ckpt_regs, GP_REGS_SIZE);
@@ -324,14 +330,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
/*
* Restore the sigcontext from the signal frame.
*/
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
- struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do { \
+ if (__unsafe_restore_sigcontext(tsk, set, sig, sc)) \
+ goto label; \
+} while (0)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+ int sig, struct sigcontext __user *sc)
{
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs;
#endif
- unsigned long err = 0;
unsigned long save_r13 = 0;
unsigned long msr;
struct pt_regs *regs = tsk->thread.regs;
@@ -346,59 +354,60 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
save_r13 = regs->gpr[13];
/* copy the GPRs */
- err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
- err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+ unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out);
+ unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
/* get MSR separately, transfer the LE bit if doing signal return */
- err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
- err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
- err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
- err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
- err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
- err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
- /* skip SOFTE */
- regs->trap = 0;
- err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
- err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
- err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+ unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+ unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+ unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+ unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+ unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
+ /* Don't allow userspace to set SOFTE */
+ set_trap_norestart(regs);
+ unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+ unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+ unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
if (!sig)
regs->gpr[13] = save_r13;
if (set != NULL)
- err |= __get_user(set->sig[0], &sc->oldmask);
+ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
/*
- * Force reload of FP/VEC.
- * This has to be done before copying stuff into tsk->thread.fpr/vr
- * for the reasons explained in the previous comment.
+ * Force reload of FP/VEC/VSX so userspace sees any changes.
+ * Clear these bits from the user process' MSR before copying into the
+ * thread struct. If we are rescheduled or preempted and another task
+ * uses FP/VEC/VSX, and this process has the MSR bits set, then the
+ * context switch code will save the current CPU state into the
+ * thread_struct - possibly overwriting the data we are updating here.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
- err |= __get_user(v_regs, &sc->v_regs);
- if (err)
- return err;
+ unsafe_get_user(v_regs, &sc->v_regs, efault_out);
if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && (msr & MSR_VEC) != 0) {
- err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
- 33 * sizeof(vector128));
+ unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+ 33 * sizeof(vector128), efault_out);
tsk->thread.used_vr = true;
} else if (tsk->thread.used_vr) {
memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
if (v_regs != NULL)
- err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
else
tsk->thread.vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+ unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -407,14 +416,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
*/
v_regs += ELF_NVRREG;
if ((msr & MSR_VSX) != 0) {
- err |= copy_vsx_from_user(tsk, v_regs);
+ unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
tsk->thread.used_vsr = true;
} else {
for (i = 0; i < 32 ; i++)
tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
#endif
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -464,7 +476,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
return -EINVAL;
/* pull in MSR LE from user context */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
/* The following non-GPR non-FPR non-VR state is also checkpointed: */
err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
@@ -479,9 +491,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
&sc->gp_regs[PT_XER]);
err |= __get_user(tsk->thread.ckpt_regs.ccr,
&sc->gp_regs[PT_CCR]);
-
+ /* Don't allow userspace to set SOFTE */
+ set_trap_norestart(regs);
/* These regs are not checkpointed; they can go in 'regs'. */
- err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
@@ -491,7 +503,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
* This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
err |= __get_user(v_regs, &sc->v_regs);
@@ -561,7 +573,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
preempt_disable();
/* pull in MSR TS bits from user context */
- regs->msr |= msr & MSR_TS_MASK;
+ regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK));
/*
* Ensure that TM is enabled in regs->msr before we leave the signal
@@ -579,7 +591,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
* to be de-scheduled with MSR[TS] set but without calling
* tm_recheckpoint(). This can cause a bug.
*/
- regs->msr |= MSR_TM;
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&tsk->thread);
@@ -587,17 +599,23 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
load_fp_state(&tsk->thread.fp_state);
- regs->msr |= (MSR_FP | tsk->thread.fpexc_mode);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode));
}
if (msr & MSR_VEC) {
load_vr_state(&tsk->thread.vr_state);
- regs->msr |= MSR_VEC;
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
preempt_enable();
return err;
}
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+ struct sigcontext __user *tm_sc)
+{
+ return -EINVAL;
+}
#endif
/*
@@ -608,13 +626,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
int i;
long err = 0;
- /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
- err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
- (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
- /* li r0, __NR_[rt_]sigreturn| */
- err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]);
- /* sc */
- err |= __put_user(PPC_INST_SC, &tramp[2]);
+ /* Call the handler and pop the dummy stackframe*/
+ err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]);
+ err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]);
+
+ err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]);
+ err |= __put_user(PPC_RAW_SC(), &tramp[3]);
/* Minimal traceback info */
for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
@@ -640,7 +657,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
struct ucontext __user *, new_ctx, long, ctx_size)
{
- unsigned char tmp;
sigset_t set;
unsigned long new_msr = 0;
int ctx_has_vsx_region = 0;
@@ -666,18 +682,21 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
ctx_has_vsx_region = 1;
if (old_ctx != NULL) {
- if (!access_ok(old_ctx, ctx_size)
- || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
- ctx_has_vsx_region)
- || __copy_to_user(&old_ctx->uc_sigmask,
- &current->blocked, sizeof(sigset_t)))
+ prepare_setup_sigcontext(current);
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+
+ unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+ 0, ctx_has_vsx_region, efault_out);
+ unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+ sizeof(sigset_t), efault_out);
+
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
- if (!access_ok(new_ctx, ctx_size)
- || __get_user(tmp, (u8 __user *) new_ctx)
- || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+ if (!access_ok(new_ctx, ctx_size) ||
+ fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;
/*
@@ -692,15 +711,29 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
* We kill the task with a SIGSEGV in this situation.
*/
- if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
- do_exit(SIGSEGV);
+ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
set_current_blocked(&set);
- if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
- do_exit(SIGSEGV);
+
+ if (!user_read_access_begin(new_ctx, ctx_size))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+ user_read_access_end();
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
+ user_read_access_end();
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
+
return 0;
+
+efault_out:
+ user_write_access_end();
+ return -EFAULT;
}
@@ -713,9 +746,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
struct pt_regs *regs = current_pt_regs();
struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long msr;
-#endif
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -723,52 +754,54 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(uc, sizeof(*uc)))
goto badframe;
- if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+ if (__get_user_sigset(&set, &uc->uc_sigmask))
goto badframe;
set_current_blocked(&set);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /*
- * If there is a transactional state then throw it away.
- * The purpose of a sigreturn is to destroy all traces of the
- * signal frame, this includes any transactional state created
- * within in. We only check for suspended as we can never be
- * active in the kernel, we are active, there is nothing better to
- * do than go ahead and Bad Thing later.
- * The cause is not important as there will never be a
- * recheckpoint so it's not user visible.
- */
- if (MSR_TM_SUSPENDED(mfmsr()))
- tm_reclaim_current(0);
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+ /*
+ * If there is a transactional state then throw it away.
+ * The purpose of a sigreturn is to destroy all traces of the
+ * signal frame, this includes any transactional state created
+ * within in. We only check for suspended as we can never be
+ * active in the kernel, we are active, there is nothing better to
+ * do than go ahead and Bad Thing later.
+ * The cause is not important as there will never be a
+ * recheckpoint so it's not user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
- /*
- * Disable MSR[TS] bit also, so, if there is an exception in the
- * code below (as a page fault in copy_ckvsx_to_user()), it does
- * not recheckpoint this task if there was a context switch inside
- * the exception.
- *
- * A major page fault can indirectly call schedule(). A reschedule
- * process in the middle of an exception can have a side effect
- * (Changing the CPU MSR[TS] state), since schedule() is called
- * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
- * (switch_to() calls tm_recheckpoint() for the 'new' process). In
- * this case, the process continues to be the same in the CPU, but
- * the CPU state just changed.
- *
- * This can cause a TM Bad Thing, since the MSR in the stack will
- * have the MSR[TS]=0, and this is what will be used to RFID.
- *
- * Clearing MSR[TS] state here will avoid a recheckpoint if there
- * is any process reschedule in kernel space. The MSR[TS] state
- * does not need to be saved also, since it will be replaced with
- * the MSR[TS] that came from user context later, at
- * restore_tm_sigcontexts.
- */
- regs->msr &= ~MSR_TS_MASK;
+ /*
+ * Disable MSR[TS] bit also, so, if there is an exception in the
+ * code below (as a page fault in copy_ckvsx_to_user()), it does
+ * not recheckpoint this task if there was a context switch inside
+ * the exception.
+ *
+ * A major page fault can indirectly call schedule(). A reschedule
+ * process in the middle of an exception can have a side effect
+ * (Changing the CPU MSR[TS] state), since schedule() is called
+ * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
+ * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+ * this case, the process continues to be the same in the CPU, but
+ * the CPU state just changed.
+ *
+ * This can cause a TM Bad Thing, since the MSR in the stack will
+ * have the MSR[TS]=0, and this is what will be used to RFID.
+ *
+ * Clearing MSR[TS] state here will avoid a recheckpoint if there
+ * is any process reschedule in kernel space. The MSR[TS] state
+ * does not need to be saved also, since it will be replaced with
+ * the MSR[TS] that came from user context later, at
+ * restore_tm_sigcontexts.
+ */
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
- if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
- goto badframe;
- if (MSR_TM_ACTIVE(msr)) {
+ if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+ goto badframe;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
@@ -781,9 +814,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
- } else
-#endif
- {
+ } else {
/*
* Fall through, for non-TM restore
*
@@ -796,22 +827,28 @@ SYSCALL_DEFINE0(rt_sigreturn)
* MSR[TS] set, but without CPU in the proper state,
* causing a TM bad thing.
*/
- current->thread.regs->msr &= ~MSR_TS_MASK;
- if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr & ~MSR_TS_MASK);
+ if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
goto badframe;
+
+ unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+ badframe_block);
+
+ user_read_access_end();
}
if (restore_altstack(&uc->uc_stack))
goto badframe;
set_thread_flag(TIF_RESTOREALL);
+
return 0;
+badframe_block:
+ user_read_access_end();
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- current->comm, current->pid, "rt_sigreturn",
- (long)uc, regs->nip, regs->link);
+ signal_fault(current, regs, "rt_sigreturn", uc);
force_sig(SIGSEGV);
return 0;
@@ -824,55 +861,73 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long newsp = 0;
long err = 0;
struct pt_regs *regs = tsk->thread.regs;
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
- BUG_ON(tsk != current);
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
- frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0);
- if (unlikely(frame == NULL))
- goto badframe;
+ /*
+ * This only applies when calling unsafe_setup_sigcontext() and must be
+ * called before opening the uaccess window.
+ */
+ if (!MSR_TM_ACTIVE(msr))
+ prepare_setup_sigcontext(tsk);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- if (err)
+ if (!user_write_access_begin(frame, sizeof(*frame)))
goto badframe;
+ unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+ unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+ unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
+
+ if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
- err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+ unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+ user_write_access_end();
+
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler);
- } else
+ (unsigned long)ksig->ka.sa.sa_handler,
+ msr);
+
+ if (!user_write_access_begin(&frame->uc.uc_sigmask,
+ sizeof(frame->uc.uc_sigmask)))
+ goto badframe;
+
#endif
- {
- err |= __put_user(0, &frame->uc.uc_link);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+ } else {
+ unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+ unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
NULL, (unsigned long)ksig->ka.sa.sa_handler,
- 1);
+ 1, badframe_block);
}
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
+
+ unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+ user_write_access_end();
+
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
goto badframe;
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
/* Set up to return from userspace. */
- if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
- regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
+ if (tsk->mm->context.vdso) {
+ regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64));
} else {
err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
if (err)
goto badframe;
- regs->link = (unsigned long) &frame->tramp[0];
+ regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
}
/* Allocate a dummy caller frame for the signal handler. */
@@ -881,30 +936,29 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
/* Set up "regs" so we "return" to the signal handler. */
if (is_elf2_task()) {
- regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
- regs->gpr[12] = regs->nip;
+ regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
+ regs->gpr[12] = regs->ctr;
} else {
/* Handler is *really* a pointer to the function descriptor for
* the signal routine. The first entry in the function
* descriptor is the entry address of signal and the second
* entry is the TOC value we need to use.
*/
- func_descr_t __user *funct_desc_ptr =
- (func_descr_t __user *) ksig->ka.sa.sa_handler;
+ struct func_desc __user *ptr =
+ (struct func_desc __user *)ksig->ka.sa.sa_handler;
- err |= get_user(regs->nip, &funct_desc_ptr->entry);
- err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+ err |= get_user(regs->ctr, &ptr->addr);
+ err |= get_user(regs->gpr[2], &ptr->toc);
}
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->result = 0;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
- err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+ regs->gpr[4] = (unsigned long)&frame->info;
+ regs->gpr[5] = (unsigned long)&frame->uc;
regs->gpr[6] = (unsigned long) frame;
} else {
regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
@@ -914,11 +968,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
return 0;
+badframe_block:
+ user_write_access_end();
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- tsk->comm, tsk->pid, "setup_rt_frame",
- (long)frame, regs->nip, regs->link);
+ signal_fault(current, regs, "handle_rt_signal64", frame);
return 1;
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ea6adbf6a221..693334c20d07 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -33,6 +33,9 @@
#include <linux/processor.h>
#include <linux/random.h>
#include <linux/stackprotector.h>
+#include <linux/pgtable.h>
+#include <linux/clockchips.h>
+#include <linux/kexec.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -41,11 +44,10 @@
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
@@ -55,10 +57,12 @@
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
-#include <asm/kexec.h>
-#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
+#include <asm/kup.h>
+#include <asm/fadump.h>
+
+#include <trace/events/ipi.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -73,12 +77,16 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
struct task_struct *secondary_current;
-bool has_big_cores;
+bool has_big_cores __ro_after_init;
+bool coregroup_enabled __ro_after_init;
+bool thread_group_shares_l2 __ro_after_init;
+bool thread_group_shares_l3 __ro_after_init;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
@@ -87,6 +95,7 @@ EXPORT_SYMBOL_GPL(has_big_cores);
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
+#define THREAD_GROUP_SHARE_L2_L3 2
struct thread_groups {
unsigned int property;
unsigned int nr_groups;
@@ -94,11 +103,33 @@ struct thread_groups {
unsigned int thread_list[MAX_THREAD_LIST_SIZE];
};
+/* Maximum number of properties that groups of threads within a core can share */
+#define MAX_THREAD_GROUP_PROPERTIES 2
+
+struct thread_groups_list {
+ unsigned int nr_properties;
+ struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
+};
+
+static struct thread_groups_list tgl[NR_CPUS] __initdata;
/*
- * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
+ * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
* the set its siblings that share the L1-cache.
*/
-DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+
+/*
+ * On some big-cores system, thread_group_l2_cache_map for each CPU
+ * corresponds to the set its siblings within the core that share the
+ * L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -252,7 +283,7 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
* Order previous accesses before accesses in the IPI handler.
*/
smp_mb();
- message[msg] = 1;
+ WRITE_ONCE(message[msg], 1);
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
@@ -311,7 +342,7 @@ irqreturn_t smp_ipi_demux_relaxed(void)
if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
nmi_ipi_action(0, NULL);
#endif
- } while (info->messages);
+ } while (READ_ONCE(info->messages));
return IRQ_HANDLED;
}
@@ -327,12 +358,12 @@ static inline void do_message_pass(int cpu, int msg)
#endif
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
@@ -374,32 +405,32 @@ static struct cpumask nmi_ipi_pending_mask;
static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
-static void nmi_ipi_lock_start(unsigned long *flags)
+noinstr static void nmi_ipi_lock_start(unsigned long *flags)
{
raw_local_irq_save(*flags);
hard_irq_disable();
- while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
}
-static void nmi_ipi_lock(void)
+noinstr static void nmi_ipi_lock(void)
{
- while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
}
-static void nmi_ipi_unlock(void)
+noinstr static void nmi_ipi_unlock(void)
{
smp_mb();
- WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
- atomic_set(&__nmi_ipi_lock, 0);
+ WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+ raw_atomic_set(&__nmi_ipi_lock, 0);
}
-static void nmi_ipi_unlock_end(unsigned long *flags)
+noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
{
nmi_ipi_unlock();
raw_local_irq_restore(*flags);
@@ -408,7 +439,7 @@ static void nmi_ipi_unlock_end(unsigned long *flags)
/*
* Platform NMI handler calls this to ack
*/
-int smp_handle_nmi_ipi(struct pt_regs *regs)
+noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
{
void (*fn)(struct pt_regs *) = NULL;
unsigned long flags;
@@ -546,7 +577,7 @@ void tick_broadcast(const struct cpumask *mask)
#endif
#ifdef CONFIG_DEBUGGER
-void debugger_ipi_callback(struct pt_regs *regs)
+static void debugger_ipi_callback(struct pt_regs *regs)
{
debugger_ipi(regs);
}
@@ -582,12 +613,42 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
}
#endif
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_KEXEC_CORE
+ if (kexec_crash_image) {
+ crash_kexec_prepare();
+ return;
+ }
+#endif
+
+ smp_send_stop();
+}
+
#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -603,6 +664,15 @@ void smp_send_stop(void)
static void stop_this_cpu(void *dummy)
{
hard_irq_disable();
+
+ /*
+ * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
+ * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+ * to know other CPUs are offline before it breaks locks to flush
+ * printk buffers, in case we panic()ed while holding the lock.
+ */
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -627,12 +697,12 @@ void smp_send_stop(void)
}
#endif /* CONFIG_NMI_IPI */
-struct task_struct *current_set[NR_CPUS];
+static struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
per_cpu(next_tlbcam_idx, id)
= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
@@ -659,83 +729,124 @@ static void set_cpus_unrelated(int i, int j,
#endif
/*
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
+ * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask.
+ */
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
+ struct cpumask *(*dstmask)(int))
+{
+ struct cpumask *mask;
+ int k;
+
+ mask = srcmask(j);
+ for_each_cpu(k, srcmask(i))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+
+ if (i == j)
+ return;
+
+ mask = srcmask(i);
+ for_each_cpu(k, srcmask(j))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+}
+
+/*
* parse_thread_groups: Parses the "ibm,thread-groups" device tree
* property for the CPU device node @dn and stores
- * the parsed output in the thread_groups
- * structure @tg if the ibm,thread-groups[0]
- * matches @property.
+ * the parsed output in the thread_groups_list
+ * structure @tglp.
*
* @dn: The device node of the CPU device.
- * @tg: Pointer to a thread group structure into which the parsed
+ * @tglp: Pointer to a thread group list structure into which the parsed
* output of "ibm,thread-groups" is stored.
- * @property: The property of the thread-group that the caller is
- * interested in.
*
* ibm,thread-groups[0..N-1] array defines which group of threads in
* the CPU-device node can be grouped together based on the property.
*
- * ibm,thread-groups[0] tells us the property based on which the
+ * This array can represent thread groupings for multiple properties.
+ *
+ * ibm,thread-groups[i + 0] tells us the property based on which the
* threads are being grouped together. If this value is 1, it implies
- * that the threads in the same group share L1, translation cache.
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
*
- * ibm,thread-groups[1] tells us how many such thread groups exist.
+ * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
+ * property ibm,thread-groups[i]
*
- * ibm,thread-groups[2] tells us the number of threads in each such
+ * ibm,thread-groups[i+2] tells us the number of threads in each such
* group.
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
*
- * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by
* "ibm,ppc-interrupt-server#s" arranged as per their membership in
* the grouping.
*
- * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
- * implies that there are 2 groups of 4 threads each, where each group
- * of threads share L1, translation cache.
+ * Example:
+ * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
+ * This can be decomposed up into two consecutive arrays:
+ * a) [1,2,4,8,10,12,14,9,11,13,15]
+ * b) [2,2,4,8,10,12,14,9,11,13,15]
*
- * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
- * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
- * 11, 12} structure
+ * where in,
+ *
+ * a) provides information of Property "1" being shared by "2" groups,
+ * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of
+ * the first group is {8,10,12,14} and the
+ * "ibm,ppc-interrupt-server#s" of the second group is
+ * {9,11,13,15}. Property "1" is indicative of the thread in the
+ * group sharing L1 cache, translation cache and Instruction Data
+ * flow.
+ *
+ * b) provides information of Property "2" being shared by "2" groups,
+ * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
+ * the first group is {8,10,12,14} and the
+ * "ibm,ppc-interrupt-server#s" of the second group is
+ * {9,11,13,15}. Property "2" indicates that the threads in each
+ * group share the L2-cache.
*
* Returns 0 on success, -EINVAL if the property does not exist,
* -ENODATA if property does not have a value, and -EOVERFLOW if the
* property data isn't large enough.
*/
static int parse_thread_groups(struct device_node *dn,
- struct thread_groups *tg,
- unsigned int property)
+ struct thread_groups_list *tglp)
{
- int i;
- u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
- u32 *thread_list;
+ unsigned int property_idx = 0;
+ u32 *thread_group_array;
size_t total_threads;
- int ret;
+ int ret = 0, count;
+ u32 *thread_list;
+ int i = 0;
+ count = of_property_count_u32_elems(dn, "ibm,thread-groups");
+ thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
ret = of_property_read_u32_array(dn, "ibm,thread-groups",
- thread_group_array, 3);
+ thread_group_array, count);
if (ret)
- return ret;
-
- tg->property = thread_group_array[0];
- tg->nr_groups = thread_group_array[1];
- tg->threads_per_group = thread_group_array[2];
- if (tg->property != property ||
- tg->nr_groups < 1 ||
- tg->threads_per_group < 1)
- return -ENODATA;
+ goto out_free;
- total_threads = tg->nr_groups * tg->threads_per_group;
+ while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
+ int j;
+ struct thread_groups *tg = &tglp->property_tgs[property_idx++];
- ret = of_property_read_u32_array(dn, "ibm,thread-groups",
- thread_group_array,
- 3 + total_threads);
- if (ret)
- return ret;
+ tg->property = thread_group_array[i];
+ tg->nr_groups = thread_group_array[i + 1];
+ tg->threads_per_group = thread_group_array[i + 2];
+ total_threads = tg->nr_groups * tg->threads_per_group;
- thread_list = &thread_group_array[3];
+ thread_list = &thread_group_array[i + 3];
- for (i = 0 ; i < total_threads; i++)
- tg->thread_list[i] = thread_list[i];
+ for (j = 0; j < total_threads; j++)
+ tg->thread_list[j] = thread_list[j];
+ i = i + 3 + total_threads;
+ }
- return 0;
+ tglp->nr_properties = property_idx;
+
+out_free:
+ kfree(thread_group_array);
+ return ret;
}
/*
@@ -746,7 +857,7 @@ static int parse_thread_groups(struct device_node *dn,
* @tg : The thread-group structure of the CPU node which @cpu belongs
* to.
*
- * Returns the index to tg->thread_list that points to the the start
+ * Returns the index to tg->thread_list that points to the start
* of the thread_group that @cpu belongs to.
*
* Returns -1 if cpu doesn't belong to any of the groups pointed to by
@@ -771,59 +882,188 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
return -1;
}
-static int init_cpu_l1_cache_map(int cpu)
-
+static struct thread_groups *__init get_thread_groups(int cpu,
+ int group_property,
+ int *err)
{
struct device_node *dn = of_get_cpu_node(cpu, NULL);
- struct thread_groups tg = {.property = 0,
- .nr_groups = 0,
- .threads_per_group = 0};
- int first_thread = cpu_first_thread_sibling(cpu);
- int i, cpu_group_start = -1, err = 0;
+ struct thread_groups_list *cpu_tgl = &tgl[cpu];
+ struct thread_groups *tg = NULL;
+ int i;
+ *err = 0;
- if (!dn)
- return -ENODATA;
+ if (!dn) {
+ *err = -ENODATA;
+ return NULL;
+ }
- err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
- if (err)
- goto out;
+ if (!cpu_tgl->nr_properties) {
+ *err = parse_thread_groups(dn, cpu_tgl);
+ if (*err)
+ goto out;
+ }
- zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
- GFP_KERNEL,
- cpu_to_node(cpu));
+ for (i = 0; i < cpu_tgl->nr_properties; i++) {
+ if (cpu_tgl->property_tgs[i].property == group_property) {
+ tg = &cpu_tgl->property_tgs[i];
+ break;
+ }
+ }
- cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+ if (!tg)
+ *err = -EINVAL;
+out:
+ of_node_put(dn);
+ return tg;
+}
- if (unlikely(cpu_group_start == -1)) {
- WARN_ON_ONCE(1);
- err = -ENODATA;
- goto out;
- }
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+ int cpu, int cpu_group_start)
+{
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i;
+
+ zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
for (i = first_thread; i < first_thread + threads_per_core; i++) {
- int i_group_start = get_cpu_thread_group_start(i, &tg);
+ int i_group_start = get_cpu_thread_group_start(i, tg);
if (unlikely(i_group_start == -1)) {
WARN_ON_ONCE(1);
- err = -ENODATA;
- goto out;
+ return -ENODATA;
}
if (i_group_start == cpu_group_start)
- cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ cpumask_set_cpu(i, *mask);
}
-out:
- of_node_put(dn);
- return err;
+ return 0;
+}
+
+static int __init init_thread_group_cache_map(int cpu, int cache_property)
+
+{
+ int cpu_group_start = -1, err = 0;
+ struct thread_groups *tg = NULL;
+ cpumask_var_t *mask = NULL;
+
+ if (cache_property != THREAD_GROUP_SHARE_L1 &&
+ cache_property != THREAD_GROUP_SHARE_L2_L3)
+ return -EINVAL;
+
+ tg = get_thread_groups(cpu, cache_property, &err);
+
+ if (!tg)
+ return err;
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ return -ENODATA;
+ }
+
+ if (cache_property == THREAD_GROUP_SHARE_L1) {
+ mask = &per_cpu(thread_group_l1_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ }
+ else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
+ mask = &per_cpu(thread_group_l2_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ mask = &per_cpu(thread_group_l3_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ }
+
+
+ return 0;
+}
+
+static bool shared_caches __ro_after_init;
+
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymmetric SMT dependency */
+static int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that workload consolidates to lesser number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
+static struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+ /* Coregroup identification not available on shared systems */
+ if (is_shared_processor())
+ return 0;
+
+ return coregroup_enabled;
+}
+
+static const struct cpumask *cpu_mc_mask(int cpu)
+{
+ return cpu_coregroup_mask(cpu);
}
-static int init_big_cores(void)
+static int __init init_big_cores(void)
{
int cpu;
for_each_possible_cpu(cpu) {
- int err = init_cpu_l1_cache_map(cpu);
+ int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
if (err)
return err;
@@ -834,17 +1074,29 @@ static int init_big_cores(void)
}
has_big_cores = true;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
+
+ if (err)
+ return err;
+ }
+
+ thread_group_shares_l2 = true;
+ thread_group_shares_l3 = true;
+ pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
return 0;
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- unsigned int cpu;
+ unsigned int cpu, num_threads;
DBG("smp_prepare_cpus\n");
/*
- * setup_cpu may need to be called on the boot cpu. We havent
+ * setup_cpu may need to be called on the boot cpu. We haven't
* spun any cpus up but lets be paranoid.
*/
BUG_ON(boot_cpuid != smp_processor_id());
@@ -860,6 +1112,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ if (has_coregroup_support())
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+#ifdef CONFIG_NUMA
/*
* numa_node_id() works after this.
*/
@@ -868,6 +1125,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
set_cpu_numa_mem(cpu,
local_memory_node(numa_cpu_lookup_table[cpu]));
}
+#endif
}
/* Init the cpumasks so the boot CPU is related to itself */
@@ -875,14 +1133,37 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ if (has_coregroup_support())
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
+
init_big_cores();
if (has_big_cores) {
cpumask_set_cpu(boot_cpuid,
cpu_smallcore_mask(boot_cpuid));
}
+ if (cpu_to_chip_id(boot_cpuid) != -1) {
+ int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+
+ /*
+ * All threads of a core will all belong to the same core,
+ * chip_id_lookup_table will have one entry per core.
+ * Assumption: if boot_cpuid doesn't have a chip-id, then no
+ * other CPUs, will also not have chip-id.
+ */
+ chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
+ if (chip_id_lookup_table)
+ memset(chip_id_lookup_table, -1, sizeof(int) * idx);
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
+
+ // Initalise the generic SMT topology support
+ num_threads = 1;
+ if (smt_enabled_at_boot)
+ num_threads = smt_enabled_at_boot;
+ cpu_smt_set_num_threads(num_threads, threads_per_core);
}
void smp_prepare_boot_cpu(void)
@@ -980,15 +1261,20 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
- THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ THREAD_SIZE - STACK_FRAME_MIN_SIZE;
#endif
- idle->cpu = cpu;
+ task_thread_info(idle)->cpu = cpu;
secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- int rc, c;
+ const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
+ const bool booting = system_state < SYSTEM_RUNNING;
+ const unsigned long hp_spin_ms = 1;
+ unsigned long deadline;
+ int rc;
+ const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
/*
* Don't allow secondary threads to come online if inhibited
@@ -1033,22 +1319,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
/*
- * wait to see if the cpu made a callin (is actually up).
- * use this value that I found through experimentation.
- * -- Cort
+ * At boot time, simply spin on the callin word until the
+ * deadline passes.
+ *
+ * At run time, spin for an optimistic amount of time to avoid
+ * sleeping in the common case.
*/
- if (system_state < SYSTEM_RUNNING)
- for (c = 50000; c && !cpu_callin_map[cpu]; c--)
- udelay(100);
-#ifdef CONFIG_HOTPLUG_CPU
- else
- /*
- * CPUs can take much longer to come up in the
- * hotplug case. Wait five seconds.
- */
- for (c = 5000; c && !cpu_callin_map[cpu]; c--)
- msleep(1);
-#endif
+ deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
+ spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
+
+ if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
+ const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
+ const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
+
+ deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
+ while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
+ fsleep(sleep_interval_us);
+ }
if (!cpu_callin_map[cpu]) {
printk(KERN_ERR "Processor %u is stuck.\n", cpu);
@@ -1072,18 +1359,13 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
int cpu_to_core_id(int cpu)
{
struct device_node *np;
- const __be32 *reg;
int id = -1;
np = of_get_cpu_node(cpu, NULL);
if (!np)
goto out;
- reg = of_get_property(np, "reg", NULL);
- if (!reg)
- goto out;
-
- id = be32_to_cpup(reg);
+ id = of_get_cpu_hwid(np, 0);
out:
of_node_put(np);
return id;
@@ -1125,26 +1407,68 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
struct device_node *l2_cache, *np;
int i;
+ if (has_big_cores)
+ submask_fn = cpu_smallcore_mask;
+
+ /*
+ * If the threads in a thread-group share L2 cache, then the
+ * L2-mask can be obtained from thread_group_l2_cache_map.
+ */
+ if (thread_group_shares_l2) {
+ cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_l2_cache_mask);
+ }
+
+ /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
+ if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
+ !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
+ pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
+ cpu);
+ }
+
+ return true;
+ }
+
l2_cache = cpu_to_l2cache(cpu);
- if (!l2_cache)
+ if (!l2_cache || !*mask) {
+ /* Assume only core siblings share cache with this CPU */
+ for_each_cpu(i, cpu_sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+
return false;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
- for_each_cpu(i, cpu_online_mask) {
+ /* Update l2-cache mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+
+ /* Skip all CPUs already part of current CPU l2-cache mask */
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, *mask) {
/*
* when updating the marks the current CPU has not been marked
* online, but we need to update the cache masks
*/
np = cpu_to_l2cache(i);
- if (!np)
- continue;
- if (np == l2_cache)
- set_cpus_related(cpu, i, mask_fn);
+ /* Skip all CPUs already part of current CPU l2-cache */
+ if (np == l2_cache) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
+ }
of_node_put(np);
}
@@ -1156,89 +1480,159 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
#ifdef CONFIG_HOTPLUG_CPU
static void remove_cpu_from_masks(int cpu)
{
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
int i;
- /* NB: cpu_core_mask is a superset of the others */
- for_each_cpu(i, cpu_core_mask(cpu)) {
- set_cpus_unrelated(cpu, i, cpu_core_mask);
+ unmap_cpu_from_node(cpu);
+
+ if (shared_caches)
+ mask_fn = cpu_l2_cache_mask;
+
+ for_each_cpu(i, mask_fn(cpu)) {
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
if (has_big_cores)
set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
+
+ for_each_cpu(i, cpu_core_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+
+ if (has_coregroup_support()) {
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
+ }
}
#endif
static inline void add_cpu_to_smallcore_masks(int cpu)
{
- struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
- int i, first_thread = cpu_first_thread_sibling(cpu);
+ int i;
if (!has_big_cores)
return;
cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
- for (i = first_thread; i < first_thread + threads_per_core; i++) {
- if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
+ if (cpu_online(i))
set_cpus_related(i, cpu, cpu_smallcore_mask);
}
}
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
+{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+ int coregroup_id = cpu_to_coregroup_id(cpu);
+ int i;
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ if (!*mask) {
+ /* Assume only siblings are part of this CPU's coregroup */
+ for_each_cpu(i, submask_fn(cpu))
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
+
+ return;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+ /* Update coregroup mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
+
+ /* Skip all CPUs already part of coregroup mask */
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
+
+ for_each_cpu(i, *mask) {
+ /* Skip all CPUs not part of this coregroup */
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
+ }
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
int first_thread = cpu_first_thread_sibling(cpu);
- int chipid = cpu_to_chip_id(cpu);
+ cpumask_var_t mask;
+ int chip_id = -1;
+ bool ret;
int i;
/*
* This CPU will not be in the online mask yet so we need to manually
* add it to it's own thread sibling mask.
*/
+ map_cpu_to_node(cpu, cpu_to_node(cpu));
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
for (i = first_thread; i < first_thread + threads_per_core; i++)
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
add_cpu_to_smallcore_masks(cpu);
- /*
- * Copy the thread sibling mask into the cache sibling mask
- * and mark any CPUs that share an L2 with this CPU.
- */
- for_each_cpu(i, cpu_sibling_mask(cpu))
- set_cpus_related(cpu, i, cpu_l2_cache_mask);
- update_mask_by_l2(cpu, cpu_l2_cache_mask);
- /*
- * Copy the cache sibling mask into core sibling mask and mark
- * any CPUs on the same chip as this CPU.
- */
- for_each_cpu(i, cpu_l2_cache_mask(cpu))
- set_cpus_related(cpu, i, cpu_core_mask);
+ /* In CPU-hotplug path, hence use GFP_ATOMIC */
+ ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+ update_mask_by_l2(cpu, &mask);
- if (chipid == -1)
- return;
+ if (has_coregroup_support())
+ update_coregroup_mask(cpu, &mask);
- for_each_cpu(i, cpu_online_mask)
- if (cpu_to_chip_id(i) == chipid)
- set_cpus_related(cpu, i, cpu_core_mask);
-}
+ if (chip_id_lookup_table && ret)
+ chip_id = cpu_to_chip_id(cpu);
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ /* Update core_mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
-static bool shared_caches;
+ /* Skip all CPUs already part of current CPU core mask */
+ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+ /* If chip_id is -1; limit the cpu_core_mask to within PKG */
+ if (chip_id == -1)
+ cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+
+ for_each_cpu(i, mask) {
+ if (chip_id == cpu_to_chip_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+ cpumask_andnot(mask, mask, submask_fn(i));
+ } else {
+ cpumask_andnot(mask, mask, cpu_core_mask(i));
+ }
+ }
+
+ free_cpumask_var(mask);
+}
/* Activate a secondary processor. */
+__no_stack_protector
void start_secondary(void *unused)
{
- unsigned int cpu = smp_processor_id();
- struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+ unsigned int cpu = raw_smp_processor_id();
+
+ /* PPC64 calls setup_kup() in early_setup_secondary() */
+ if (IS_ENABLED(CONFIG_PPC32))
+ setup_kup();
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
- preempt_disable();
+ rcutree_report_cpu_starting(cpu);
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
@@ -1254,20 +1648,26 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
+ set_numa_node(numa_cpu_lookup_table[cpu]);
+ set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
- if (has_big_cores)
- sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
- shared_caches = true;
+ if (!shared_caches) {
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
- set_numa_node(numa_cpu_lookup_table[cpu]);
- set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
+
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+ shared_caches = true;
+ }
smp_wmb();
notify_cpu_starting(cpu);
@@ -1285,68 +1685,46 @@ void start_secondary(void *unused)
BUG();
}
-int setup_profiling_timer(unsigned int multiplier)
-{
- return 0;
-}
+static struct sched_domain_topology_level powerpc_topology[6];
-#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymetric SMT dependancy */
-static int powerpc_smt_flags(void)
+static void __init build_sched_topology(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int i = 0;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
- }
- return flags;
-}
-#endif
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
-static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+ if (has_big_cores) {
+ pr_info("Big cores detected but using small core scheduling\n");
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
+ } else {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
+ }
#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
-/*
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
- * since the migrated task remains cache hot. We want to take advantage of this
- * at the scheduler level so an extra topology level is required.
- */
-static int powerpc_shared_cache_flags(void)
-{
- return SD_SHARE_PKG_RESOURCES;
-}
+ if (shared_caches) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
+ };
+ }
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
+ };
+ }
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
+ };
-/*
- * We can't just pass cpu_l2_cache_mask() directly because
- * returns a non-const pointer and the compiler barfs on that.
- */
-static const struct cpumask *shared_cache_mask(int cpu)
-{
- return cpu_l2_cache_mask(cpu);
-}
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *smallcore_smt_mask(int cpu)
-{
- return cpu_smallcore_mask(cpu);
+ set_sched_topology(powerpc_topology);
}
-#endif
-
-static struct sched_domain_topology_level power9_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
void __init smp_cpus_done(unsigned int max_cpus)
{
@@ -1359,31 +1737,21 @@ void __init smp_cpus_done(unsigned int max_cpus)
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
- /*
- * On a shared LPAR, associativity needs to be requested.
- * Hence, get numa topology before dumping cpu topology
- */
- shared_proc_topology_init();
dump_numa_cpu_topology();
+ build_sched_topology();
+}
-#ifdef CONFIG_SCHED_SMT
- if (has_big_cores) {
- pr_info("Using small cores at SMT level\n");
- power9_topology[0].mask = smallcore_smt_mask;
- powerpc_topology[0].mask = smallcore_smt_mask;
- }
-#endif
- /*
- * If any CPU detects that it's sharing a cache with another CPU then
- * use the deeper topology that is aware of this sharing.
- */
- if (shared_caches) {
- pr_info("Using shared cache scheduler topology\n");
- set_sched_topology(power9_topology);
- } else {
- pr_info("Using standard scheduler topology\n");
- set_sched_topology(powerpc_topology);
- }
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return -cpu / threads_per_core;
+
+ return -cpu;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1409,11 +1777,19 @@ int __cpu_disable(void)
void __cpu_die(unsigned int cpu)
{
+ /*
+ * This could perhaps be a generic call in idlea_task_dead(), but
+ * that requires testing from all archs, so first put it here to
+ */
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+ dec_mm_active_cpus(&init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
if (smp_ops->cpu_die)
smp_ops->cpu_die(cpu);
}
-void cpu_die(void)
+void __noreturn arch_cpu_idle_dead(void)
{
/*
* Disable on the down path. This will be re-enabled by
@@ -1421,8 +1797,8 @@ void cpu_die(void)
*/
this_cpu_disable_ftrace();
- if (ppc_md.cpu_die)
- ppc_md.cpu_die();
+ if (smp_ops->cpu_offline_self)
+ smp_ops->cpu_offline_self();
/* If we return, we re-enter start_secondary */
start_secondary_resume();
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2a46cfed5fd..e6a958a5da27 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -8,6 +8,7 @@
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
@@ -23,117 +24,66 @@
#include <asm/paca.h>
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-static void save_context_stack(struct stack_trace *trace, unsigned long sp,
- struct task_struct *tsk, int savesched)
+void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
+ unsigned long sp;
+
+ if (regs && !consume_entry(cookie, regs->nip))
+ return;
+
+ if (regs)
+ sp = regs->gpr[1];
+ else if (task == current)
+ sp = current_stack_frame();
+ else
+ sp = task->thread.ksp;
+
for (;;) {
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
- if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, task))
return;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
- if (savesched || !in_sched_functions(ip)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = ip;
- else
- trace->skip--;
- }
-
- if (trace->nr_entries >= trace->max_entries)
+ if (!consume_entry(cookie, ip))
return;
sp = newsp;
}
}
-void save_stack_trace(struct stack_trace *trace)
-{
- unsigned long sp;
-
- sp = current_stack_pointer();
-
- save_context_stack(trace, sp, current, 1);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
- unsigned long sp;
-
- if (!try_get_task_stack(tsk))
- return;
-
- if (tsk == current)
- sp = current_stack_pointer();
- else
- sp = tsk->thread.ksp;
-
- save_context_stack(trace, sp, tsk, 0);
-
- put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void
-save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
- save_context_stack(trace, regs->gpr[1], current, 0);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
/*
* This function returns an error if it detects any unreliable features of the
* stack. Otherwise it guarantees that the stack trace is reliable.
*
* If the task is not 'current', the caller *must* ensure the task is inactive.
*/
-static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
+int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
unsigned long sp;
unsigned long newsp;
- unsigned long stack_page = (unsigned long)task_stack_page(tsk);
+ unsigned long stack_page = (unsigned long)task_stack_page(task);
unsigned long stack_end;
int graph_idx = 0;
bool firstframe;
stack_end = stack_page + THREAD_SIZE;
- if (!is_idle_task(tsk)) {
- /*
- * For user tasks, this is the SP value loaded on
- * kernel entry, see "PACAKSAVE(r13)" in _switch() and
- * system_call_common()/EXCEPTION_PROLOG_COMMON().
- *
- * Likewise for non-swapper kernel threads,
- * this also happens to be the top of the stack
- * as setup by copy_thread().
- *
- * Note that stack backlinks are not properly setup by
- * copy_thread() and thus, a forked task() will have
- * an unreliable stack trace until it's been
- * _switch()'ed to for the first time.
- */
- stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- } else {
- /*
- * idle tasks have a custom stack layout,
- * c.f. cpu_idle_thread_init().
- */
- stack_end -= STACK_FRAME_OVERHEAD;
- }
- if (tsk == current)
- sp = current_stack_pointer();
+ // See copy_thread() for details.
+ if (task->flags & PF_KTHREAD)
+ stack_end -= STACK_FRAME_MIN_SIZE;
else
- sp = tsk->thread.ksp;
+ stack_end -= STACK_USER_INT_FRAME_SIZE;
+
+ if (task == current)
+ sp = current_stack_frame();
+ else
+ sp = task->thread.ksp;
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
@@ -169,7 +119,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
/* Mark stacktraces with exception frames as unreliable. */
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
- stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
return -EINVAL;
}
@@ -182,46 +132,22 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
+ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
* as unreliable.
*/
- if (ip == (unsigned long)kretprobe_trampoline)
+ if (ip == (unsigned long)__kretprobe_trampoline)
return -EINVAL;
#endif
- if (trace->nr_entries >= trace->max_entries)
- return -E2BIG;
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = ip;
- else
- trace->skip--;
+ if (!consume_entry(cookie, ip))
+ return -EINVAL;
}
return 0;
}
-int save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
-{
- int ret;
-
- /*
- * If the task doesn't have a stack (e.g., a zombie), the stack is
- * "reliably" empty.
- */
- if (!try_get_task_stack(tsk))
- return 0;
-
- ret = __save_stack_trace_tsk_reliable(tsk, trace);
-
- put_task_stack(tsk);
-
- return ret;
-}
-#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
static void handle_backtrace_ipi(struct pt_regs *regs)
{
@@ -230,17 +156,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs)
static void raise_backtrace_ipi(cpumask_t *mask)
{
+ struct paca_struct *p;
unsigned int cpu;
+ u64 delay_us;
for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
+ if (cpu == smp_processor_id()) {
handle_backtrace_ipi(NULL);
- else
- smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
- }
+ continue;
+ }
- for_each_cpu(cpu, mask) {
- struct paca_struct *p = paca_ptrs[cpu];
+ delay_us = 5 * USEC_PER_SEC;
+
+ if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+ // Now wait up to 5s for the other CPU to do its backtrace
+ while (cpumask_test_cpu(cpu, mask) && delay_us) {
+ udelay(1);
+ delay_us--;
+ }
+
+ // Other CPU cleared itself from the mask
+ if (delay_us)
+ continue;
+ }
+
+ p = paca_ptrs[cpu];
cpumask_clear_cpu(cpu, mask);
@@ -260,12 +200,12 @@ static void raise_backtrace_ipi(cpumask_t *mask)
pr_cont(" current pointer corrupt? (%px)\n", p->__current);
pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
- show_stack(p->__current, (unsigned long *)p->saved_r1);
+ show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING);
}
}
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
- nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
}
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 000000000000..863a7aa24650
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/code-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+ int err;
+ bool is_ret0 = (func == __static_call_return0);
+ unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
+ bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
+
+ if (!tramp)
+ return;
+
+ mutex_lock(&text_mutex);
+
+ if (func && !is_short) {
+ err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
+ if (err)
+ goto out;
+ }
+
+ if (!func)
+ err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+ else if (is_short)
+ err = patch_branch(tramp, target, 0);
+ else
+ err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
+out:
+ mutex_unlock(&text_mutex);
+
+ if (err)
+ panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/powerpc/kernel/switch.S b/arch/powerpc/kernel/switch.S
new file mode 100644
index 000000000000..608c0ce7cec6
--- /dev/null
+++ b/arch/powerpc/kernel/switch.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/objtool.h>
+#include <asm/asm-offsets.h>
+#include <asm/code-patching-asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kup.h>
+#include <asm/thread_info.h>
+
+.section ".text","ax",@progbits
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Cancel all explict user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+#define STOP_STREAMS \
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
+
+#define FLUSH_COUNT_CACHE \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches1; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches2; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches3
+
+.macro nops number
+ .rept \number
+ nop
+ .endr
+.endm
+
+.balign 32
+.global flush_branch_caches
+flush_branch_caches:
+ /* Save LR into r9 */
+ mflr r9
+
+ // Flush the link stack
+ .rept 64
+ ANNOTATE_INTRA_FUNCTION_CALL
+ bl .+4
+ .endr
+ b 1f
+ nops 6
+
+ .balign 32
+ /* Restore LR */
+1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
+ li r9,0x7fff
+ mtctr r9
+
+ PPC_BCCTR_FLUSH
+
+2: nop
+ patch_site 2b patch__flush_count_cache_return
+
+ nops 3
+
+ .rept 278
+ .balign 32
+ PPC_BCCTR_FLUSH
+ nops 7
+ .endr
+
+ blr
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+.balign 32
+/*
+ * New stack pointer in r8, old stack pointer in r1, must not clobber r3
+ */
+pin_stack_slb:
+BEGIN_FTR_SECTION
+ clrrdi r6,r8,28 /* get its ESID */
+ clrrdi r9,r1,28 /* get current sp ESID */
+FTR_SECTION_ELSE
+ clrrdi r6,r8,40 /* get its 1T ESID */
+ clrrdi r9,r1,40 /* get current sp 1T ESID */
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
+ clrldi. r0,r6,2 /* is new ESID c00000000? */
+ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
+ cror eq,4*cr1+eq,eq
+ beq 2f /* if yes, don't slbie it */
+
+ /* Bolt in the new stack SLB entry */
+ ld r7,KSP_VSID(r4) /* Get new stack's VSID */
+ oris r0,r6,(SLB_ESID_V)@h
+ ori r0,r0,(SLB_NUM_BOLTED-1)@l
+BEGIN_FTR_SECTION
+ li r9,MMU_SEGSIZE_1T /* insert B field */
+ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
+ rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+
+ /* Update the last bolted SLB. No write barriers are needed
+ * here, provided we only update the current CPU's SLB shadow
+ * buffer.
+ */
+ ld r9,PACA_SLBSHADOWPTR(r13)
+ li r12,0
+ std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+ li r12,SLBSHADOW_STACKVSID
+ STDX_BE r7,r12,r9 /* Save VSID */
+ li r12,SLBSHADOW_STACKESID
+ STDX_BE r0,r12,r9 /* Save ESID */
+
+ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
+ * we have 1TB segments, the only CPUs known to have the errata
+ * only support less than 1TB of system memory and we'll never
+ * actually hit this code path.
+ */
+
+ isync
+ slbie r6
+BEGIN_FTR_SECTION
+ slbie r6 /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ slbmte r7,r0
+ isync
+2: blr
+ .size pin_stack_slb,.-pin_stack_slb
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#else
+#define STOP_STREAMS
+#define FLUSH_COUNT_CACHE
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * do_switch_32/64 have the same calling convention as _switch, i.e., r3,r4
+ * are prev and next thread_struct *, and returns prev task_struct * in r3.
+
+ * This switches the stack, current, and does other task switch housekeeping.
+ */
+.macro do_switch_32
+ tophys(r0,r4)
+ mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
+ lwz r1,KSP(r4) /* Load new stack pointer */
+
+ /* save the old current 'last' for return value */
+ mr r3,r2
+ addi r2,r4,-THREAD /* Update current */
+.endm
+
+.macro do_switch_64
+ ld r8,KSP(r4) /* Load new stack pointer */
+
+ kuap_check_amr r9, r10
+
+ FLUSH_COUNT_CACHE /* Clobbers r9, ctr */
+
+ STOP_STREAMS /* Clobbers r6 */
+
+ addi r3,r3,-THREAD /* old thread -> task_struct for return value */
+ addi r6,r4,-THREAD /* new thread -> task_struct */
+ std r6,PACACURRENT(r13) /* Set new task_struct to 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+ ld r6, TASK_CANARY(r6)
+ std r6, PACA_CANARY(r13)
+#endif
+ /* Set new PACAKSAVE */
+ clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
+ addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+ std r7,PACAKSAVE(r13)
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ bl pin_stack_slb
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+#endif
+ /*
+ * PMU interrupts in radix may come in here. They will use r1, not
+ * PACAKSAVE, so this stack switch will not cause a problem. They
+ * will store to the process stack, which may then be migrated to
+ * another CPU. However the rq lock release on this CPU paired with
+ * the rq lock acquire on the new CPU before the stack becomes
+ * active on the new CPU, will order those stores.
+ */
+ mr r1,r8 /* start using new stack pointer */
+.endm
+
+/*
+ * This routine switches between two different tasks. The process
+ * state of one is saved on its kernel stack. Then the state
+ * of the other is restored from its kernel stack. The memory
+ * management hardware is updated to the second process's state.
+ * Finally, we can return to the second process.
+ * On entry, r3 points to the THREAD for the current task, r4
+ * points to the THREAD for the new task.
+ *
+ * This routine is always called with interrupts disabled.
+ *
+ * Note: there are two ways to get to the "going out" portion
+ * of this code; either by coming in via the entry (_switch)
+ * or via "fork" which must set up an environment equivalent
+ * to the "_switch" path. If you change this , you'll have to
+ * change the fork code also.
+ *
+ * The code which creates the new task context is in 'copy_thread'
+ * in arch/ppc/kernel/process.c
+ *
+ * Note: this uses SWITCH_FRAME_SIZE rather than USER_INT_FRAME_SIZE
+ * because we don't need to leave the redzone ABI gap at the top of
+ * the kernel stack.
+ */
+_GLOBAL(_switch)
+ PPC_CREATE_STACK_FRAME(SWITCH_FRAME_SIZE)
+ PPC_STL r1,KSP(r3) /* Set old stack pointer */
+ SAVE_NVGPRS(r1) /* volatiles are caller-saved -- Cort */
+ PPC_STL r0,_NIP(r1) /* Return to switch caller */
+ mfcr r0
+ stw r0,_CCR(r1)
+
+ /*
+ * On SMP kernels, care must be taken because a task may be
+ * scheduled off CPUx and on to CPUy. Memory ordering must be
+ * considered.
+ *
+ * Cacheable stores on CPUx will be visible when the task is
+ * scheduled on CPUy by virtue of the core scheduler barriers
+ * (see "Notes on Program-Order guarantees on SMP systems." in
+ * kernel/sched/core.c).
+ *
+ * Uncacheable stores in the case of involuntary preemption must
+ * be taken care of. The smp_mb__after_spinlock() in __schedule()
+ * is implemented as hwsync on powerpc, which orders MMIO too. So
+ * long as there is an hwsync in the context switch path, it will
+ * be executed on the source CPU after the task has performed
+ * all MMIO ops on that CPU, and on the destination CPU before the
+ * task performs any MMIO ops there.
+ */
+
+ /*
+ * The kernel context switch path must contain a spin_lock,
+ * which contains larx/stcx, which will clear any reservation
+ * of the task being switched.
+ */
+
+#ifdef CONFIG_PPC32
+ do_switch_32
+#else
+ do_switch_64
+#endif
+
+ lwz r0,_CCR(r1)
+ mtcrf 0xFF,r0
+ REST_NVGPRS(r1) /* volatiles are destroyed -- Cort */
+ PPC_LL r0,_NIP(r1) /* Return to _switch caller in new task */
+ mtlr r0
+ addi r1,r1,SWITCH_FRAME_SIZE
+ blr
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index cbdf86228eaa..ffb79326483c 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/threads.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -181,7 +183,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
@@ -395,15 +397,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
li r3,0
blr
+_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume)
/* FIXME:This construct is actually not useful since we don't shut
* down the instruction MMU, we could just flip back MSR-DR on.
*/
-turn_on_mmu:
+SYM_FUNC_START_LOCAL(turn_on_mmu)
mflr r4
mtsrr0 r4
mtsrr1 r3
sync
isync
rfi
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+SYM_FUNC_END(turn_on_mmu)
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index aeea97ad85cf..50fa8fc9ef95 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -11,14 +11,11 @@
#include <linux/interrupt.h>
#include <linux/nmi.h>
+void do_after_copyback(void);
+
void do_after_copyback(void)
{
iommu_restore();
touch_softlockup_watchdog();
mb();
}
-
-void _iommu_save(void)
-{
- iommu_save();
-}
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_85xx.S
index 88cfdbd530f1..88cfdbd530f1 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_85xx.S
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 6d3189830dd3..f645652c2654 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -76,16 +76,10 @@
swsusp_save_area:
.space SL_SIZE
- .section ".toc","aw"
-swsusp_save_area_ptr:
- .tc swsusp_save_area[TC],swsusp_save_area
-restore_pblist_ptr:
- .tc restore_pblist[TC],restore_pblist
-
.section .text
.align 5
_GLOBAL(swsusp_arch_suspend)
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
SAVE_SPECIAL(LR)
SAVE_REGISTER(r1)
SAVE_SPECIAL(CR)
@@ -128,11 +122,10 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
* stack pointer on the stack like a real stackframe */
addi r1,r1,-128
- bl _iommu_save
bl swsusp_save
/* restore LR */
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(LR)
addi r1,r1,128
@@ -142,11 +135,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
- ld r12,restore_pblist_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, restore_pblist)
ld r12,0(r12)
cmpdi r12,0
@@ -188,7 +181,7 @@ nothing_to_copy:
tlbia
#endif
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(CR)
@@ -266,7 +259,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
bl do_after_copyback
addi r1,r1,128
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(LR)
li r3, 0
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index d36c6391eaf5..d451a8229223 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -1,13 +1,23 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
+ * sys_ppc32.c: 32-bit system calls with complex calling conventions.
*
* Copyright (C) 2001 IBM
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*
- * These routines maintain argument size conversion between 32bit and 64bit
- * environment.
+ * 32-bit system calls with 64-bit arguments pass those in register pairs.
+ * This must be specially dealt with on 64-bit kernels. The compat_arg_u64_dual
+ * in generic compat syscalls is not always usable because the register
+ * pairing is constrained depending on preceding arguments.
+ *
+ * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER,
+ * the defined system call functions take the pt_regs as an argument, and there
+ * is a mapping macro which maps registers to arguments
+ * (SC_POWERPC_REGS_TO_ARGS) which also does not deal with these 64-bit
+ * arguments.
+ *
+ * This file contains these system calls.
*/
#include <linux/kernel.h>
@@ -25,7 +35,6 @@
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
-#include <linux/mman.h>
#include <linux/in.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
@@ -48,68 +57,79 @@
#include <asm/syscalls.h>
#include <asm/switch_to.h>
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- /* This should remain 12 even if PAGE_SIZE changes */
- return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
-}
-
-/*
- * long long munging:
- * The 32 bit ABI passes long longs in an odd even register pair.
- */
-
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
-{
- return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
-}
+#ifdef CONFIG_PPC32
+#define PPC32_SYSCALL_DEFINE4 SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5 SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6 SYSCALL_DEFINE6
+#else
+#define PPC32_SYSCALL_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#endif
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
+PPC32_SYSCALL_DEFINE6(ppc_pread64,
+ unsigned int, fd,
+ char __user *, ubuf, compat_size_t, count,
+ u32, reg6, u32, pos1, u32, pos2)
{
- return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+ return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
}
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count)
+PPC32_SYSCALL_DEFINE6(ppc_pwrite64,
+ unsigned int, fd,
+ const char __user *, ubuf, compat_size_t, count,
+ u32, reg6, u32, pos1, u32, pos2)
{
- return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count);
+ return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
}
-asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
- unsigned long high, unsigned long low)
+PPC32_SYSCALL_DEFINE5(ppc_readahead,
+ int, fd, u32, r4,
+ u32, offset1, u32, offset2, u32, count)
{
- return ksys_truncate(path, (high << 32) | low);
+ return ksys_readahead(fd, merge_64(offset1, offset2), count);
}
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
- u32 lenhi, u32 lenlo)
+PPC32_SYSCALL_DEFINE4(ppc_truncate64,
+ const char __user *, path, u32, reg4,
+ unsigned long, len1, unsigned long, len2)
{
- return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
- ((loff_t)lenhi << 32) | lenlo);
+ return ksys_truncate(path, merge_64(len1, len2));
}
-asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
- unsigned long low)
+PPC32_SYSCALL_DEFINE4(ppc_ftruncate64,
+ unsigned int, fd, u32, reg4,
+ unsigned long, len1, unsigned long, len2)
{
- return ksys_ftruncate(fd, (high << 32) | low);
+ return ksys_ftruncate(fd, merge_64(len1, len2));
}
-long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low,
- size_t len, int advice)
+PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
+ int, fd, u32, unused, u32, offset1, u32, offset2,
+ size_t, len, int, advice)
{
- return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len,
+ return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
advice);
}
-asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
- unsigned offset_hi, unsigned offset_lo,
- unsigned nbytes_hi, unsigned nbytes_lo)
+PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
+ int, fd, unsigned int, flags,
+ unsigned int, offset1, unsigned int, offset2,
+ unsigned int, nbytes1, unsigned int, nbytes2)
{
- loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
- loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
+ loff_t offset = merge_64(offset1, offset2);
+ loff_t nbytes = merge_64(nbytes1, nbytes2);
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
+
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE6(ppc_fallocate,
+ int, fd, int, mode,
+ u32, offset1, u32, offset2, u32, len1, u32, len2)
+{
+ return ksys_fallocate(fd, mode,
+ merge_64(offset1, offset2),
+ merge_64(len1, len2));
+}
+#endif
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
new file mode 100644
index 000000000000..77fedb190c93
--- /dev/null
+++ b/arch/powerpc/kernel/syscall.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/context_tracking.h>
+#include <linux/randomize_kstack.h>
+
+#include <asm/interrupt.h>
+#include <asm/kup.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
+{
+ long ret;
+ syscall_fn f;
+
+ kuap_lock();
+
+ add_random_kstack_offset();
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+ trace_hardirqs_off(); /* finish reconciling */
+
+ CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+ user_exit_irqoff();
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(arch_irq_disabled_regs(regs));
+
+#ifdef CONFIG_PPC_PKEY
+ if (mmu_has_feature(MMU_FTR_PKEY)) {
+ unsigned long amr, iamr;
+ bool flush_needed = false;
+ /*
+ * When entering from userspace we mostly have the AMR/IAMR
+ * different from kernel default values. Hence don't compare.
+ */
+ amr = mfspr(SPRN_AMR);
+ iamr = mfspr(SPRN_IAMR);
+ regs->amr = amr;
+ regs->iamr = iamr;
+ if (mmu_has_feature(MMU_FTR_KUAP)) {
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ flush_needed = true;
+ }
+ if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+ mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+ flush_needed = true;
+ }
+ if (flush_needed)
+ isync();
+ } else
+#endif
+ kuap_assert_locked();
+
+ booke_restore_dbcr0();
+
+ account_cpu_user_entry();
+
+ account_stolen_time();
+
+ /*
+ * This is not required for the syscall exit path, but makes the
+ * stack frame look nicer. If this was initialised in the first stack
+ * frame, or if the unwinder was taught the first stack frame always
+ * returns to user with IRQS_ENABLED, this store could be avoided!
+ */
+ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+ /*
+ * If system call is called with TM active, set _TIF_RESTOREALL to
+ * prevent RFSCV being used to return to userspace, because POWER9
+ * TM implementation has problems with this instruction returning to
+ * transactional state. Final register values are not relevant because
+ * the transaction will be aborted upon return anyway. Or in the case
+ * of unsupported_scv SIGILL fault, the return state does not much
+ * matter because it's an edge case.
+ */
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+ set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+ /*
+ * If the system call was made with a transaction active, doom it and
+ * return without performing the system call. Unless it was an
+ * unsupported scv vector, in which case it's treated like an illegal
+ * instruction.
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+ !trap_is_unsupported_scv(regs)) {
+ /* Enable TM in the kernel, and disable EE (for scv) */
+ hard_irq_disable();
+ mtmsr(mfmsr() | MSR_TM);
+
+ /* tabort, this dooms the transaction, nothing else */
+ asm volatile(".long 0x7c00071d | ((%0) << 16)"
+ :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+ /*
+ * Userspace will never see the return value. Execution will
+ * resume after the tbegin. of the aborted transaction with the
+ * checkpointed register state. A context switch could occur
+ * or signal delivered to the process before resuming the
+ * doomed transaction context, but that should all be handled
+ * as expected.
+ */
+ return -ENOSYS;
+ }
+#endif // CONFIG_PPC_TRANSACTIONAL_MEM
+
+ local_irq_enable();
+
+ if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test against NR_syscalls will fail and the return
+ * value to be used is in regs->gpr[3].
+ */
+ r0 = do_syscall_trace_enter(regs);
+ if (unlikely(r0 >= NR_syscalls))
+ return regs->gpr[3];
+
+ } else if (unlikely(r0 >= NR_syscalls)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ return -ENOSYS;
+ }
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+ // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig
+ f = (void *)sys_call_table[r0];
+ ret = f(regs);
+#else
+ if (unlikely(is_compat_task())) {
+ unsigned long r3, r4, r5, r6, r7, r8;
+
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 = regs->gpr[3] & 0x00000000ffffffffULL;
+ r4 = regs->gpr[4] & 0x00000000ffffffffULL;
+ r5 = regs->gpr[5] & 0x00000000ffffffffULL;
+ r6 = regs->gpr[6] & 0x00000000ffffffffULL;
+ r7 = regs->gpr[7] & 0x00000000ffffffffULL;
+ r8 = regs->gpr[8] & 0x00000000ffffffffULL;
+
+ ret = f(r3, r4, r5, r6, r7, r8);
+ } else {
+ f = (void *)sys_call_table[r0];
+
+ ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+ regs->gpr[6], regs->gpr[7], regs->gpr[8]);
+ }
+#endif
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: the powerpc architecture
+ * may have two kinds of stack alignment (16-bytes and 8-bytes).
+ *
+ * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
+ */
+ choose_random_kstack_offset(mftb());
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 078608ec2e92..68ebb23a5af4 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -35,26 +35,18 @@
#include <asm/syscalls.h>
#include <asm/time.h>
#include <asm/unistd.h>
-#include <asm/asm-prototypes.h>
-static inline long do_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off, int shift)
+static long do_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off, int shift)
{
- long ret = -EINVAL;
-
if (!arch_validate_prot(prot, addr))
- goto out;
+ return -EINVAL;
- if (shift) {
- if (off & ((1 << shift) - 1))
- goto out;
- off >>= shift;
- }
+ if (!IS_ALIGNED(off, 1 << shift))
+ return -EINVAL;
- ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off);
-out:
- return ret;
+ return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift);
}
SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
@@ -64,6 +56,16 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
}
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE6(mmap2,
+ unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, off_4k)
+{
+ return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12);
+}
+#endif
+
SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, off_t, offset)
@@ -71,58 +73,47 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
}
-#ifdef CONFIG_PPC32
-/*
- * Due to some executables calling the wrong select we sometimes
- * get wrong args. This determines how the args are being passed
- * (a single ptr to them all args passed) then calls
- * sys_select() with the appropriate args. -- Cort
- */
-int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
-{
- if ( (unsigned long)n >= 4096 )
- {
- unsigned long __user *buffer = (unsigned long __user *)n;
- if (!access_ok(buffer, 5*sizeof(unsigned long))
- || __get_user(n, buffer)
- || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
- || __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
- || __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
- || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4))))
- return -EFAULT;
- }
- return sys_select(n, inp, outp, exp, tvp);
-}
-#endif
-
#ifdef CONFIG_PPC64
-long ppc64_personality(unsigned long personality)
+static long do_ppc64_personality(unsigned long personality)
{
long ret;
if (personality(current->personality) == PER_LINUX32
&& personality(personality) == PER_LINUX)
personality = (personality & ~PER_MASK) | PER_LINUX32;
- ret = sys_personality(personality);
+ ret = ksys_personality(personality);
if (personality(ret) == PER_LINUX32)
ret = (ret & ~PER_MASK) | PER_LINUX;
return ret;
}
-#endif
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
- u32 len_high, u32 len_low)
+SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+ return do_ppc64_personality(personality);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+ return do_ppc64_personality(personality);
+}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
+
+SYSCALL_DEFINE6(ppc_fadvise64_64,
+ int, fd, int, advice, u32, offset_high, u32, offset_low,
+ u32, len_high, u32, len_low)
{
- return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
- (u64)len_high << 32 | len_low, advice);
+ return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low),
+ merge_64(len_high, len_low), advice);
}
SYSCALL_DEFINE0(switch_endian)
{
struct thread_info *ti;
- current->thread.regs->msr ^= MSR_LE;
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr ^ MSR_LE);
/*
* Set TIF_RESTOREALL so that r3 isn't clobbered on return to
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
index 27b48954808d..9d7bd81510b8 100644
--- a/arch/powerpc/kernel/syscalls/Makefile
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -2,62 +2,47 @@
kapi := arch/$(SRCARCH)/include/generated/asm
uapi := arch/$(SRCARCH)/include/generated/uapi/asm
-_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
- $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+$(shell mkdir -p $(uapi) $(kapi))
-syscall := $(srctree)/$(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syscall := $(src)/syscall.tbl
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
-syshdr_abis_unistd_32 := common,nospu,32
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: abis := common,nospu,32
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-syshdr_abis_unistd_64 := common,nospu,64
-$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+$(uapi)/unistd_64.h: abis := common,nospu,64
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-systbl_abis_syscall_table_32 := common,nospu,32
-systbl_abi_syscall_table_32 := 32
-$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_32.h: abis := common,nospu,32
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_64 := common,nospu,64
-systbl_abi_syscall_table_64 := 64
-$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_64.h: abis := common,nospu,64
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_c32 := common,nospu,32
-systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
- $(call if_changed,systbl)
-
-systbl_abis_syscall_table_spu := common,spu
-systbl_abi_syscall_table_spu := spu
-$(kapi)/syscall_table_spu.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_spu.h: abis := common,spu
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
uapisyshdr-y += unistd_32.h unistd_64.h
kapisyshdr-y += syscall_table_32.h \
syscall_table_64.h \
- syscall_table_c32.h \
syscall_table_spu.h
-targets += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
@:
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..17173b82ca21 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,7 +9,7 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 nospu fork ppc_fork
+2 nospu fork sys_fork
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -32,7 +32,7 @@
18 spu oldstat sys_ni_syscall
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid
-21 nospu mount sys_mount compat_sys_mount
+21 nospu mount sys_mount
22 32 umount sys_oldumount
22 64 umount sys_ni_syscall
22 spu umount sys_ni_syscall
@@ -110,7 +110,7 @@
79 common settimeofday sys_settimeofday compat_sys_settimeofday
80 common getgroups sys_getgroups
81 common setgroups sys_setgroups
-82 32 select ppc_select sys_ni_syscall
+82 32 select sys_old_select compat_sys_old_select
82 64 select sys_ni_syscall
82 spu select sys_ni_syscall
83 common symlink sys_symlink
@@ -158,7 +158,7 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 nospu clone ppc_clone
+120 nospu clone sys_clone
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -176,11 +176,11 @@
131 nospu quotactl sys_quotactl
132 common getpgid sys_getpgid
133 common fchdir sys_fchdir
-134 common bdflush sys_bdflush
+134 common bdflush sys_ni_syscall
135 common sysfs sys_sysfs
-136 32 personality sys_personality ppc64_personality
-136 64 personality ppc64_personality
-136 spu personality ppc64_personality
+136 32 personality sys_personality compat_sys_ppc64_personality
+136 64 personality sys_ppc64_personality
+136 spu personality sys_ppc64_personality
137 common afs_syscall sys_ni_syscall
138 common setfsuid sys_setfsuid
139 common setfsgid sys_setfsgid
@@ -189,11 +189,11 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -228,8 +228,10 @@
176 64 rt_sigtimedwait sys_rt_sigtimedwait
177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
-179 common pread64 sys_pread64 compat_sys_pread64
-180 common pwrite64 sys_pwrite64 compat_sys_pwrite64
+179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
+179 64 pread64 sys_pread64
+180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
+180 64 pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -240,12 +242,13 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 nospu vfork ppc_vfork
+189 nospu vfork sys_vfork
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
-191 common readahead sys_readahead compat_sys_readahead
+191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
+191 64 readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
-193 32 truncate64 sys_truncate64 compat_sys_truncate64
-194 32 ftruncate64 sys_ftruncate64 compat_sys_ftruncate64
+193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
+194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
195 32 stat64 sys_stat64
196 32 lstat64 sys_lstat64
197 32 fstat64 sys_fstat64
@@ -288,9 +291,10 @@
230 common io_submit sys_io_submit compat_sys_io_submit
231 common io_cancel sys_io_cancel
232 nospu set_tid_address sys_set_tid_address
-233 common fadvise64 sys_fadvise64 ppc32_fadvise64
+233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
+233 64 fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
-235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
237 common epoll_ctl sys_epoll_ctl
238 common epoll_wait sys_epoll_wait
@@ -316,26 +320,24 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext ppc32_swapcontext
-249 64 swapcontext ppc64_swapcontext
-249 spu swapcontext sys_ni_syscall
+249 nospu swapcontext sys_swapcontext compat_sys_swapcontext
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
251 64 utimes sys_utimes
251 spu utimes sys_utimes
252 common statfs64 sys_statfs64 compat_sys_statfs64
253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
-254 32 fadvise64_64 ppc_fadvise64_64
+254 32 fadvise64_64 sys_ppc_fadvise64_64
254 spu fadvise64_64 sys_ni_syscall
255 common rtas sys_rtas
256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall
256 64 sys_debug_setcontext sys_ni_syscall
256 spu sys_debug_setcontext sys_ni_syscall
# 257 reserved for vserver
-258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
-259 nospu mbind sys_mbind compat_sys_mbind
-260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
-261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+258 nospu migrate_pages sys_migrate_pages
+259 nospu mbind sys_mbind
+260 nospu get_mempolicy sys_get_mempolicy
+261 nospu set_mempolicy sys_set_mempolicy
262 nospu mq_open sys_mq_open compat_sys_mq_open
263 nospu mq_unlink sys_mq_unlink
264 32 mq_timedsend sys_mq_timedsend_time32
@@ -363,7 +365,7 @@
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
-285 common vmsplice sys_vmsplice compat_sys_vmsplice
+285 common vmsplice sys_vmsplice
286 common openat sys_openat compat_sys_openat
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
@@ -383,7 +385,7 @@
298 common faccessat sys_faccessat
299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
-301 common move_pages sys_move_pages compat_sys_move_pages
+301 common move_pages sys_move_pages
302 common getcpu sys_getcpu
303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
304 32 utimensat sys_utimensat_time32
@@ -392,8 +394,11 @@
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
-308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2
-309 nospu fallocate sys_fallocate compat_sys_fallocate
+308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
+308 64 sync_file_range2 sys_sync_file_range2
+308 spu sync_file_range2 sys_sync_file_range2
+309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
+309 64 fallocate sys_fallocate
310 nospu subpage_prot sys_subpage_prot
311 32 timerfd_settime sys_timerfd_settime32
311 64 timerfd_settime sys_timerfd_settime
@@ -427,8 +432,8 @@
336 common recv sys_recv compat_sys_recv
337 common recvfrom sys_recvfrom compat_sys_recvfrom
338 common shutdown sys_shutdown
-339 common setsockopt sys_setsockopt compat_sys_setsockopt
-340 common getsockopt sys_getsockopt compat_sys_getsockopt
+339 common setsockopt sys_setsockopt sys_setsockopt
+340 common getsockopt sys_getsockopt sys_getsockopt
341 common sendmsg sys_sendmsg compat_sys_sendmsg
342 common recvmsg sys_recvmsg compat_sys_recvmsg
343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
@@ -443,8 +448,8 @@
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
-351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
353 nospu finit_module sys_finit_module
354 nospu kcmp sys_kcmp
355 common sched_setattr sys_sched_setattr
@@ -456,7 +461,7 @@
361 common bpf sys_bpf
362 nospu execveat sys_execveat compat_sys_execveat
363 32 switch_endian sys_ni_syscall
-363 64 switch_endian ppc_switch_endian
+363 64 switch_endian sys_switch_endian
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
@@ -516,4 +521,30 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 nospu clone3 ppc_clone3
+435 nospu clone3 sys_clone3
+436 common close_range sys_close_range
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
+441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
+443 common quotactl_fd sys_quotactl_fd
+444 common landlock_create_ruleset sys_landlock_create_ruleset
+445 common landlock_add_rule sys_landlock_add_rule
+446 common landlock_restrict_self sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_ni_syscall
+454 common futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
deleted file mode 100644
index c0a9a32937f1..000000000000
--- a/arch/powerpc/kernel/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry compat ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */" "${fileguard}"
- printf "\n"
-) > "$out"
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
deleted file mode 100644
index f7393a7b18aa..000000000000
--- a/arch/powerpc/kernel/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry compat ; do
- if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
- emit $((nxt+offset)) $((nr+offset)) $compat
- else
- emit $((nxt+offset)) $((nr+offset)) $entry
- fi
- nxt=$((nr+1))
- done
-) > "$out"
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676da11cb..0f39a6b84132 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -9,16 +9,17 @@
#include <linux/nodemask.h>
#include <linux/cpumask.h>
#include <linux/notifier.h>
+#include <linux/of.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/hvcall.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/idle.h>
#include <asm/svm.h>
#include "cacheinfo.h"
@@ -31,29 +32,27 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
#ifdef CONFIG_PPC64
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ * up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
static ssize_t store_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- ssize_t ret;
- long snooze;
-
- ret = sscanf(buf, "%ld", &snooze);
- if (ret != 1)
- return -EINVAL;
-
- per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+ current->comm, current->pid);
return count;
}
@@ -61,9 +60,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+ pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+ current->comm, current->pid);
+ return sprintf(buf, "100\n");
}
static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -71,23 +70,170 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
static int __init setup_smt_snooze_delay(char *str)
{
- unsigned int cpu;
- long snooze;
-
if (!cpu_has_feature(CPU_FTR_SMT))
return 1;
- snooze = simple_strtol(str, NULL, 10);
- for_each_possible_cpu(cpu)
- per_cpu(smt_snooze_delay, cpu) = snooze;
-
+ pr_warn("smt-snooze-delay command line option has no effect\n");
return 1;
}
__setup("smt-snooze-delay=", setup_smt_snooze_delay);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
+static void read_##NAME(void *val) \
+{ \
+ *(unsigned long *)val = mfspr(ADDRESS); \
+} \
+static void write_##NAME(void *val) \
+{ \
+ EXTRA; \
+ mtspr(ADDRESS, *(unsigned long *)val); \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+ store_##NAME(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+ return count; \
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#ifdef CONFIG_PPC64
+
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
+static unsigned long dscr_default;
+
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void read_dscr(void *val)
+{
+ *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void write_dscr(void *val)
+{
+ get_paca()->dscr_default = *(unsigned long *)val;
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = *(unsigned long *)val;
+ mtspr(SPRN_DSCR, *(unsigned long *)val);
+ }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+ attr->attr.mode |= 0200;
+}
+
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
+static ssize_t show_dscr_default(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
+static ssize_t __used store_dscr_default(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ on_each_cpu(write_dscr, &val, 1);
+
+ return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void __init sysfs_create_dscr_default(void)
+{
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ struct device *dev_root;
+ int cpu;
+
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_dscr_default);
+ put_device(dev_root);
+ }
+ }
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
#define MAX_BIT 63
static u64 pw20_wt;
@@ -407,84 +553,35 @@ void ppc_enable_pmcs(void)
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
-static void read_##NAME(void *val) \
-{ \
- *(unsigned long *)val = mfspr(ADDRESS); \
-} \
-static void write_##NAME(void *val) \
-{ \
- EXTRA; \
- mtspr(ADDRESS, *(unsigned long *)val); \
-}
-#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
-static ssize_t show_##NAME(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
- return sprintf(buf, "%lx\n", val); \
-} \
-static ssize_t __used \
- store_##NAME(struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- int ret = sscanf(buf, "%lx", &val); \
- if (ret != 1) \
- return -EINVAL; \
- smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
- return count; \
-}
-
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-#define SYSFS_SPRSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-
-#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
*/
-#if defined(CONFIG_PPC64)
+#ifdef CONFIG_PMU_SYSFS
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_PA6T 1
-#elif defined(CONFIG_PPC_BOOK3S_32)
-#define HAS_PPC_PMC_CLASSIC 1
-#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_G4 1
#endif
+#ifdef CONFIG_PPC64
+#define HAS_PPC_PMC_PA6T 1
+#define HAS_PPC_PMC56 1
+#endif
-#ifdef HAS_PPC_PMC_CLASSIC
-SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
-SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
-SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
-SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
-SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
-SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
-SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
-SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
-
-#ifdef HAS_PPC_PMC_G4
-SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#ifdef CONFIG_PPC_BOOK3S_32
+#define HAS_PPC_PMC_G4 1
#endif
+#endif /* CONFIG_PMU_SYSFS */
+#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC)
+#define HAS_PPC_PA6T
+#endif
+/*
+ * SPRs which are not related to PMU.
+ */
#ifdef CONFIG_PPC64
-SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
-SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-
-SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
@@ -495,115 +592,40 @@ SYSFS_SPRSETUP(tscr, SPRN_TSCR);
enable write when needed with a separate function.
Lets be conservative and default to pseries.
*/
-static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
+#endif /* CONFIG_PPC64 */
-/*
- * This is the system wide DSCR register default value. Any
- * change to this default value through the sysfs interface
- * will update all per cpu DSCR default values across the
- * system stored in their respective PACA structures.
- */
-static unsigned long dscr_default;
-
-/**
- * read_dscr() - Fetch the cpu specific DSCR default
- * @val: Returned cpu specific DSCR default value
- *
- * This function returns the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void read_dscr(void *val)
-{
- *(unsigned long *)val = get_paca()->dscr_default;
-}
-
-
-/**
- * write_dscr() - Update the cpu specific DSCR default
- * @val: New cpu specific DSCR default value to update
- *
- * This function updates the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void write_dscr(void *val)
-{
- get_paca()->dscr_default = *(unsigned long *)val;
- if (!current->thread.dscr_inherit) {
- current->thread.dscr = *(unsigned long *)val;
- mtspr(SPRN_DSCR, *(unsigned long *)val);
- }
-}
-
-SYSFS_SPRSETUP_SHOW_STORE(dscr);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
-
-static void add_write_permission_dev_attr(struct device_attribute *attr)
-{
- attr->attr.mode |= 0200;
-}
-
-/**
- * show_dscr_default() - Fetch the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- *
- * This function returns the system wide DSCR default value.
- */
-static ssize_t show_dscr_default(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%lx\n", dscr_default);
-}
-
-/**
- * store_dscr_default() - Update the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- * @count: Size of the update
- *
- * This function updates the system wide DSCR default value.
- */
-static ssize_t __used store_dscr_default(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- unsigned long val;
- int ret = 0;
-
- ret = sscanf(buf, "%lx", &val);
- if (ret != 1)
- return -EINVAL;
- dscr_default = val;
+#ifdef HAS_PPC_PMC_CLASSIC
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+#endif
- on_each_cpu(write_dscr, &val, 1);
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
- return count;
-}
+#ifdef HAS_PPC_PMC56
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-static DEVICE_ATTR(dscr_default, 0600,
- show_dscr_default, store_dscr_default);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3);
-static void sysfs_create_dscr_default(void)
-{
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- int err = 0;
- int cpu;
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3);
+#endif /* HAS_PPC_PMC56 */
- dscr_default = spr_default_dscr;
- for_each_possible_cpu(cpu)
- paca_ptrs[cpu]->dscr_default = dscr_default;
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
- }
-}
-#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
@@ -612,7 +634,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_MISC
+#endif
+
+#ifdef HAS_PPC_PA6T
SYSFS_SPRSETUP(hid0, SPRN_HID0);
SYSFS_SPRSETUP(hid1, SPRN_HID1);
SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -641,15 +665,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_MISC */
-#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PA6T */
#ifdef HAS_PPC_PMC_IBM
static struct device_attribute ibm_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
};
-#endif /* HAS_PPC_PMC_G4 */
+#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
static struct device_attribute g4_common_attrs[] = {
@@ -659,6 +682,7 @@ static struct device_attribute g4_common_attrs[] = {
};
#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_CLASSIC
static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
@@ -666,14 +690,16 @@ static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
-#ifdef CONFIG_PPC64
+#ifdef HAS_PPC_PMC56
__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
#endif
};
+#endif
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
static struct device_attribute pa6t_attrs[] = {
+#ifdef HAS_PPC_PMC_PA6T
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
@@ -682,7 +708,8 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_MISC
+#endif
+#ifdef HAS_PPC_PA6T
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -711,10 +738,9 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_MISC */
+#endif /* HAS_PPC_PA6T */
};
-#endif /* HAS_PPC_PMC_PA6T */
-#endif /* HAS_PPC_PMC_CLASSIC */
+#endif
#ifdef CONFIG_PPC_SVM
static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
@@ -723,16 +749,89 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR(svm, 0444, show_svm, NULL);
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
- device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+ struct device *dev_root = bus_get_dev_root(&cpu_subsys);
+
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_svm);
+ put_device(dev_root);
+ }
}
#else
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
}
#endif /* CONFIG_PPC_SVM */
+#ifdef CONFIG_PPC_PSERIES
+static void read_idle_purr(void *val)
+{
+ u64 *ret = val;
+
+ *ret = read_this_idle_purr();
+}
+
+static ssize_t idle_purr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
+
+static void create_idle_purr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_purr);
+}
+
+static void remove_idle_purr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_purr);
+}
+
+static void read_idle_spurr(void *val)
+{
+ u64 *ret = val;
+
+ *ret = read_this_idle_spurr();
+}
+
+static ssize_t idle_spurr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
+
+static void create_idle_spurr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_spurr);
+}
+
+static void remove_idle_spurr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_spurr);
+}
+
+#else /* CONFIG_PPC_PSERIES */
+#define create_idle_purr_file(s)
+#define remove_idle_purr_file(s)
+#define create_idle_spurr_file(s)
+#define remove_idle_spurr_file(s)
+#endif /* CONFIG_PPC_PSERIES */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -754,25 +853,25 @@ static int register_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -787,17 +886,25 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ device_create_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
if (cpu_has_feature(CPU_FTR_PURR)) {
if (!firmware_has_feature(FW_FEATURE_LPAR))
add_write_permission_dev_attr(&dev_attr_purr);
device_create_file(s, &dev_attr_purr);
+ create_idle_purr_file(s);
}
- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_create_file(s, &dev_attr_spurr);
+ create_idle_spurr_file(s);
+ }
if (cpu_has_feature(CPU_FTR_DSCR))
device_create_file(s, &dev_attr_dscr);
@@ -810,7 +917,7 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
device_create_file(s, &dev_attr_pw20_state);
device_create_file(s, &dev_attr_pw20_wait_time);
@@ -831,7 +938,8 @@ static int unregister_cpu_online(unsigned int cpu)
struct device_attribute *attrs, *pmc_attrs;
int i, nattrs;
- BUG_ON(!c->hotpluggable);
+ if (WARN_RATELIMIT(!c->hotpluggable, "cpu %d can't be offlined\n", cpu))
+ return -EBUSY;
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SMT))
@@ -843,25 +951,25 @@ static int unregister_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -876,14 +984,23 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);
- if (cpu_has_feature(CPU_FTR_PURR))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ device_remove_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
+ if (cpu_has_feature(CPU_FTR_PURR)) {
device_remove_file(s, &dev_attr_purr);
+ remove_idle_purr_file(s);
+ }
- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_remove_file(s, &dev_attr_spurr);
+ remove_idle_spurr_file(s);
+ }
if (cpu_has_feature(CPU_FTR_DSCR))
device_remove_file(s, &dev_attr_dscr);
@@ -896,7 +1013,7 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
device_remove_file(s, &dev_attr_pw20_state);
device_remove_file(s, &dev_attr_pw20_wait_time);
@@ -1003,14 +1120,6 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void register_nodes(void)
-{
- int i;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- register_one_node(i);
-}
-
int sysfs_add_device_to_node(struct device *dev, int nid)
{
struct node *node = node_devices[nid];
@@ -1025,13 +1134,6 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
}
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
-
-#else
-static void register_nodes(void)
-{
- return;
-}
-
#endif
/* Only valid if CPU is present. */
@@ -1048,11 +1150,10 @@ static int __init topology_init(void)
{
int cpu, r;
- register_nodes();
-
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
+#ifdef CONFIG_HOTPLUG_CPU
/*
* For now, we just see if the system supports making
* the RTAS calls for CPU hotplug. But, there may be a
@@ -1060,8 +1161,9 @@ static int __init topology_init(void)
* CPU. For instance, the boot cpu might never be valid
* for hotplugging.
*/
- if (ppc_md.cpu_die)
+ if (smp_ops && smp_ops->cpu_offline_self)
c->hotpluggable = 1;
+#endif
if (cpu_online(cpu) || c->hotpluggable) {
register_cpu(c, cpu);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
deleted file mode 100644
index 5b905a2f4e4d..000000000000
--- a/arch/powerpc/kernel/systbl.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains the table of syscall-handling functions.
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
- * and Paul Mackerras.
- *
- * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
- * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
- */
-
-#include <asm/ppc_asm.h>
-
-.section .rodata,"a"
-
-#ifdef CONFIG_PPC64
- .p2align 3
-#endif
-
-.globl sys_call_table
-sys_call_table:
-#ifdef CONFIG_PPC64
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
-#include <asm/syscall_table_64.h>
-#undef __SYSCALL
-#else
-#define __SYSCALL(nr, entry) .long entry
-#include <asm/syscall_table_32.h>
-#undef __SYSCALL
-#endif
-
-#ifdef CONFIG_COMPAT
-.globl compat_sys_call_table
-compat_sys_call_table:
-#define compat_sys_sigsuspend sys_sigsuspend
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
-#include <asm/syscall_table_c32.h>
-#undef __SYSCALL
-#endif
diff --git a/arch/powerpc/kernel/systbl.c b/arch/powerpc/kernel/systbl.c
new file mode 100644
index 000000000000..4305f2a2162f
--- /dev/null
+++ b/arch/powerpc/kernel/systbl.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the table of syscall-handling functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/unistd.h>
+#include <asm/syscalls.h>
+
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+
+#undef __SYSCALL
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#define __SYSCALL(nr, entry) [nr] = entry,
+#else
+/*
+ * Coerce syscall handlers with arbitrary parameters to common type
+ * requires cast to void* to avoid -Wcast-function-type.
+ */
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+#endif
+
+const syscall_fn sys_call_table[] = {
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif
+};
+
+#ifdef CONFIG_COMPAT
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+const syscall_fn compat_sys_call_table[] = {
+#include <asm/syscall_table_32.h>
+};
+#endif /* CONFIG_COMPAT */
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
deleted file mode 100644
index c7ac3ed657c4..000000000000
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0-or-later
-#
-# Just process the CPP output from systbl_chk.c and complain
-# if anything is out of order.
-#
-# Copyright © 2008 IBM Corporation
-#
-
-awk 'BEGIN { num = -1; } # Ignore the beginning of the file
- /^#/ { next; }
- /^[ \t]*$/ { next; }
- /^START_TABLE/ { num = 0; next; }
- /^END_TABLE/ {
- if (num != $2) {
- printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n",
- $2, num - 1;
- exit(1);
- }
- num = -1; # Ignore the rest of the file
- }
- {
- if (num == -1) next;
- if (($1 != -1) && ($1 != num)) {
- printf "Error: Syscall %s out of order (expected %s)\n",
- $1, num;
- exit(1);
- };
- num++;
- }' "$1"
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e2ab8a111b69..cba6dd15de3b 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -13,21 +13,22 @@
*/
#include <linux/errno.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/reg.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/8xx_immap.h>
#include <asm/machdep.h>
-#include <asm/asm-prototypes.h>
#include "setup.h"
@@ -39,9 +40,7 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
-struct timer_list tau_timer;
-
-#undef DEBUG
+static bool tau_int_enable;
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
@@ -50,72 +49,49 @@ struct timer_list tau_timer;
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */
/* configurable values for shrinking the window */
-#define shrink_timer 2*HZ /* period between shrinking the window */
+#define shrink_timer 2000 /* period between shrinking the window */
#define min_window 2 /* minimum window size, degrees C */
static void set_thresholds(unsigned long cpu)
{
-#ifdef CONFIG_TAU_INT
- /*
- * setup THRM1,
- * threshold, valid bit, enable interrupts, interrupt when below threshold
- */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+ u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
- /* setup THRM2,
- * threshold, valid bit, enable interrupts, interrupt when above threshold
- */
- mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
-#else
- /* same thing but don't enable interrupts */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
- mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
-#endif
+ /* setup THRM1, threshold, valid bit, interrupt when below threshold */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
+
+ /* setup THRM2, threshold, valid bit, interrupt when above threshold */
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
}
static void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
@@ -124,33 +100,29 @@ static void TAUupdate(int cpu)
* with interrupts disabled
*/
-void TAUException(struct pt_regs * regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
int cpu = smp_processor_id();
- irq_enter();
tau[cpu].interrupts++;
TAUupdate(cpu);
-
- irq_exit();
}
#endif /* CONFIG_TAU_INT */
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
-#ifndef CONFIG_TAU_INT
- TAUupdate(cpu);
-#endif
+ if (!tau_int_enable)
+ TAUupdate(cpu);
+
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
@@ -173,32 +145,26 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ /* Restart thermal sensor comparisons and interrupts.
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
-
- local_irq_restore(flags);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
}
-static void tau_timeout_smp(struct timer_list *unused)
-{
+static struct workqueue_struct *tau_workq;
- /* schedule ourselves to be run again */
- mod_timer(&tau_timer, jiffies + shrink_timer) ;
+static void tau_work_func(struct work_struct *work)
+{
+ msleep(shrink_timer);
on_each_cpu(tau_timeout, NULL, 0);
+ /* schedule ourselves to be run again */
+ queue_work(tau_workq, work);
}
+static DECLARE_WORK(tau_work, tau_work_func);
+
/*
* setup the TAU
*
@@ -231,21 +197,19 @@ static int __init TAU_init(void)
return 1;
}
+ tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+ !strcmp(cur_cpu_spec->platform, "ppc750");
- /* first, set up the window shrinking timer */
- timer_setup(&tau_timer, tau_timeout_smp, 0);
- tau_timer.expires = jiffies + shrink_timer;
- add_timer(&tau_timer);
+ tau_workq = alloc_ordered_workqueue("tau", 0);
+ if (!tau_workq)
+ return -ENOMEM;
on_each_cpu(TAU_init_smp, NULL, 0);
- printk("Thermal assist unit ");
-#ifdef CONFIG_TAU_INT
- printk("using interrupts, ");
-#else
- printk("using timers, ");
-#endif
- printk("shrink_timer: %d jiffies\n", shrink_timer);
+ queue_work(tau_workq, &tau_work);
+
+ pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+ tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1168e8b37e30..df20cf201f74 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -31,6 +31,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
@@ -50,40 +51,32 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/suspend.h>
-#include <linux/sched/cputime.h>
#include <linux/processor.h>
-#include <asm/trace.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <asm/trace.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
-#include <asm/asm-prototypes.h>
+#include <asm/mce.h>
/* powerpc clocksource/clockevent code */
#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>
-static u64 rtc_read(struct clocksource *);
-static struct clocksource clocksource_rtc = {
- .name = "rtc",
- .rating = 400,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .mask = CLOCKSOURCE_MASK(64),
- .read = rtc_read,
-};
-
static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
.name = "timebase",
@@ -91,10 +84,12 @@ static struct clocksource clocksource_timebase = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
.read = timebase_read,
+ .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
@@ -113,7 +108,13 @@ struct clock_event_device decrementer_clockevent = {
};
EXPORT_SYMBOL(decrementer_clockevent);
-DEFINE_PER_CPU(u64, decrementers_next_tb);
+/*
+ * This always puts next_tb beyond now, so the clock event will never fire
+ * with the usual comparison, no need for a separate test for stopped.
+ */
+#define DEC_CLOCKEVENT_STOPPED ~0ULL
+DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -129,7 +130,7 @@ unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
-EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
+EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
@@ -150,25 +151,6 @@ bool tb_invalid;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
- * Factor for converting from cputime_t (timebase ticks) to
- * microseconds. This is stored as 0.64 fixed-point binary fraction.
- */
-u64 __cputime_usec_factor;
-EXPORT_SYMBOL(__cputime_usec_factor);
-
-#ifdef CONFIG_PPC_SPLPAR
-void (*dtl_consumer)(struct dtl_entry *, u64);
-#endif
-
-static void calc_cputime_factors(void)
-{
- struct div_result res;
-
- div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
- __cputime_usec_factor = res.result_low;
-}
-
-/*
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
@@ -181,99 +163,6 @@ static inline unsigned long read_spurr(unsigned long tb)
return tb;
}
-#ifdef CONFIG_PPC_SPLPAR
-
-/*
- * Scan the dispatch trace log and count up the stolen time.
- * Should be called with interrupts disabled.
- */
-static u64 scan_dispatch_log(u64 stop_tb)
-{
- u64 i = local_paca->dtl_ridx;
- struct dtl_entry *dtl = local_paca->dtl_curr;
- struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
- struct lppaca *vpa = local_paca->lppaca_ptr;
- u64 tb_delta;
- u64 stolen = 0;
- u64 dtb;
-
- if (!dtl)
- return 0;
-
- if (i == be64_to_cpu(vpa->dtl_idx))
- return 0;
- while (i < be64_to_cpu(vpa->dtl_idx)) {
- dtb = be64_to_cpu(dtl->timebase);
- tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
- be32_to_cpu(dtl->ready_to_enqueue_time);
- barrier();
- if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
- /* buffer has overflowed */
- i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
- dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
- continue;
- }
- if (dtb > stop_tb)
- break;
- if (dtl_consumer)
- dtl_consumer(dtl, i);
- stolen += tb_delta;
- ++i;
- ++dtl;
- if (dtl == dtl_end)
- dtl = local_paca->dispatch_log;
- }
- local_paca->dtl_ridx = i;
- local_paca->dtl_curr = dtl;
- return stolen;
-}
-
-/*
- * Accumulate stolen time by scanning the dispatch trace log.
- * Called on entry from user mode.
- */
-void notrace accumulate_stolen_time(void)
-{
- u64 sst, ust;
- unsigned long save_irq_soft_mask = irq_soft_mask_return();
- struct cpu_accounting_data *acct = &local_paca->accounting;
-
- /* We are called early in the exception entry, before
- * soft/hard_enabled are sync'ed to the expected state
- * for the exception. We are hard disabled but the PACA
- * needs to reflect that so various debug stuff doesn't
- * complain
- */
- irq_soft_mask_set(IRQS_DISABLED);
-
- sst = scan_dispatch_log(acct->starttime_user);
- ust = scan_dispatch_log(acct->starttime);
- acct->stime -= sst;
- acct->utime -= ust;
- acct->steal_time += ust + sst;
-
- irq_soft_mask_set(save_irq_soft_mask);
-}
-
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
- if (!firmware_has_feature(FW_FEATURE_SPLPAR))
- return 0;
-
- if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
- return scan_dispatch_log(stop_tb);
-
- return 0;
-}
-
-#else /* CONFIG_PPC_SPLPAR */
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
- return 0;
-}
-
-#endif /* CONFIG_PPC_SPLPAR */
-
/*
* Account time for a transition between system, hard irq
* or soft irq state.
@@ -318,12 +207,11 @@ static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
return stime_scaled;
}
-static unsigned long vtime_delta(struct task_struct *tsk,
+static unsigned long vtime_delta(struct cpu_accounting_data *acct,
unsigned long *stime_scaled,
unsigned long *steal_time)
{
unsigned long now, stime;
- struct cpu_accounting_data *acct = get_accounting(tsk);
WARN_ON_ONCE(!irqs_disabled());
@@ -333,34 +221,39 @@ static unsigned long vtime_delta(struct task_struct *tsk,
*stime_scaled = vtime_delta_scaled(acct, now, stime);
- *steal_time = calculate_stolen_time(now);
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR))
+ *steal_time = pseries_calculate_stolen_time(now);
+ else
+ *steal_time = 0;
return stime;
}
+static void vtime_delta_kernel(struct cpu_accounting_data *acct,
+ unsigned long *stime, unsigned long *stime_scaled)
+{
+ unsigned long steal_time;
+
+ *stime = vtime_delta(acct, stime_scaled, &steal_time);
+ *stime -= min(*stime, steal_time);
+ acct->steal_time += steal_time;
+}
+
void vtime_account_kernel(struct task_struct *tsk)
{
- unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
+ unsigned long stime, stime_scaled;
- stime = vtime_delta(tsk, &stime_scaled, &steal_time);
-
- stime -= min(stime, steal_time);
- acct->steal_time += steal_time;
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
- if ((tsk->flags & PF_VCPU) && !irq_count()) {
+ if (tsk->flags & PF_VCPU) {
acct->gtime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->utime_scaled += stime_scaled;
#endif
} else {
- if (hardirq_count())
- acct->hardirq_time += stime;
- else if (in_serving_softirq())
- acct->softirq_time += stime;
- else
- acct->stime += stime;
-
+ acct->stime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->stime_scaled += stime_scaled;
#endif
@@ -373,10 +266,34 @@ void vtime_account_idle(struct task_struct *tsk)
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
- stime = vtime_delta(tsk, &stime_scaled, &steal_time);
+ stime = vtime_delta(acct, &stime_scaled, &steal_time);
acct->idle_time += stime + steal_time;
}
+static void vtime_account_irq_field(struct cpu_accounting_data *acct,
+ unsigned long *field)
+{
+ unsigned long stime, stime_scaled;
+
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
+ *field += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->stime_scaled += stime_scaled;
+#endif
+}
+
+void vtime_account_softirq(struct task_struct *tsk)
+{
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->softirq_time);
+}
+
+void vtime_account_hardirq(struct task_struct *tsk)
+{
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->hardirq_time);
+}
+
static void vtime_flush_scaled(struct task_struct *tsk,
struct cpu_accounting_data *acct)
{
@@ -437,27 +354,14 @@ void vtime_flush(struct task_struct *tsk)
acct->hardirq_time = 0;
acct->softirq_time = 0;
}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#define calc_cputime_factors()
-#endif
-
-void __delay(unsigned long loops)
+void __no_kcsan __delay(unsigned long loops)
{
unsigned long start;
- int diff;
spin_begin();
- if (__USE_RTC()) {
- start = get_rtcl();
- do {
- /* the RTCL register wraps at 1000000000 */
- diff = get_rtcl() - start;
- if (diff < 0)
- diff += 1000000000;
- spin_cpu_relax();
- } while (diff < loops);
- } else if (tb_invalid) {
+ if (tb_invalid) {
/*
* TB is in error state and isn't ticking anymore.
* HMI handler was unable to recover from TB error.
@@ -465,15 +369,15 @@ void __delay(unsigned long loops)
*/
spin_cpu_relax();
} else {
- start = get_tbl();
- while (get_tbl() - start < loops)
+ start = mftb();
+ while (mftb() - start < loops)
spin_cpu_relax();
}
spin_end();
}
EXPORT_SYMBOL(__delay);
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
{
__delay(tb_ticks_per_usec * usecs);
}
@@ -522,35 +426,6 @@ static inline void clear_irq_work_pending(void)
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
-void arch_irq_work_raise(void)
-{
- preempt_disable();
- set_irq_work_pending_flag();
- /*
- * Non-nmi code running with interrupts disabled will replay
- * irq_happened before it re-enables interrupts, so setthe
- * decrementer there instead of causing a hardware exception
- * which would immediately hit the masked interrupt handler
- * and have the net effect of setting the decrementer in
- * irq_happened.
- *
- * NMI interrupts can not check this when they return, so the
- * decrementer hardware exception is raised, which will fire
- * when interrupts are next enabled.
- *
- * BookE does not support this yet, it must audit all NMI
- * interrupt handlers to ensure they call nmi_enter() so this
- * check would be correct.
- */
- if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) {
- set_dec(1);
- } else {
- hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_DEC;
- }
- preempt_enable();
-}
-
#else /* 32-bit */
DEFINE_PER_CPU(u8, irq_work_pending);
@@ -559,95 +434,133 @@ DEFINE_PER_CPU(u8, irq_work_pending);
#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)
+#endif /* 32 vs 64 bit */
+
void arch_irq_work_raise(void)
{
+ /*
+ * 64-bit code that uses irq soft-mask can just cause an immediate
+ * interrupt here that gets soft masked, if this is called under
+ * local_irq_disable(). It might be possible to prevent that happening
+ * by noticing interrupts are disabled and setting decrementer pending
+ * to be replayed when irqs are enabled. The problem there is that
+ * tracing can call irq_work_raise, including in code that does low
+ * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
+ * which could get tangled up if we're messing with the same state
+ * here.
+ */
preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
-#endif /* 32 vs 64 bit */
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (now >= *next_tb) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ } else {
+ now = *next_tb - now;
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
+ }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
*/
-void timer_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
struct clock_event_device *evt = this_cpu_ptr(&decrementers);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
struct pt_regs *old_regs;
u64 now;
- /* Some implementations of hotplug will get timer interrupts while
- * offline, just ignore these and we also need to set
- * decrementers_next_tb as MAX to make sure __check_irq_replay
- * don't replay timer interrupt when return, otherwise we'll trap
- * here infinitely :(
+ /*
+ * Some implementations of hotplug will get timer interrupts while
+ * offline, just ignore these.
*/
if (unlikely(!cpu_online(smp_processor_id()))) {
- *next_tb = ~(u64)0;
set_dec(decrementer_max);
return;
}
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions. When the
- * PPC_WATCHDOG (decrementer based) is configured, keep this at most
- * 31 bits, which is about 4 seconds on most systems, which gives
- * the watchdog a chance of catching timer interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable(regs)) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
+ __do_IRQ(regs);
#endif
old_regs = set_irq_regs(regs);
- irq_enter();
+
trace_timer_interrupt_entry(regs);
if (test_irq_work_pending()) {
clear_irq_work_pending();
+ mce_run_irq_context_handlers();
irq_work_run();
}
- now = get_tb_or_rtc();
+ now = get_tb();
if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
+ evt->event_handler(evt);
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
trace_timer_interrupt_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);
@@ -655,26 +568,18 @@ EXPORT_SYMBOL(timer_interrupt);
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void timer_broadcast_interrupt(void)
{
- u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-
- *next_tb = ~(u64)0;
tick_receive_broadcast();
__this_cpu_inc(irq_stat.broadcast_irqs_event);
}
#endif
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest. We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
-
#ifdef CONFIG_SUSPEND
-static void generic_suspend_disable_irqs(void)
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
{
+ if (ppc_md.suspend_disable_irqs)
+ ppc_md.suspend_disable_irqs();
+
/* Disable the decrementer, so that it doesn't interfere
* with suspending.
*/
@@ -684,23 +589,11 @@ static void generic_suspend_disable_irqs(void)
set_dec(decrementer_max);
}
-static void generic_suspend_enable_irqs(void)
-{
- local_irq_enable();
-}
-
-/* Overrides the weak version in kernel/power/main.c */
-void arch_suspend_disable_irqs(void)
-{
- if (ppc_md.suspend_disable_irqs)
- ppc_md.suspend_disable_irqs();
- generic_suspend_disable_irqs();
-}
-
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
- generic_suspend_enable_irqs();
+ local_irq_enable();
+
if (ppc_md.suspend_enable_irqs)
ppc_md.suspend_enable_irqs();
}
@@ -721,8 +614,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
*/
notrace unsigned long long sched_clock(void)
{
- if (__USE_RTC())
- return get_rtc();
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
@@ -782,7 +673,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
unsigned int tcr;
/* Clear any pending timer interrupts */
@@ -838,7 +729,7 @@ static void __read_persistent_clock(struct timespec64 *ts)
static int first = 1;
ts->tv_nsec = 0;
- /* XXX this is a litle fragile but will work okay in the short term */
+ /* XXX this is a little fragile but will work okay in the short term */
if (first) {
first = 0;
if (ppc_md.time_init)
@@ -872,113 +763,14 @@ void read_persistent_clock64(struct timespec64 *ts)
}
/* clocksource code */
-static notrace u64 rtc_read(struct clocksource *cs)
-{
- return (u64)get_rtc();
-}
-
static notrace u64 timebase_read(struct clocksource *cs)
{
return (u64)get_tb();
}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- struct timespec64 xt;
- struct clocksource *clock = tk->tkr_mono.clock;
- u32 mult = tk->tkr_mono.mult;
- u32 shift = tk->tkr_mono.shift;
- u64 cycle_last = tk->tkr_mono.cycle_last;
- u64 new_tb_to_xs, new_stamp_xsec;
- u64 frac_sec;
-
- if (clock != &clocksource_timebase)
- return;
-
- xt.tv_sec = tk->xtime_sec;
- xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * This computes ((2^20 / 1e9) * mult) >> shift as a
- * 0.64 fixed-point fraction.
- * The computation in the else clause below won't overflow
- * (as long as the timebase frequency is >= 1.049 MHz)
- * but loses precision because we lose the low bits of the constant
- * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
- * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
- * over a second. (Shift values are usually 22, 23 or 24.)
- * For high frequency clocks such as the 512MHz timebase clock
- * on POWER[6789], the mult value is small (e.g. 32768000)
- * and so we can shift the constant by 16 initially
- * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
- * remaining shifts after the multiplication, which gives a
- * more accurate result (e.g. with mult = 32768000, shift = 24,
- * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
- */
- if (mult <= 62500000 && clock->shift >= 16)
- new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
- else
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
-
- /*
- * Compute the fractional second in units of 2^-32 seconds.
- * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
- * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
- * it in units of 2^-32 seconds.
- * We assume shift <= 32 because clocks_calc_mult_shift()
- * generates shift values in the range 0 - 32.
- */
- frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
- do_div(frac_sec, NSEC_PER_SEC);
-
- /*
- * Work out new stamp_xsec value for any legacy users of systemcfg.
- * stamp_xsec is in units of 2^-20 seconds.
- */
- new_stamp_xsec = frac_sec >> 12;
- new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- */
- vdso_data->tb_orig_stamp = cycle_last;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime_sec = xt.tv_sec;
- vdso_data->stamp_xtime_nsec = xt.tv_nsec;
- vdso_data->stamp_sec_fraction = frac_sec;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
static void __init clocksource_init(void)
{
- struct clocksource *clock;
-
- if (__USE_RTC())
- clock = &clocksource_rtc;
- else
- clock = &clocksource_timebase;
+ struct clocksource *clock = &clocksource_timebase;
if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
@@ -993,19 +785,17 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
- __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ __this_cpu_write(decrementers_next_tb, get_tb() + evt);
+ set_dec_or_work(evt);
return 0;
}
static int decrementer_shutdown(struct clock_event_device *dev)
{
- decrementer_set_next_event(decrementer_max, dev);
+ __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
+ set_dec_or_work(decrementer_max);
+
return 0;
}
@@ -1084,7 +874,7 @@ void secondary_cpu_time_init(void)
*/
start_cpu_decrementer();
- /* FIME: Should make unrelatred change to move snapshot_timebase
+ /* FIME: Should make unrelated change to move snapshot_timebase
* call here ! */
register_decrementer_clockevent(smp_processor_id());
}
@@ -1096,22 +886,20 @@ void __init time_init(void)
u64 scale;
unsigned shift;
- if (__USE_RTC()) {
- /* 601 processor: dec counts down by 128 every 128ns */
- ppc_tb_freq = 1000000000;
- } else {
- /* Normal PowerPC with timebase register */
+ /* Normal PowerPC with timebase register */
+ if (ppc_md.calibrate_decr)
ppc_md.calibrate_decr();
- printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
- ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
- printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
- }
+ else
+ generic_calibrate_decr();
+
+ printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+ ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+ printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
- calc_cputime_factors();
/*
* Compute scale factor for sched_clock.
@@ -1132,7 +920,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
- boot_tb = get_tb_or_rtc();
+ boot_tb = get_tb();
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
@@ -1140,7 +928,6 @@ void __init time_init(void)
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
/* initialise and enable the large decrementer (if we have one) */
@@ -1158,9 +945,8 @@ void __init time_init(void)
init_decrementer_clockevent();
tick_setup_hrtimer_broadcast();
-#ifdef CONFIG_COMMON_CLK
of_clk_init(NULL);
-#endif
+ enable_sched_clock_irqtime();
}
/*
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 6ba0fdd1e7f8..a9cd6507163a 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -6,13 +6,13 @@
* Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
*/
+#include <linux/export.h>
#include <asm/asm-offsets.h>
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/bug.h>
-#include <asm/export.h>
#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
@@ -117,11 +117,18 @@ _GLOBAL(tm_reclaim)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
+ /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
+ /*
+ * Save kernel live AMR since it will be clobbered by treclaim
+ * but can be used elsewhere later in kernel space.
+ */
+ mfspr r3, SPRN_AMR
+ std r3, TM_FRAME_L1(r1)
+
/* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
@@ -215,15 +222,12 @@ _GLOBAL(tm_reclaim)
* Make r7 look like an exception frame so that we can use the neat
* GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
+ subi r7, r7, STACK_INT_FRAME_REGS
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
- SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_GPR(8, r7) /* user r8 */
- SAVE_GPR(9, r7) /* user r9 */
- SAVE_GPR(10, r7) /* user r10 */
+ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
+ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
@@ -245,7 +249,7 @@ _GLOBAL(tm_reclaim)
* but is used in signal return to 'wind back' to the abort handler.
*/
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ***************** CTR, LR, CR, XER ********** */
mfctr r3
mflr r4
mfcr r5
@@ -256,7 +260,6 @@ _GLOBAL(tm_reclaim)
std r5, _CCR(r7)
std r6, _XER(r7)
-
/* ******************** TAR, DSCR ********** */
mfspr r3, SPRN_TAR
mfspr r4, SPRN_DSCR
@@ -264,6 +267,10 @@ _GLOBAL(tm_reclaim)
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
+ /* ******************** AMR **************** */
+ mfspr r3, SPRN_AMR
+ std r3, THREAD_TM_AMR(r12)
+
/*
* MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
@@ -308,7 +315,9 @@ _GLOBAL(tm_reclaim)
std r3, THREAD_TM_TFHAR(r12)
std r4, THREAD_TM_TFIAR(r12)
- /* AMR is checkpointed too, but is unsupported by Linux. */
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L1(r1)
+ mtspr SPRN_AMR, r8
/* Restore original MSR/IRQ state & clear TM mode */
ld r14, TM_FRAME_L0(r1) /* Orig MSR */
@@ -350,11 +359,18 @@ _GLOBAL(__tm_recheckpoint)
stdu r1, -TM_FRAME_SIZE(r1)
/*
- * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
+ /*
+ * Save kernel live AMR since it will be clobbered for trechkpt
+ * but can be used elsewhere later in kernel space.
+ */
+ mfspr r8, SPRN_AMR
+ std r8, TM_FRAME_L0(r1)
+
/* Load complete register state from ts_ckpt* registers */
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
@@ -363,7 +379,7 @@ _GLOBAL(__tm_recheckpoint)
* Make r7 look like an exception frame so that we can use the neat
* GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
+ subi r7, r7, STACK_INT_FRAME_REGS
/* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
@@ -404,7 +420,7 @@ _GLOBAL(__tm_recheckpoint)
restore_gprs:
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ****************** CTR, LR, XER ************* */
ld r4, _CTR(r7)
ld r5, _LINK(r7)
ld r8, _XER(r7)
@@ -417,17 +433,18 @@ restore_gprs:
ld r4, THREAD_TM_TAR(r3)
mtspr SPRN_TAR, r4
+ /* ******************** AMR ******************** */
+ ld r4, THREAD_TM_AMR(r3)
+ mtspr SPRN_AMR, r4
+
/* Load up the PPR and DSCR in GPRs only at this stage */
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
REST_GPR(0, r7) /* GPR0 */
- REST_2GPRS(2, r7) /* GPR2-3 */
- REST_GPR(4, r7) /* GPR4 */
- REST_4GPRS(8, r7) /* GPR8-11 */
- REST_2GPRS(12, r7) /* GPR12-13 */
-
- REST_NVGPRS(r7) /* GPR14-31 */
+ REST_GPRS(2, 4, r7) /* GPR2-4 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
@@ -463,18 +480,24 @@ restore_gprs:
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them after we
- * clear MSR RI.
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
*/
REST_GPR(5, r7)
std r5, -8(r1)
- ld r5, GPR1(r7)
+ ld r5, GPR13(r7)
std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -485,9 +508,9 @@ restore_gprs:
* until we turn MSR RI back on.
*/
- SET_SCRATCH0(r1)
ld r5, -8(r1)
- ld r1, -16(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
@@ -503,12 +526,16 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13, R1 is restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L0(r1)
+ mtspr SPRN_AMR, r8
+
REST_NVGPRS(r1)
addi r1, r1, TM_FRAME_SIZE
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index 858503775c58..125f4ca588b9 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -6,18 +6,15 @@
ifdef CONFIG_FUNCTION_TRACER
# do not trace tracer code
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE)
endif
-obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
-obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o
+obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o
ifdef CONFIG_MPROFILE_KERNEL
-obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o
else
-obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o
endif
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_PPC64) += $(obj64-y)
@@ -26,4 +23,9 @@ obj-$(CONFIG_PPC32) += $(obj32-y)
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_ftrace.o := n
KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
UBSAN_SANITIZE_ftrace.o := n
+GCOV_PROFILE_ftrace_64_pg.o := n
+KCOV_INSTRUMENT_ftrace_64_pg.o := n
+KCSAN_SANITIZE_ftrace_64_pg.o := n
+UBSAN_SANITIZE_ftrace_64_pg.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 7ea0ca044b65..82010629cf88 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -22,834 +22,285 @@
#include <linux/init.h>
#include <linux/list.h>
-#include <asm/asm-prototypes.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/ftrace.h>
#include <asm/syscall.h>
+#include <asm/inst.h>
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-/*
- * We generally only have a single long_branch tramp and at most 2 or 3 plt
- * tramps generated. But, we don't use the plt tramps currently. We also allot
- * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
- * tramps in total. Set aside 8 just to be sure.
- */
-#define NUM_FTRACE_TRAMPS 8
+#define NUM_FTRACE_TRAMPS 2
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static unsigned int
-ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
{
- unsigned int op;
-
- addr = ppc_function_entry((void *)addr);
+ ppc_inst_t op;
- /* if (link) set op to 'bl' else 'b' */
- op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
+ WARN_ON(!is_offset_in_branch_range(addr - ip));
+ create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
return op;
}
-static int
-ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
+static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op)
{
- unsigned int replaced;
-
- /*
- * Note:
- * We are paranoid about modifying text, as if a bug was to happen, it
- * could cause us to read or write to someplace that could cause harm.
- * Carefully read and modify the code with probe_kernel_*(), and make
- * sure what we read is what we expected it to be before modifying it.
- */
-
- /* read the text we want to modify */
- if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ if (copy_inst_from_kernel_nofault(op, (void *)ip)) {
+ pr_err("0x%lx: fetching instruction failed\n", ip);
return -EFAULT;
-
- /* Make sure it is what we expect it to be */
- if (replaced != old) {
- pr_err("%p: replaced (%#x) != old (%#x)",
- (void *)ip, replaced, old);
- return -EINVAL;
}
- /* replace the text with the new text */
- if (patch_instruction((unsigned int *)ip, new))
- return -EPERM;
-
return 0;
}
-/*
- * Helper functions that are the same for both PPC64 and PPC32.
- */
-static int test_24bit_addr(unsigned long ip, unsigned long addr)
-{
- addr = ppc_function_entry((void *)addr);
-
- /* use the create_branch to verify that this offset can be branched */
- return create_branch((unsigned int *)ip, addr, 0);
-}
-
-static int is_bl_op(unsigned int op)
-{
- return (op & 0xfc000003) == 0x48000001;
-}
-
-static int is_b_op(unsigned int op)
-{
- return (op & 0xfc000003) == 0x48000000;
-}
-
-static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst)
{
- int offset;
+ ppc_inst_t op;
+ int ret;
- offset = (op & 0x03fffffc);
- /* make it signed */
- if (offset & 0x02000000)
- offset |= 0xfe000000;
+ ret = ftrace_read_inst(ip, &op);
+ if (!ret && !ppc_inst_equal(op, inst)) {
+ pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+ ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op));
+ ret = -EINVAL;
+ }
- return ip + (long)offset;
+ return ret;
}
-#ifdef CONFIG_MODULES
-#ifdef CONFIG_PPC64
-static int
-__ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
+static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
{
- unsigned long entry, ptr, tramp;
- unsigned long ip = rec->ip;
- unsigned int op, pop;
-
- /* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
+ int ret = ftrace_validate_inst(ip, old);
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
+ if (!ret)
+ ret = patch_instruction((u32 *)ip, new);
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
-#ifdef CONFIG_MPROFILE_KERNEL
- /* When using -mkernel_profile there is no load to jump over */
- pop = PPC_INST_NOP;
-
- if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
- pr_err("Fetching instruction at %lx failed.\n", ip - 4);
- return -EFAULT;
- }
-
- /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
- if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
- pr_err("Unexpected instruction %08x around bl _mcount\n", op);
- return -EINVAL;
- }
-#else
- /*
- * Our original call site looks like:
- *
- * bl <tramp>
- * ld r2,XX(r1)
- *
- * Milton Miller pointed out that we can not simply nop the branch.
- * If a task was preempted when calling a trace function, the nops
- * will remove the way to restore the TOC in r2 and the r2 TOC will
- * get corrupted.
- *
- * Use a b +8 to jump over the load.
- */
-
- pop = PPC_INST_BRANCH | 8; /* b +8 */
-
- /*
- * Check what is in the next instruction. We can see ld r2,40(r1), but
- * on first pass after boot we will see mflr r0.
- */
- if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
- pr_err("Fetching op failed.\n");
- return -EFAULT;
- }
-
- if (op != PPC_INST_LD_TOC) {
- pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
- return -EINVAL;
- }
-#endif /* CONFIG_MPROFILE_KERNEL */
-
- if (patch_instruction((unsigned int *)ip, pop)) {
- pr_err("Patching NOP failed.\n");
- return -EPERM;
- }
-
- return 0;
+ return ret;
}
-#else /* !PPC64 */
-static int
-__ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
+static int is_bl_op(ppc_inst_t op)
{
- unsigned int op;
- unsigned int jmp[4];
- unsigned long ip = rec->ip;
- unsigned long tramp;
-
- if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- /*
- * On PPC32 the trampoline looks like:
- * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
- * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
- * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
- * 0x4e, 0x80, 0x04, 0x20 bctr
- */
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- /* Find where the trampoline jumps to */
- if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
- pr_err("Failed to read %lx\n", tramp);
- return -EFAULT;
- }
-
- pr_devel(" %08x %08x ", jmp[0], jmp[1]);
-
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0x7d8903a6) ||
- (jmp[3] != 0x4e800420)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- tramp = (jmp[1] & 0xffff) |
- ((jmp[0] & 0xffff) << 16);
- if (tramp & 0x8000)
- tramp -= 0x10000;
-
- pr_devel(" %lx ", tramp);
-
- if (tramp != addr) {
- pr_err("Trampoline location %08lx does not match addr\n",
- tramp);
- return -EINVAL;
- }
-
- op = PPC_INST_NOP;
-
- if (patch_instruction((unsigned int *)ip, op))
- return -EPERM;
-
- return 0;
+ return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
}
-#endif /* PPC64 */
-#endif /* CONFIG_MODULES */
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- /*
- * We have the compiler generated long_branch tramps at the end
- * and we prefer those
- */
- for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
if (!ftrace_tramps[i])
continue;
- else if (create_branch((void *)ip, ftrace_tramps[i], 0))
+ else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
return ftrace_tramps[i];
return 0;
}
-static int add_ftrace_tramp(unsigned long tramp)
-{
- int i;
-
- for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (!ftrace_tramps[i]) {
- ftrace_tramps[i] = tramp;
- return 0;
- }
-
- return -1;
-}
-
-/*
- * If this is a compiler generated long_branch trampoline (essentially, a
- * trampoline that has a branch to _mcount()), we re-write the branch to
- * instead go to ftrace_[regs_]caller() and note down the location of this
- * trampoline.
- */
-static int setup_mcount_compiler_tramp(unsigned long tramp)
+static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst)
{
- int i, op;
- unsigned long ptr;
- static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
-
- /* Is this a known long jump tramp? */
- for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (!ftrace_tramps[i])
- break;
- else if (ftrace_tramps[i] == tramp)
- return 0;
-
- /* Is this a known plt tramp? */
- for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (!ftrace_plt_tramps[i])
- break;
- else if (ftrace_plt_tramps[i] == tramp)
- return -1;
-
- /* New trampoline -- read where this goes */
- if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
- pr_debug("Fetching opcode failed.\n");
- return -1;
- }
-
- /* Is this a 24 bit branch? */
- if (!is_b_op(op)) {
- pr_debug("Trampoline is not a long branch tramp.\n");
- return -1;
- }
-
- /* lets find where the pointer goes */
- ptr = find_bl_target(tramp, op);
-
- if (ptr != ppc_global_function_entry((void *)_mcount)) {
- pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
- return -1;
- }
+ unsigned long ip = rec->ip;
+ unsigned long stub;
- /* Let's re-write the tramp to go to ftrace_[regs_]caller */
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
-#else
- ptr = ppc_global_function_entry((void *)ftrace_caller);
+ if (is_offset_in_branch_range(addr - ip)) {
+ /* Within range */
+ stub = addr;
+#ifdef CONFIG_MODULES
+ } else if (rec->arch.mod) {
+ /* Module code would be going to one of the module stubs */
+ stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp :
+ rec->arch.mod->arch.tramp_regs);
#endif
- if (!create_branch((void *)tramp, ptr, 0)) {
- pr_debug("%ps is not reachable from existing mcount tramp\n",
- (void *)ptr);
- return -1;
- }
-
- if (patch_branch((unsigned int *)tramp, ptr, 0)) {
- pr_debug("REL24 out of range!\n");
- return -1;
- }
-
- if (add_ftrace_tramp(tramp)) {
- pr_debug("No tramp locations left\n");
- return -1;
- }
-
- return 0;
-}
-
-static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long tramp, ip = rec->ip;
- unsigned int op;
-
- /* Read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
- return -EINVAL;
- }
-
- /* Let's find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- if (setup_mcount_compiler_tramp(tramp)) {
- /* Are other trampolines reachable? */
- if (!find_ftrace_tramp(ip)) {
- pr_err("No ftrace trampolines reachable from %ps\n",
- (void *)ip);
+ } else if (core_kernel_text(ip)) {
+ /* We would be branching to one of our ftrace stubs */
+ stub = find_ftrace_tramp(ip);
+ if (!stub) {
+ pr_err("0x%lx: No ftrace stubs reachable\n", ip);
return -EINVAL;
}
+ } else {
+ return -EINVAL;
}
- if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
- pr_err("Patching NOP failed.\n");
- return -EPERM;
- }
-
+ *call_inst = ftrace_create_branch_inst(ip, stub, 1);
return 0;
}
-int ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
{
- unsigned long ip = rec->ip;
- unsigned int old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = ftrace_call_replace(ip, addr, 1);
- new = PPC_INST_NOP;
- return ftrace_modify_code(ip, old, new);
- } else if (core_kernel_text(ip))
- return __ftrace_make_nop_kernel(rec, addr);
-
-#ifdef CONFIG_MODULES
- /*
- * Out of range jumps are called from modules.
- * We should either already have a pointer to the module
- * or it has been passed in.
- */
- if (!rec->arch.mod) {
- if (!mod) {
- pr_err("No module loaded addr=%lx\n", addr);
- return -EFAULT;
- }
- rec->arch.mod = mod;
- } else if (mod) {
- if (mod != rec->arch.mod) {
- pr_err("Record mod %p not equal to passed in mod %p\n",
- rec->arch.mod, mod);
- return -EINVAL;
- }
- /* nothing to do if mod == rec->arch.mod */
- } else
- mod = rec->arch.mod;
-
- return __ftrace_make_nop(mod, rec, addr);
-#else
- /* We should not get here without modules */
+ /* This should never be called since we override ftrace_replace_code() */
+ WARN_ON(1);
return -EINVAL;
-#endif /* CONFIG_MODULES */
-}
-
-#ifdef CONFIG_MODULES
-#ifdef CONFIG_PPC64
-/*
- * Examine the existing instructions for __ftrace_make_call.
- * They should effectively be a NOP, and follow formal constraints,
- * depending on the ABI. Return false if they don't.
- */
-#ifndef CONFIG_MPROFILE_KERNEL
-static int
-expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
-{
- /*
- * We expect to see:
- *
- * b +8
- * ld r2,XX(r1)
- *
- * The load offset is different depending on the ABI. For simplicity
- * just mask it out when doing the compare.
- */
- if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
- return 0;
- return 1;
-}
-#else
-static int
-expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
-{
- /* look for patched "NOP" on ppc64 with -mprofile-kernel */
- if (op0 != PPC_INST_NOP)
- return 0;
- return 1;
}
#endif
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int op[2];
- void *ip = (void *)rec->ip;
- unsigned long entry, ptr, tramp;
- struct module *mod = rec->arch.mod;
-
- /* read where this goes */
- if (probe_kernel_read(op, ip, sizeof(op)))
- return -EFAULT;
-
- if (!expected_nop_sequence(ip, op[0], op[1])) {
- pr_err("Unexpected call sequence at %p: %x %x\n",
- ip, op[0], op[1]);
- return -EINVAL;
- }
-
- /* If we never set up ftrace trampoline(s), then bail */
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- if (!mod->arch.tramp || !mod->arch.tramp_regs) {
-#else
- if (!mod->arch.tramp) {
-#endif
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- if (rec->flags & FTRACE_FL_REGS)
- tramp = mod->arch.tramp_regs;
- else
-#endif
- tramp = mod->arch.tramp;
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
+ ppc_inst_t old, new;
+ int ret;
- /* Ensure branch is within 24 bits */
- if (!create_branch(ip, tramp, BRANCH_SET_LINK)) {
- pr_err("Branch out of range\n");
+ /* This can only ever be called during module load */
+ if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip)))
return -EINVAL;
- }
- if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
+ old = ppc_inst(PPC_RAW_NOP());
+ ret = ftrace_get_call_inst(rec, addr, &new);
+ if (ret)
+ return ret;
- return 0;
+ return ftrace_modify_code(rec->ip, old, new);
}
-#else /* !CONFIG_PPC64: */
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int op;
- unsigned long ip = rec->ip;
-
- /* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* It should be pointing to a nop */
- if (op != PPC_INST_NOP) {
- pr_err("Expected NOP but have %x\n", op);
- return -EINVAL;
- }
-
- /* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
- /* create the branch to the trampoline */
- op = create_branch((unsigned int *)ip,
- rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
- if (!op) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- pr_devel("write to %lx\n", rec->ip);
-
- if (patch_instruction((unsigned int *)ip, op))
- return -EPERM;
-
- return 0;
+ /*
+ * This should never be called since we override ftrace_replace_code(),
+ * as well as ftrace_init_nop()
+ */
+ WARN_ON(1);
+ return -EINVAL;
}
-#endif /* CONFIG_PPC64 */
-#endif /* CONFIG_MODULES */
-static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+void ftrace_replace_code(int enable)
{
- unsigned int op;
- void *ip = (void *)rec->ip;
- unsigned long tramp, entry, ptr;
+ ppc_inst_t old, new, call_inst, new_call_inst;
+ ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP());
+ unsigned long ip, new_addr, addr;
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret = 0, update;
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ ip = rec->ip;
+
+ if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED))
+ continue;
- /* Make sure we're being asked to patch branch to a known ftrace addr */
- entry = ppc_global_function_entry((void *)ftrace_caller);
- ptr = ppc_global_function_entry((void *)addr);
+ addr = ftrace_get_addr_curr(rec);
+ new_addr = ftrace_get_addr_new(rec);
+ update = ftrace_update_record(rec, enable);
- if (ptr != entry) {
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- entry = ppc_global_function_entry((void *)ftrace_regs_caller);
- if (ptr != entry) {
-#endif
- pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
- return -EINVAL;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ switch (update) {
+ case FTRACE_UPDATE_IGNORE:
+ default:
+ continue;
+ case FTRACE_UPDATE_MODIFY_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst);
+ ret |= ftrace_get_call_inst(rec, addr, &call_inst);
+ old = call_inst;
+ new = new_call_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_NOP:
+ ret = ftrace_get_call_inst(rec, addr, &call_inst);
+ old = call_inst;
+ new = nop_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &call_inst);
+ old = nop_inst;
+ new = call_inst;
+ break;
}
-#endif
- }
-
- /* Make sure we have a nop */
- if (probe_kernel_read(&op, ip, sizeof(op))) {
- pr_err("Unable to read ftrace location %p\n", ip);
- return -EFAULT;
- }
-
- if (op != PPC_INST_NOP) {
- pr_err("Unexpected call sequence at %p: %x\n", ip, op);
- return -EINVAL;
- }
-
- tramp = find_ftrace_tramp((unsigned long)ip);
- if (!tramp) {
- pr_err("No ftrace trampolines reachable from %ps\n", ip);
- return -EINVAL;
- }
- if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
- pr_err("Error patching branch to ftrace tramp!\n");
- return -EINVAL;
+ if (!ret)
+ ret = ftrace_modify_code(ip, old, new);
+ if (ret)
+ goto out;
}
- return 0;
-}
-
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned int old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = PPC_INST_NOP;
- new = ftrace_call_replace(ip, addr, 1);
- return ftrace_modify_code(ip, old, new);
- } else if (core_kernel_text(ip))
- return __ftrace_make_call_kernel(rec, addr);
-
-#ifdef CONFIG_MODULES
- /*
- * Out of range jumps are called from modules.
- * Being that we are converting from nop, it had better
- * already have a module defined.
- */
- if (!rec->arch.mod) {
- pr_err("No module loaded\n");
- return -EINVAL;
- }
-
- return __ftrace_make_call(rec, addr);
-#else
- /* We should not get here without modules */
- return -EINVAL;
-#endif /* CONFIG_MODULES */
+out:
+ if (ret)
+ ftrace_bug(ret, rec);
+ return;
}
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-#ifdef CONFIG_MODULES
-static int
-__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
- unsigned int op;
- unsigned long ip = rec->ip;
- unsigned long entry, ptr, tramp;
- struct module *mod = rec->arch.mod;
-
- /* If we never set up ftrace trampolines, then bail */
- if (!mod->arch.tramp || !mod->arch.tramp_regs) {
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
- /* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
- pr_err("Fetching opcode failed.\n");
- return -EFAULT;
- }
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
+ unsigned long addr, ip = rec->ip;
+ ppc_inst_t old, new;
+ int ret = 0;
+
+ /* Verify instructions surrounding the ftrace location */
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* Expect nops */
+ ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP()));
+ if (!ret)
+ ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP()));
+ } else if (IS_ENABLED(CONFIG_PPC32)) {
+ /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ if (!ret)
+ ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)));
+ } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+ /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
+ ret = ftrace_read_inst(ip - 4, &old);
+ if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)));
+ }
+ } else {
return -EINVAL;
}
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
- entry = ppc_global_function_entry((void *)old_addr);
+ if (ret)
+ return ret;
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- if (tramp != entry) {
- /* old_addr is not within range, so we must have used a trampoline */
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
+ if (!core_kernel_text(ip)) {
+ if (!mod) {
+ pr_err("0x%lx: No module provided for non-kernel address\n", ip);
return -EFAULT;
}
-
- pr_devel("trampoline target %lx", ptr);
-
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
- }
-
- /* The new target may be within range */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- return 0;
- }
-
- if (rec->flags & FTRACE_FL_REGS)
- tramp = mod->arch.tramp_regs;
- else
- tramp = mod->arch.tramp;
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
- /* Ensure branch is within 24 bits */
- if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) {
- pr_err("Branch out of range\n");
- return -EINVAL;
- }
-
- if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
+ rec->arch.mod = mod;
}
- return 0;
-}
-#endif
-
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned int old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
- /* within range */
- old = ftrace_call_replace(ip, old_addr, 1);
- new = ftrace_call_replace(ip, addr, 1);
- return ftrace_modify_code(ip, old, new);
- } else if (core_kernel_text(ip)) {
+ /* Nop-out the ftrace location */
+ new = ppc_inst(PPC_RAW_NOP());
+ addr = MCOUNT_ADDR;
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* we instead patch-in the 'mflr r0' */
+ old = ppc_inst(PPC_RAW_NOP());
+ new = ppc_inst(PPC_RAW_MFLR(_R0));
+ ret = ftrace_modify_code(ip - 4, old, new);
+ } else if (is_offset_in_branch_range(addr - ip)) {
+ /* Within range */
+ old = ftrace_create_branch_inst(ip, addr, 1);
+ ret = ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) {
/*
- * We always patch out of range locations to go to the regs
- * variant, so there is nothing to do here
+ * We would be branching to a linker-generated stub, or to the module _mcount
+ * stub. Let's just confirm we have a 'bl' here.
*/
- return 0;
- }
-
-#ifdef CONFIG_MODULES
- /*
- * Out of range jumps are called from modules.
- */
- if (!rec->arch.mod) {
- pr_err("No module loaded\n");
+ ret = ftrace_read_inst(ip, &old);
+ if (ret)
+ return ret;
+ if (!is_bl_op(old)) {
+ pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old));
+ return -EINVAL;
+ }
+ ret = patch_instruction((u32 *)ip, new);
+ } else {
return -EINVAL;
}
- return __ftrace_modify_call(rec, old_addr, addr);
-#else
- /* We should not get here without modules */
- return -EINVAL;
-#endif /* CONFIG_MODULES */
+ return ret;
}
-#endif
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
- unsigned int old, new;
+ ppc_inst_t old, new;
int ret;
- old = *(unsigned int *)&ftrace_call;
- new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ old = ppc_inst_read((u32 *)&ftrace_call);
+ new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
ret = ftrace_modify_code(ip, old, new);
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/* Also update the regs callback function */
- if (!ret) {
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
ip = (unsigned long)(&ftrace_regs_call);
- old = *(unsigned int *)&ftrace_regs_call;
- new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ old = ppc_inst_read((u32 *)&ftrace_regs_call);
+ new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1);
ret = ftrace_modify_code(ip, old, new);
}
-#endif
return ret;
}
@@ -863,91 +314,103 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-#ifdef CONFIG_PPC64
-#define PACATOC offsetof(struct paca_struct, kernel_toc)
-
-extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
-
-int __init ftrace_dyn_arch_init(void)
+void ftrace_free_init_tramp(void)
{
int i;
- unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
- u32 stub_insns[] = {
- 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
- 0x3d8c0000, /* addis r12,r12,<high> */
- 0x398c0000, /* addi r12,r12,<low> */
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420, /* bctr */
- };
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
-#else
- unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
-#endif
- long reladdr = addr - kernel_toc_addr();
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("Address of %ps out of range of kernel_toc.\n",
- (void *)addr);
- return -1;
- }
+ for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
+ if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
+ ftrace_tramps[i] = 0;
+ return;
+ }
+}
- for (i = 0; i < 2; i++) {
- memcpy(tramp[i], stub_insns, sizeof(stub_insns));
- tramp[i][1] |= PPC_HA(reladdr);
- tramp[i][2] |= PPC_LO(reladdr);
- add_ftrace_tramp((unsigned long)tramp[i]);
- }
+static void __init add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
- return 0;
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return;
+ }
}
-#else
+
int __init ftrace_dyn_arch_init(void)
{
- return 0;
-}
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ unsigned long addr = FTRACE_REGS_ADDR;
+ long reladdr;
+ int i;
+ u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pla r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(1),
+ PPC_INST_PADDI | ___PPC_RT(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+#elif defined(CONFIG_PPC64)
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+#else
+ PPC_RAW_LIS(_R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
#endif
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-extern void ftrace_graph_call(void);
-extern void ftrace_graph_stub(void);
+ };
-int ftrace_enable_ftrace_graph_caller(void)
-{
- unsigned long ip = (unsigned long)(&ftrace_graph_call);
- unsigned long addr = (unsigned long)(&ftrace_graph_caller);
- unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- unsigned int old, new;
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ for (i = 0; i < 2; i++) {
+ reladdr = addr - (unsigned long)tramp[i];
- old = ftrace_call_replace(ip, stub, 0);
- new = ftrace_call_replace(ip, addr, 0);
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+ pr_err("Address of %ps out of range of pcrel address.\n",
+ (void *)addr);
+ return -1;
+ }
- return ftrace_modify_code(ip, old, new);
-}
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= IMM_H18(reladdr);
+ tramp[i][1] |= IMM_L(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else if (IS_ENABLED(CONFIG_PPC64)) {
+ reladdr = addr - kernel_toc_addr();
-int ftrace_disable_ftrace_graph_caller(void)
-{
- unsigned long ip = (unsigned long)(&ftrace_graph_call);
- unsigned long addr = (unsigned long)(&ftrace_graph_caller);
- unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- unsigned int old, new;
+ if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
- old = ftrace_call_replace(ip, addr, 0);
- new = ftrace_call_replace(ip, stub, 0);
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else {
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= PPC_HA(addr);
+ tramp[i][1] |= PPC_LO(addr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ }
- return ftrace_modify_code(ip, old, new);
+ return 0;
}
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info. Return the address we want to divert to.
- */
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
- unsigned long sp)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- unsigned long return_hooker;
+ unsigned long sp = fregs->regs.gpr[1];
+ int bit;
if (unlikely(ftrace_graph_is_dead()))
goto out;
@@ -955,21 +418,15 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
- return_hooker = ppc_function_entry(return_to_handler);
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ goto out;
+
+ if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+ parent_ip = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
- parent = return_hooker;
+ ftrace_test_recursion_unlock(bit);
out:
- return parent;
+ fregs->regs.link = parent_ip;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef PPC64_ELF_ABI_v1
-char *arch_ftrace_match_adjust(char *str, const char *search)
-{
- if (str[0] == '.' && search[0] != '.')
- return str + 1;
- else
- return str;
-}
-#endif /* PPC64_ELF_ABI_v1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
deleted file mode 100644
index e023ae59c429..000000000000
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from entry_32.S
- */
-
-#include <linux/magic.h>
-#include <asm/reg.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/export.h>
-
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
- /*
- * It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
- * to push the return address back to the caller of mcount
- * into the ctr register, restore the link register and
- * then jump back using the ctr register.
- */
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
- mtlr r0
- bctr
-
-_GLOBAL(ftrace_caller)
- MCOUNT_SAVE_FRAME
- /* r3 ends up with link register */
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-
-EXPORT_SYMBOL(_mcount)
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- addi r5, r1, 48
- /* load r4 with local address */
- lwz r4, 44(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- stw r3,52(r1)
-
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- stwu r1, -32(r1)
- stw r3, 20(r1)
- stw r4, 16(r1)
- stw r31, 12(r1)
- mr r31, r1
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- lwz r3, 20(r1)
- lwz r4, 16(r1)
- lwz r31,12(r1)
- lwz r1, 0(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
deleted file mode 100644
index 25e5b9e47c06..000000000000
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Split from entry_64.S
- */
-
-#include <linux/magic.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-#include <asm/ppc-opcode.h>
-#include <asm/export.h>
-
-.pushsection ".tramp.ftrace.text","aw",@progbits;
-.globl ftrace_tramp_text
-ftrace_tramp_text:
- .space 64
-.popsection
-
-.pushsection ".tramp.ftrace.init","aw",@progbits;
-.globl ftrace_tramp_init
-ftrace_tramp_init:
- .space 64
-.popsection
-
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
- mflr r12
- mtctr r12
- mtlr r0
- bctr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(return_to_handler)
- /* need to save return values */
- std r4, -32(r1)
- std r3, -24(r1)
- /* save TOC */
- std r2, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- /*
- * We might be called from a module.
- * Switch to our TOC to run inside the core kernel.
- */
- ld r2, PACATOC(r13)
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- ld r1, 0(r1)
- ld r4, -32(r1)
- ld r3, -24(r1)
- ld r2, -16(r1)
- ld r31, -8(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 000000000000..7b85c3b460a3
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,846 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+ ppc_inst_t op;
+
+ addr = ppc_function_entry((void *)addr);
+
+ /* if (link) set op to 'bl' else 'b' */
+ create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
+
+ return op;
+}
+
+static inline int
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+ ppc_inst_t replaced;
+
+ /*
+ * Note:
+ * We are paranoid about modifying text, as if a bug was to happen, it
+ * could cause us to read or write to someplace that could cause harm.
+ * Carefully read and modify the code with probe_kernel_*(), and make
+ * sure what we read is what we expected it to be before modifying it.
+ */
+
+ /* read the text we want to modify */
+ if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (!ppc_inst_equal(replaced, old)) {
+ pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip,
+ ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
+ return -EINVAL;
+ }
+
+ /* replace the text with the new text */
+ return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+ addr = ppc_function_entry((void *)addr);
+
+ return is_offset_in_branch_range(addr - ip);
+}
+
+static int is_bl_op(ppc_inst_t op)
+{
+ return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0);
+}
+
+static int is_b_op(ppc_inst_t op)
+{
+ return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0);
+}
+
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
+{
+ int offset;
+
+ offset = PPC_LI(ppc_inst_val(op));
+ /* make it signed */
+ if (offset & 0x02000000)
+ offset |= 0xfe000000;
+
+ return ip + (long)offset;
+}
+
+#ifdef CONFIG_MODULES
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long entry, ptr, tramp;
+ unsigned long ip = rec->ip;
+ ppc_inst_t op, pop;
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+ if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
+ pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ return -EFAULT;
+ }
+
+ /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
+ !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
+ pr_err("Unexpected instruction %08lx around bl _mcount\n",
+ ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+ } else if (IS_ENABLED(CONFIG_PPC64)) {
+ /*
+ * Check what is in the next instruction. We can see ld r2,40(r1), but
+ * on first pass after boot we will see mflr r0.
+ */
+ if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
+ pr_err("Fetching op failed.\n");
+ return -EFAULT;
+ }
+
+ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) {
+ pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
+ ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * When using -mprofile-kernel or PPC32 there is no load to jump over.
+ *
+ * Otherwise our original call site looks like:
+ *
+ * bl <tramp>
+ * ld r2,XX(r1)
+ *
+ * Milton Miller pointed out that we can not simply nop the branch.
+ * If a task was preempted when calling a trace function, the nops
+ * will remove the way to restore the TOC in r2 and the r2 TOC will
+ * get corrupted.
+ *
+ * Use a b +8 to jump over the load.
+ */
+ if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
+ pop = ppc_inst(PPC_RAW_NOP());
+ else
+ pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */
+
+ if (patch_instruction((u32 *)ip, pop)) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+ int i;
+
+ /*
+ * We have the compiler generated long_branch tramps at the end
+ * and we prefer those
+ */
+ for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ if (!ftrace_tramps[i])
+ continue;
+ else if (is_offset_in_branch_range(ftrace_tramps[i] - ip))
+ return ftrace_tramps[i];
+
+ return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+ int i;
+ ppc_inst_t op;
+ unsigned long ptr;
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* New trampoline -- read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
+ pr_debug("Fetching opcode failed.\n");
+ return -1;
+ }
+
+ /* Is this a 24 bit branch? */
+ if (!is_b_op(op)) {
+ pr_debug("Trampoline is not a long branch tramp.\n");
+ return -1;
+ }
+
+ /* lets find where the pointer goes */
+ ptr = find_bl_target(tramp, op);
+
+ if (ptr != ppc_global_function_entry((void *)_mcount)) {
+ pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+ return -1;
+ }
+
+ /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+ else
+ ptr = ppc_global_function_entry((void *)ftrace_caller);
+
+ if (patch_branch((u32 *)tramp, ptr, 0)) {
+ pr_debug("REL24 out of range!\n");
+ return -1;
+ }
+
+ if (add_ftrace_tramp(tramp)) {
+ pr_debug("No tramp locations left\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long tramp, ip = rec->ip;
+ ppc_inst_t op;
+
+ /* Read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* Let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (setup_mcount_compiler_tramp(tramp)) {
+ /* Are other trampolines reachable? */
+ if (!find_ftrace_tramp(ip)) {
+ pr_err("No ftrace trampolines reachable from %ps\n",
+ (void *)ip);
+ return -EINVAL;
+ }
+ }
+
+ if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, addr, 1);
+ new = ppc_inst(PPC_RAW_NOP());
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ return __ftrace_make_nop_kernel(rec, addr);
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ * We should either already have a pointer to the module
+ * or it has been passed in.
+ */
+ if (!rec->arch.mod) {
+ if (!mod) {
+ pr_err("No module loaded addr=%lx\n", addr);
+ return -EFAULT;
+ }
+ rec->arch.mod = mod;
+ } else if (mod) {
+ if (mod != rec->arch.mod) {
+ pr_err("Record mod %p not equal to passed in mod %p\n",
+ rec->arch.mod, mod);
+ return -EINVAL;
+ }
+ /* nothing to do if mod == rec->arch.mod */
+ } else
+ mod = rec->arch.mod;
+
+ return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
+{
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+ else
+ return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
+ ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
+}
+
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ ppc_inst_t op[2];
+ void *ip = (void *)rec->ip;
+ unsigned long entry, ptr, tramp;
+ struct module *mod = rec->arch.mod;
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(op, ip))
+ return -EFAULT;
+
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
+ copy_inst_from_kernel_nofault(op + 1, ip + 4))
+ return -EFAULT;
+
+ if (!expected_nop_sequence(ip, op[0], op[1])) {
+ pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
+ ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
+ return -EINVAL;
+ }
+
+ /* If we never set up ftrace trampoline(s), then bail */
+ if (!mod->arch.tramp ||
+ (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+ tramp = mod->arch.tramp;
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ ppc_inst_t op;
+ void *ip = (void *)rec->ip;
+ unsigned long tramp, entry, ptr;
+
+ /* Make sure we're being asked to patch branch to a known ftrace addr */
+ entry = ppc_global_function_entry((void *)ftrace_caller);
+ ptr = ppc_global_function_entry((void *)addr);
+
+ if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+
+ if (ptr != entry) {
+ pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+ return -EINVAL;
+ }
+
+ /* Make sure we have a nop */
+ if (copy_inst_from_kernel_nofault(&op, ip)) {
+ pr_err("Unable to read ftrace location %p\n", ip);
+ return -EFAULT;
+ }
+
+ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
+ pr_err("Unexpected call sequence at %p: %08lx\n",
+ ip, ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ tramp = find_ftrace_tramp((unsigned long)ip);
+ if (!tramp) {
+ pr_err("No ftrace trampolines reachable from %ps\n", ip);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("Error patching branch to ftrace tramp!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ppc_inst(PPC_RAW_NOP());
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ return __ftrace_make_call_kernel(rec, addr);
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ /* We should not get here without modules */
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ * Being that we are converting from nop, it had better
+ * already have a module defined.
+ */
+ if (!rec->arch.mod) {
+ pr_err("No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_make_call(rec, addr);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ ppc_inst_t op;
+ unsigned long ip = rec->ip;
+ unsigned long entry, ptr, tramp;
+ struct module *mod = rec->arch.mod;
+
+ /* If we never set up ftrace trampolines, then bail */
+ if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* read where this goes */
+ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+ entry = ppc_global_function_entry((void *)old_addr);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (tramp != entry) {
+ /* old_addr is not within range, so we must have used a trampoline */
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+ }
+
+ /* The new target may be within range */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ if (rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+ tramp = mod->arch.tramp;
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#else
+static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+ return 0;
+}
+#endif
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ ppc_inst_t old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, old_addr, 1);
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ /*
+ * We always patch out of range locations to go to the regs
+ * variant, so there is nothing to do here
+ */
+ return 0;
+ } else if (!IS_ENABLED(CONFIG_MODULES)) {
+ /* We should not get here without modules */
+ return -EINVAL;
+ }
+
+ /*
+ * Out of range jumps are called from modules.
+ */
+ if (!rec->arch.mod) {
+ pr_err("No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_modify_call(rec, old_addr, addr);
+}
+#endif
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ ppc_inst_t old, new;
+ int ret;
+
+ old = ppc_inst_read((u32 *)&ftrace_call);
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+
+ /* Also update the regs callback function */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) {
+ ip = (unsigned long)(&ftrace_regs_call);
+ old = ppc_inst_read((u32 *)&ftrace_regs_call);
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+ }
+
+ return ret;
+}
+
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+ ftrace_modify_all_code(command);
+}
+
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+void ftrace_free_init_tramp(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
+ if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
+ ftrace_tramps[i] = 0;
+ return;
+ }
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ int i;
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ u32 stub_insns[] = {
+ PPC_RAW_LD(_R12, _R13, PACATOC),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+ };
+ unsigned long addr;
+ long reladdr;
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+ else
+ addr = ppc_global_function_entry((void *)ftrace_caller);
+
+ reladdr = addr - kernel_toc_addr();
+
+ if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+static int ftrace_modify_ftrace_graph_caller(bool enable)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ ppc_inst_t old, new;
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+ return 0;
+
+ old = ftrace_call_replace(ip, enable ? stub : addr, 0);
+ new = ftrace_call_replace(ip, enable ? addr : stub, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_ftrace_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_ftrace_graph_caller(false);
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info. Return the address we want to divert to.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
+{
+ unsigned long return_hooker;
+ int bit;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ goto out;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+
+ bit = ftrace_test_recursion_trylock(ip, parent);
+ if (bit < 0)
+ goto out;
+
+ return_hooker = ppc_function_entry(return_to_handler);
+
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+ parent = return_hooker;
+
+ ftrace_test_recursion_unlock(bit);
+out:
+ return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
+}
+#else
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
+{
+ return __prepare_ftrace_return(parent, ip, sp);
+}
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+ if (str[0] == '.' && search[0] != '.')
+ return str + 1;
+ else
+ return str;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
index 6708e24db0ab..a8a7f28404c8 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -3,12 +3,12 @@
* Split from ftrace_64.S
*/
+#include <linux/export.h>
#include <linux/magic.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/ppc-opcode.h>
-#include <asm/export.h>
_GLOBAL_TOC(ftrace_caller)
lbz r3, PACA_FTRACE_ENABLED(r13)
@@ -65,3 +65,68 @@ _GLOBAL(ftrace_graph_caller)
addi r1, r1, 112
blr
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
+
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -16(r1)
+ stw r3, 8(r1)
+ stw r4, 12(r1)
+#endif
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+#ifdef CONFIG_PPC64
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+#else
+ lwz r3, 8(r1)
+ lwz r4, 12(r1)
+ addi r1, r1, 16
+#endif
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index f9fd5f743eba..76dbe9fd2c0f 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -3,12 +3,12 @@
* Split from ftrace_64.S
*/
+#include <linux/export.h>
#include <linux/magic.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/ppc-opcode.h>
-#include <asm/export.h>
#include <asm/thread_info.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
@@ -32,53 +32,74 @@
* Our job is to save the register state into a struct pt_regs (on the stack)
* and then arrange for the ftrace function to be called.
*/
-_GLOBAL(ftrace_regs_caller)
- /* Save the original return address in A's stack frame */
- std r0,LRSAVE(r1)
+.macro ftrace_regs_entry allregs
+ /* Create a minimal stack frame for representing B */
+ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1)
/* Create our stack frame + pt_regs */
- stdu r1,-SWITCH_FRAME_SIZE(r1)
+ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
SAVE_GPR(0, r1)
- SAVE_10GPRS(2, r1)
+ SAVE_GPRS(3, 10, r1)
+#ifdef CONFIG_PPC64
+ /* Save the original return address in A's stack frame */
+ std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
beq ftrace_no_trace
+#endif
- SAVE_10GPRS(12, r1)
- SAVE_10GPRS(22, r1)
+ .if \allregs == 1
+ SAVE_GPR(2, r1)
+ SAVE_GPRS(11, 31, r1)
+ .else
+#ifdef CONFIG_LIVEPATCH_64
+ SAVE_GPR(14, r1)
+#endif
+ .endif
/* Save previous stack pointer (r1) */
- addi r8, r1, SWITCH_FRAME_SIZE
- std r8, GPR1(r1)
+ addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ PPC_STL r8, GPR1(r1)
+ .if \allregs == 1
/* Load special regs for save below */
mfmsr r8
mfctr r9
mfxer r10
mfcr r11
+ .else
+ /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */
+ li r8, 0
+ .endif
/* Get the _mcount() call site out of LR */
mflr r7
/* Save it as pt_regs->nip */
- std r7, _NIP(r1)
+ PPC_STL r7, _NIP(r1)
+ /* Also save it in B's stackframe header for proper unwind */
+ PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
/* Save the read LR in pt_regs->link */
- std r0, _LINK(r1)
+ PPC_STL r0, _LINK(r1)
+#ifdef CONFIG_PPC64
/* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2,PACATOC(r13) /* get kernel TOC in r2 */
-
- addis r3,r2,function_trace_op@toc@ha
- addi r3,r3,function_trace_op@toc@l
+ std r2, STK_GOT(r1)
+ LOAD_PACA_TOC() /* get kernel TOC in r2 */
+ LOAD_REG_ADDR(r3, function_trace_op)
ld r5,0(r3)
+#else
+ lis r3,function_trace_op@ha
+ lwz r5,function_trace_op@l(r3)
+#endif
-#ifdef CONFIG_LIVEPATCH
- mr r14,r7 /* remember old NIP */
+#ifdef CONFIG_LIVEPATCH_64
+ mr r14, r7 /* remember old NIP */
#endif
+
/* Calculate ip from nip-4 into r3 for call below */
subi r3, r7, MCOUNT_INSN_SIZE
@@ -86,130 +107,85 @@ _GLOBAL(ftrace_regs_caller)
mr r4, r0
/* Save special regs */
- std r8, _MSR(r1)
- std r9, _CTR(r1)
- std r10, _XER(r1)
- std r11, _CCR(r1)
+ PPC_STL r8, _MSR(r1)
+ .if \allregs == 1
+ PPC_STL r9, _CTR(r1)
+ PPC_STL r10, _XER(r1)
+ PPC_STL r11, _CCR(r1)
+ .endif
/* Load &pt_regs in r6 for call below */
- addi r6, r1 ,STACK_FRAME_OVERHEAD
-
- /* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_regs_call
-ftrace_regs_call:
- bl ftrace_stub
- nop
+ addi r6, r1, STACK_INT_FRAME_REGS
+.endm
+.macro ftrace_regs_exit allregs
/* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
+ PPC_LL r3, _NIP(r1)
mtctr r3
-#ifdef CONFIG_LIVEPATCH
+
+#ifdef CONFIG_LIVEPATCH_64
cmpd r14, r3 /* has NIP been altered? */
#endif
/* Restore gprs */
- REST_GPR(0,r1)
- REST_10GPRS(2,r1)
- REST_10GPRS(12,r1)
- REST_10GPRS(22,r1)
+ .if \allregs == 1
+ REST_GPRS(2, 31, r1)
+ .else
+ REST_GPRS(3, 10, r1)
+#ifdef CONFIG_LIVEPATCH_64
+ REST_GPR(14, r1)
+#endif
+ .endif
/* Restore possibly modified LR */
- ld r0, _LINK(r1)
+ PPC_LL r0, _LINK(r1)
mtlr r0
+#ifdef CONFIG_PPC64
/* Restore callee's TOC */
- ld r2, 24(r1)
+ ld r2, STK_GOT(r1)
+#endif
/* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
-#ifdef CONFIG_LIVEPATCH
+#ifdef CONFIG_LIVEPATCH_64
/* Based on the cmpd above, if the NIP was altered handle livepatch */
bne- livepatch_handler
#endif
+ bctr /* jump after _mcount site */
+.endm
-ftrace_caller_common:
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
+_GLOBAL(ftrace_regs_caller)
+ ftrace_regs_entry 1
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_regs_call
+ftrace_regs_call:
+ bl ftrace_stub
+ ftrace_regs_exit 1
- bctr /* jump after _mcount site */
+_GLOBAL(ftrace_caller)
+ ftrace_regs_entry 0
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ ftrace_regs_exit 0
_GLOBAL(ftrace_stub)
blr
+#ifdef CONFIG_PPC64
ftrace_no_trace:
mflr r3
mtctr r3
REST_GPR(3, r1)
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
mtlr r0
bctr
+#endif
-_GLOBAL(ftrace_caller)
- /* Save the original return address in A's stack frame */
- std r0, LRSAVE(r1)
-
- /* Create our stack frame + pt_regs */
- stdu r1, -SWITCH_FRAME_SIZE(r1)
-
- /* Save all gprs to pt_regs */
- SAVE_8GPRS(3, r1)
-
- lbz r3, PACA_FTRACE_ENABLED(r13)
- cmpdi r3, 0
- beq ftrace_no_trace
-
- /* Get the _mcount() call site out of LR */
- mflr r7
- std r7, _NIP(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2, PACATOC(r13) /* get kernel TOC in r2 */
-
- addis r3, r2, function_trace_op@toc@ha
- addi r3, r3, function_trace_op@toc@l
- ld r5, 0(r3)
-
- /* Calculate ip from nip-4 into r3 for call below */
- subi r3, r7, MCOUNT_INSN_SIZE
-
- /* Put the original return address in r4 as parent_ip */
- mr r4, r0
-
- /* Set pt_regs to NULL */
- li r6, 0
-
- /* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-
- ld r3, _NIP(r1)
- mtctr r3
-
- /* Restore gprs */
- REST_8GPRS(3,r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
-
- /* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
-
- /* Reload original LR */
- ld r0, LRSAVE(r1)
- mtlr r0
-
- /* Handle function_graph or go back */
- b ftrace_caller_common
-
-#ifdef CONFIG_LIVEPATCH
+#ifdef CONFIG_LIVEPATCH_64
/*
* This function runs in the mcount context, between two functions. As
* such it can only clobber registers which are volatile and used in
@@ -277,54 +253,69 @@ livepatch_handler:
blr
#endif /* CONFIG_LIVEPATCH */
+#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+#endif
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
stdu r1, -112(r1)
- /* with -mprofile-kernel, parameter regs are still alive at _mcount */
- std r10, 104(r1)
- std r9, 96(r1)
- std r8, 88(r1)
- std r7, 80(r1)
- std r6, 72(r1)
- std r5, 64(r1)
- std r4, 56(r1)
- std r3, 48(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2, PACATOC(r13) /* get kernel TOC in r2 */
- addi r5, r1, 112
- mfctr r4 /* ftrace_caller has moved local addr here */
- std r4, 40(r1)
- mflr r3 /* ftrace_caller has restored LR from stack */
- subi r4, r4, MCOUNT_INSN_SIZE
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -16(r1)
+ stw r3, 8(r1)
+ stw r4, 12(r1)
+#endif
- bl prepare_ftrace_return
+ bl ftrace_return_to_handler
nop
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR to this.
- */
+ /* return value has real return address */
mtlr r3
- ld r0, 40(r1)
- mtctr r0
- ld r10, 104(r1)
- ld r9, 96(r1)
- ld r8, 88(r1)
- ld r7, 80(r1)
- ld r6, 72(r1)
- ld r5, 64(r1)
- ld r4, 56(r1)
- ld r3, 48(r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
+#ifdef CONFIG_PPC64
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+#else
+ lwz r3, 8(r1)
+ lwz r4, 12(r1)
+ addi r1, r1, 16
+#endif
- addi r1, r1, 112
- mflr r0
- std r0, LRSAVE(r1)
- bctr
+ /* Jump back to real return address */
+ blr
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 014ff0701f24..11e062b47d3f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,11 +37,11 @@
#include <linux/smp.h>
#include <linux/console.h>
#include <linux/kmsg_dump.h>
+#include <linux/debugfs.h>
#include <asm/emulated_ops.h>
-#include <asm/pgtable.h>
#include <linux/uaccess.h>
-#include <asm/debugfs.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -53,7 +53,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
-#include <asm/tm.h>
#endif
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
@@ -68,6 +67,8 @@
#include <asm/kprobes.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
+#include <asm/disassemble.h>
+#include <asm/udbg.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -156,7 +157,7 @@ static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
-extern void panic_flush_kmsg_start(void)
+void panic_flush_kmsg_start(void)
{
/*
* These are mostly taken from kernel/panic.c, but tries to do
@@ -169,9 +170,8 @@ extern void panic_flush_kmsg_start(void)
bust_spinlocks(1);
}
-extern void panic_flush_kmsg_end(void)
+void panic_flush_kmsg_end(void)
{
- printk_safe_flush_on_panic();
kmsg_dump(KMSG_DUMP_PANIC);
bust_spinlocks(0);
debug_locks_off();
@@ -222,7 +222,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
/*
* system_reset_excption handles debugger, crash dump, panic, for 0x100
*/
- if (TRAP(regs) == 0x100)
+ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
return;
crash_fadump(regs, "die oops");
@@ -246,7 +246,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+ make_task_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
@@ -290,7 +290,7 @@ void die(const char *str, struct pt_regs *regs, long err)
/*
* system_reset_excption handles debugger, crash dump, panic, for 0x100
*/
- if (TRAP(regs) != 0x100) {
+ if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
if (debugger(regs))
return;
}
@@ -341,19 +341,19 @@ static bool exception_common(int signr, struct pt_regs *regs, int code,
return false;
}
- show_signal_msg(signr, regs, code, addr);
+ /*
+ * Must not enable interrupts even for user-mode exception, because
+ * this can be called from machine check, which may be a NMI or IRQ
+ * which don't like interrupts being enabled. Could check for
+ * in_hardirq || in_nmi perhaps, but there doesn't seem to be a good
+ * reason why _exception() should enable irqs for an exception handler,
+ * the handlers themselves do that directly.
+ */
- if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
- local_irq_enable();
+ show_signal_msg(signr, regs, code, addr);
current->thread.trap_nr = code;
- /*
- * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
- * to capture the content, if the task gets killed.
- */
- thread_pkey_regs_save(&current->thread);
-
return true;
}
@@ -394,7 +394,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
* Builds that do not support KVM could take this second option to increase
* the recoverability of NMIs.
*/
-void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
+noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_POWERNV
unsigned long kbase = (unsigned long)_stext;
@@ -411,7 +411,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
* Now test if the interrupt has hit a range that may be using
* HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
* problem ranges all run un-relocated. Test real and virt modes
- * at the same time by droping the high bit of the nip (virt mode
+ * at the same time by dropping the high bit of the nip (virt mode
* entry points still have the +0x4000 offset).
*/
nip &= ~0xc000000000000000ULL;
@@ -435,23 +435,16 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
nonrecoverable:
regs->msr &= ~MSR_RI;
+ local_paca->hsrr_valid = 0;
+ local_paca->srr_valid = 0;
#endif
}
-
-void system_reset_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
unsigned long hsrr0, hsrr1;
- bool nested = in_nmi();
bool saved_hsrrs = false;
/*
- * Avoid crashes in case of nested NMI exceptions. Recoverability
- * is determined by RI and in_nmi
- */
- if (!nested)
- nmi_enter();
-
- /*
* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
* The system reset interrupt itself may clobber HSRRs (e.g., to call
* OPAL), so save them here and restore them before returning.
@@ -510,21 +503,23 @@ out:
#ifdef CONFIG_PPC_BOOK3S_64
BUG_ON(get_paca()->in_nmi == 0);
if (get_paca()->in_nmi > 1)
- nmi_panic(regs, "Unrecoverable nested System Reset");
+ die("Unrecoverable nested System Reset", regs, SIGABRT);
#endif
/* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable System Reset");
+ if (regs_is_unrecoverable(regs)) {
+ /* For the reason explained in die_mce, nmi_exit before die */
+ nmi_exit();
+ die("Unrecoverable System Reset", regs, SIGABRT);
+ }
if (saved_hsrrs) {
mtspr(SPRN_HSRR0, hsrr0);
mtspr(SPRN_HSRR1, hsrr1);
}
- if (!nested)
- nmi_exit();
-
/* What should we do here? We could issue a shutdown or hard reset. */
+
+ return 0;
}
/*
@@ -532,9 +527,6 @@ out:
* Check if the NIP corresponds to the address of a sync
* instruction for which there is an entry in the exception
* table.
- * Note that the 601 only takes a machine check on TEA
- * (transfer error ack) signal assertion, and does not
- * set any of the top 16 bits of SRR1.
* -- paulus.
*/
static inline int check_io_access(struct pt_regs *regs)
@@ -554,11 +546,11 @@ static inline int check_io_access(struct pt_regs *regs)
* For the debug message, we look at the preceding
* load or store.
*/
- if (*nip == PPC_INST_NOP)
+ if (*nip == PPC_RAW_NOP())
nip -= 2;
- else if (*nip == PPC_INST_ISYNC)
+ else if (*nip == PPC_RAW_ISYNC())
--nip;
- if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
+ if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
unsigned int rb;
--nip;
@@ -566,8 +558,8 @@ static inline int check_io_access(struct pt_regs *regs)
printk(KERN_DEBUG "%s bad port %lx at %p\n",
(*nip & 0x100)? "OUT to": "IN from",
regs->gpr[rb] - _IO_BASE, nip);
- regs->msr |= MSR_RI;
- regs->nip = extable_fixup(entry);
+ regs_set_recoverable(regs);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
}
@@ -578,11 +570,13 @@ static inline int check_io_access(struct pt_regs *regs)
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* On 4xx, the reason for the machine check or program exception
is in the ESR. */
-#define get_reason(regs) ((regs)->dsisr)
+#define get_reason(regs) ((regs)->esr)
#define REASON_FP ESR_FP
#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED ESR_PPR
#define REASON_TRAP ESR_PTR
+#define REASON_PREFIXED 0
+#define REASON_BOUNDARY 0
/* single-step stuff */
#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC)
@@ -597,13 +591,17 @@ static inline int check_io_access(struct pt_regs *regs)
#define REASON_ILLEGAL SRR1_PROGILL
#define REASON_PRIVILEGED SRR1_PROGPRIV
#define REASON_TRAP SRR1_PROGTRAP
+#define REASON_PREFIXED SRR1_PREFIXED
+#define REASON_BOUNDARY SRR1_BOUNDARY
#define single_stepping(regs) ((regs)->msr & MSR_SE)
-#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
-#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE)
+#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
+#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
#endif
-#if defined(CONFIG_E500)
+#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4)
+
+#if defined(CONFIG_PPC_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -757,31 +755,6 @@ int machine_check_generic(struct pt_regs *regs)
{
return 0;
}
-#elif defined(CONFIG_E200)
-int machine_check_e200(struct pt_regs *regs)
-{
- unsigned long reason = mfspr(SPRN_MCSR);
-
- printk("Machine check in kernel mode.\n");
- printk("Caused by (from MCSR=%lx): ", reason);
-
- if (reason & MCSR_MCP)
- pr_cont("Machine Check Signal\n");
- if (reason & MCSR_CP_PERR)
- pr_cont("Cache Push Parity Error\n");
- if (reason & MCSR_CPERR)
- pr_cont("Cache Parity Error\n");
- if (reason & MCSR_EXCP_ERR)
- pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
- if (reason & MCSR_BUS_IRERR)
- pr_cont("Bus - Read Bus Error on instruction fetch\n");
- if (reason & MCSR_BUS_DRERR)
- pr_cont("Bus - Read Bus Error on data load\n");
- if (reason & MCSR_BUS_WRERR)
- pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
-
- return 0;
-}
#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
@@ -793,7 +766,6 @@ int machine_check_generic(struct pt_regs *regs)
case 0x80000:
pr_cont("Machine check signal\n");
break;
- case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
pr_cont("Transfer error ack signal\n");
@@ -820,12 +792,31 @@ int machine_check_generic(struct pt_regs *regs)
}
#endif /* everything else */
-void machine_check_exception(struct pt_regs *regs)
+void die_mce(const char *str, struct pt_regs *regs, long err)
+{
+ /*
+ * The machine check wants to kill the interrupted context,
+ * but make_task_dead() checks for in_interrupt() and panics
+ * in that case, so exit the irq/nmi before calling die.
+ */
+ if (in_nmi())
+ nmi_exit();
+ else
+ irq_exit();
+ die(str, regs, err);
+}
+
+/*
+ * BOOK3S_64 does not usually call this handler as a non-maskable interrupt
+ * (it uses its own early real-mode handler to handle the MCE proper
+ * and then raises irq_work to call this handler when interrupts are
+ * enabled). The only time when this is not true is if the early handler
+ * is unrecoverable, then it does call this directly to try to get a
+ * message out.
+ */
+static void __machine_check_exception(struct pt_regs *regs)
{
int recover = 0;
- bool nested = in_nmi();
- if (!nested)
- nmi_enter();
__this_cpu_inc(irq_stat.mce_exceptions);
@@ -851,23 +842,41 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- if (!nested)
- nmi_exit();
-
- die("Machine check", regs, SIGBUS);
+ die_mce("Machine check", regs, SIGBUS);
+bail:
/* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable Machine check");
+ if (regs_is_unrecoverable(regs))
+ die_mce("Unrecoverable Machine check", regs, SIGBUS);
+}
- return;
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot)
+{
+ udbg_printf("Machine check (early boot)\n");
+ udbg_printf("SRR0=0x%016lx SRR1=0x%016lx\n", regs->nip, regs->msr);
+ udbg_printf(" DAR=0x%016lx DSISR=0x%08lx\n", regs->dar, regs->dsisr);
+ udbg_printf(" LR=0x%016lx R1=0x%08lx\n", regs->link, regs->gpr[1]);
+ udbg_printf("------\n");
+ die("Machine check (early boot)", regs, SIGBUS);
+ for (;;)
+ ;
+ return 0;
+}
-bail:
- if (!nested)
- nmi_exit();
+DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
+{
+ __machine_check_exception(regs);
+}
+#endif
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
+{
+ __machine_check_exception(regs);
+
+ return 0;
}
-void SMIException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
{
die("System Management Interrupt", regs, SIGABRT);
}
@@ -877,11 +886,11 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
{
unsigned int ra, rb, t, i, sel, instr, rc;
const void __user *addr;
- u8 vbuf[16], *vdst;
+ u8 vbuf[16] __aligned(16), *vdst;
unsigned long ea, msr, msr_mask;
bool swap;
- if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+ if (__get_user(instr, (unsigned int __user *)regs->nip))
return;
/*
@@ -1049,16 +1058,15 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
#endif /* !__LITTLE_ENDIAN__ */
/* Go to next instruction */
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
}
#endif /* CONFIG_VSX */
-void handle_hmi_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
{
struct pt_regs *old_regs;
old_regs = set_irq_regs(regs);
- irq_enter();
#ifdef CONFIG_VSX
/* Real mode flagged P9 special emu is needed */
@@ -1078,46 +1086,52 @@ void handle_hmi_exception(struct pt_regs *regs)
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
- irq_exit();
set_irq_regs(old_regs);
}
-void unknown_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, TRAP_UNK, 0);
+}
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
+{
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
- exception_exit(prev_state);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void instruction_breakpoint_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
{
- enum ctx_state prev_state = exception_enter();
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+ return 0;
+}
+
+DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
+{
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_iabr_match(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
-void RunModeException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(RunModeException)
{
_exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void single_step_exception(struct pt_regs *regs)
+static void __single_step_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
-
clear_single_step(regs);
clear_br_trace(regs);
@@ -1126,16 +1140,17 @@ void single_step_exception(struct pt_regs *regs)
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_sstep(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+}
-bail:
- exception_exit(prev_state);
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
+{
+ __single_step_exception(regs);
}
-NOKPROBE_SYMBOL(single_step_exception);
/*
* After we have successfully emulated an instruction, we have to
@@ -1143,12 +1158,13 @@ NOKPROBE_SYMBOL(single_step_exception);
* pretend we got a single-step exception. This was pointed out
* by Kumar Gala. -- paulus
*/
-static void emulate_single_step(struct pt_regs *regs)
+void emulate_single_step(struct pt_regs *regs)
{
if (single_stepping(regs))
- single_step_exception(regs);
+ __single_step_exception(regs);
}
+#ifdef CONFIG_PPC_FPU_REGS
static inline int __parse_fpscr(unsigned long fpscr)
{
int ret = FPE_FLTUNK;
@@ -1175,6 +1191,7 @@ static inline int __parse_fpscr(unsigned long fpscr)
return ret;
}
+#endif
static void parse_fpe(struct pt_regs *regs)
{
@@ -1182,7 +1199,9 @@ static void parse_fpe(struct pt_regs *regs)
flush_fp_to_thread(current);
+#ifdef CONFIG_PPC_FPU_REGS
code = __parse_fpscr(current->thread.fp_state.fpscr);
+#endif
_exception(SIGFPE, regs, code, regs->nip);
}
@@ -1333,7 +1352,6 @@ static int emulate_instruction(struct pt_regs *regs)
if (!user_mode(regs))
return -EINVAL;
- CHECK_FULL_REGS(regs);
if (get_user(instword, (u32 __user *)(regs->nip)))
return -EFAULT;
@@ -1421,16 +1439,17 @@ static int emulate_instruction(struct pt_regs *regs)
return -EINVAL;
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return is_kernel_addr(addr);
}
+#endif
#ifdef CONFIG_MATH_EMULATION
static int emulate_math(struct pt_regs *regs)
{
int ret;
- extern int do_mathemu(struct pt_regs *regs);
ret = do_mathemu(regs);
if (ret >= 0)
@@ -1457,9 +1476,8 @@ static int emulate_math(struct pt_regs *regs)
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
-void program_check_exception(struct pt_regs *regs)
+static void do_program_check(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
/* We can now get here via a FP Unavailable exception if the core
@@ -1468,22 +1486,22 @@ void program_check_exception(struct pt_regs *regs)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
- goto bail;
+ return;
}
if (reason & REASON_TRAP) {
unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
- goto bail;
+ return;
if (kprobe_handler(regs))
- goto bail;
+ return;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
- goto bail;
+ return;
bugaddr = regs->nip;
/*
@@ -1494,11 +1512,15 @@ void program_check_exception(struct pt_regs *regs)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
+ regs_add_return_ip(regs, 4);
+ return;
+ }
+
+ /* User mode considers other cases after enabling IRQs */
+ if (!user_mode(regs)) {
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+ return;
}
- _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
- goto bail;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@@ -1519,7 +1541,7 @@ void program_check_exception(struct pt_regs *regs)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
- goto bail;
+ return;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
"at %lx (msr 0x%lx) tm_scratch=%llx\n",
@@ -1531,17 +1553,43 @@ void program_check_exception(struct pt_regs *regs)
/*
* If we took the program check in the kernel skip down to sending a
- * SIGILL. The subsequent cases all relate to emulating instructions
- * which we should only do for userspace. We also do not want to enable
- * interrupts for kernel faults because that might lead to further
- * faults, and loose the context of the original exception.
+ * SIGILL. The subsequent cases all relate to user space, such as
+ * emulating instructions which we should only do for user space. We
+ * also do not want to enable interrupts for kernel faults because that
+ * might lead to further faults, and loose the context of the original
+ * exception.
*/
if (!user_mode(regs))
goto sigill;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
+
+ /*
+ * (reason & REASON_TRAP) is mostly handled before enabling IRQs,
+ * except get_user_instr() can sleep so we cannot reliably inspect the
+ * current instruction in that context. Now that we know we are
+ * handling a user space trap and can sleep, we can check if the trap
+ * was a hashchk failure.
+ */
+ if (reason & REASON_TRAP) {
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+ ppc_inst_t insn;
+
+ if (get_user_instr(insn, (void __user *)regs->nip)) {
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ }
+
+ if (ppc_inst_primary_opcode(insn) == 31 &&
+ get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
+ _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
+ return;
+ }
+ }
+
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+ return;
+ }
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
@@ -1552,18 +1600,18 @@ void program_check_exception(struct pt_regs *regs)
* pattern to occurrences etc. -dgibson 31/Mar/2003
*/
if (!emulate_math(regs))
- goto bail;
+ return;
/* Try to emulate it if we should. */
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
switch (emulate_instruction(regs)) {
case 0:
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
emulate_single_step(regs);
- goto bail;
+ return;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- goto bail;
+ return;
}
}
@@ -1573,42 +1621,49 @@ sigill:
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-bail:
- exception_exit(prev_state);
}
-NOKPROBE_SYMBOL(program_check_exception);
+
+DEFINE_INTERRUPT_HANDLER(program_check_exception)
+{
+ do_program_check(regs);
+}
/*
* This occurs when running in hypervisor mode on POWER6 or later
* and an illegal instruction is encountered.
*/
-void emulation_assist_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
- regs->msr |= REASON_ILLEGAL;
- program_check_exception(regs);
+ regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
+ do_program_check(regs);
}
-NOKPROBE_SYMBOL(emulation_assist_interrupt);
-void alignment_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
- enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
+ unsigned long reason;
+
+ interrupt_cond_local_irq_enable(regs);
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ reason = get_reason(regs);
+ if (reason & REASON_BOUNDARY) {
+ sig = SIGBUS;
+ code = BUS_ADRALN;
+ goto bad;
+ }
if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
- goto bail;
+ return;
/* we don't implement logging of alignment exceptions */
if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
fixed = fix_alignment(regs);
if (fixed == 1) {
- regs->nip += 4; /* skip over emulated instruction */
+ /* skip over emulated instruction */
+ regs_add_return_ip(regs, inst_length(reason));
emulate_single_step(regs);
- goto bail;
+ return;
}
/* Operand address was bad */
@@ -1619,55 +1674,40 @@ void alignment_exception(struct pt_regs *regs)
sig = SIGBUS;
code = BUS_ADRALN;
}
+bad:
if (user_mode(regs))
_exception(sig, regs, code, regs->dar);
else
- bad_page_fault(regs, regs->dar, sig);
-
-bail:
- exception_exit(prev_state);
+ bad_page_fault(regs, sig);
}
-void StackOverflow(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
- pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
- current->comm, task_pid_nr(current), regs->gpr[1]);
- debugger(regs);
- show_regs(regs);
- panic("kernel stack overflow");
+ die("Kernel stack overflow", regs, SIGSEGV);
}
-void kernel_fp_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
-
- exception_exit(prev_state);
}
-void altivec_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (user_mode(regs)) {
/* A user program has executed an altivec instruction,
but this kernel doesn't support altivec. */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- goto bail;
+ return;
}
printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
-
-bail:
- exception_exit(prev_state);
}
-void vsx_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
{
if (user_mode(regs)) {
/* A user program has executed an vsx instruction,
@@ -1681,13 +1721,13 @@ void vsx_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
}
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
static void tm_unavailable(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (user_mode(regs)) {
current->thread.load_tm++;
- regs->msr |= MSR_TM;
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
tm_enable();
tm_restore_sprs(&current->thread);
return;
@@ -1698,7 +1738,7 @@ static void tm_unavailable(struct pt_regs *regs)
die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
}
-void facility_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
{
static char *facility_strings[] = {
[FSCR_FP_LG] = "FPU",
@@ -1711,6 +1751,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TAR_LG] = "TAR",
[FSCR_MSGP_LG] = "MSGP",
[FSCR_SCV_LG] = "SCV",
+ [FSCR_PREFIX_LG] = "PREFIX",
};
char *facility = "unknown";
u64 value;
@@ -1718,7 +1759,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
u8 status;
bool hv;
- hv = (TRAP(regs) == 0xf80);
+ hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
if (hv)
value = mfspr(SPRN_HFSCR);
else
@@ -1737,9 +1778,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
die("Unexpected facility unavailable exception", regs, SIGABRT);
}
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
if (status == FSCR_DSCR_LG) {
/*
@@ -1780,7 +1819,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
pr_err("DSCR based mfspr emulation failed\n");
return;
}
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
emulate_single_step(regs);
}
return;
@@ -1817,7 +1856,7 @@ out:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void fp_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
{
/* Note: This does not handle any kind of FP laziness. */
@@ -1850,7 +1889,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
tm_recheckpoint(&current->thread);
}
-void altivec_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This function operates
* the same way.
@@ -1865,7 +1904,7 @@ void altivec_unavailable_tm(struct pt_regs *regs)
current->thread.used_vr = 1;
}
-void vsx_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
@@ -1890,11 +1929,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-void performance_monitor_exception(struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
__this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
+
+ return 0;
+}
+#endif
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
+{
+ __this_cpu_inc(irq_stat.pmu_irqs);
+
+ perf_irq(regs);
+}
+
+DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
+{
+ /*
+ * On 64-bit, if perf interrupts hit in a local_irq_disable
+ * (soft-masked) region, we consider them as NMIs. This is required to
+ * prevent hash faults on user addresses when reading callchains (and
+ * looks better from an irq tracing perspective).
+ */
+ if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+ performance_monitor_exception_nmi(regs);
+ else
+ performance_monitor_exception_async(regs);
+
+ return 0;
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -1948,7 +2016,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
*/
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM flag is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -1957,8 +2025,10 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
}
-void DebugException(struct pt_regs *regs, unsigned long debug_status)
+DEFINE_INTERRUPT_HANDLER(DebugException)
{
+ unsigned long debug_status = regs->dsisr;
+
current->thread.debug.dbsr = debug_status;
/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
@@ -1967,7 +2037,7 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
* instead of stopping here when hitting a BT
*/
if (debug_status & DBSR_BT) {
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable BT */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
@@ -1978,7 +2048,7 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
if (user_mode(regs)) {
current->thread.debug.dbcr0 &= ~DBCR0_BT;
current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
return;
}
@@ -1992,7 +2062,7 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
if (debugger_sstep(regs))
return;
} else if (debug_status & DBSR_IC) { /* Instruction complete */
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable instruction completion */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
@@ -2014,7 +2084,7 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
current->thread.debug.dbcr0 &= ~DBCR0_IC;
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM bit is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -2024,19 +2094,10 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
} else
handle_debug(regs, debug_status);
}
-NOKPROBE_SYMBOL(DebugException);
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-#if !defined(CONFIG_TAU_INT)
-void TAUException(struct pt_regs *regs)
-{
- printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
- regs->nip, regs->msr, regs->trap, print_tainted());
-}
-#endif /* CONFIG_INT_TAU */
-
#ifdef CONFIG_ALTIVEC
-void altivec_assist_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
int err;
@@ -2051,7 +2112,7 @@ void altivec_assist_exception(struct pt_regs *regs)
PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -2069,10 +2130,11 @@ void altivec_assist_exception(struct pt_regs *regs)
}
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_FSL_BOOKE
-void CacheLockingException(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+#ifdef CONFIG_PPC_85xx
+DEFINE_INTERRUPT_HANDLER(CacheLockingException)
{
+ unsigned long error_code = regs->dsisr;
+
/* We treat cache locking instructions from the user
* as priv ops, in the future we could try to do
* something smarter
@@ -2081,20 +2143,17 @@ void CacheLockingException(struct pt_regs *regs, unsigned long address,
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
return;
}
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
#ifdef CONFIG_SPE
-void SPEFloatingPointException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
{
- extern int do_spe_mathemu(struct pt_regs *regs);
unsigned long spefscr;
int fpexc_mode;
int code = FPE_FLTUNK;
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
flush_spe_to_thread(current);
@@ -2117,7 +2176,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
err = do_spe_mathemu(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -2136,24 +2195,21 @@ void SPEFloatingPointException(struct pt_regs *regs)
return;
}
-void SPEFloatingPointRoundException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
{
- extern int speround_handler(struct pt_regs *regs);
int err;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
preempt_enable();
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
err = speround_handler(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -2178,30 +2234,22 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
* in the MSR is 0. This indicates that SRR0/1 are live, and that
* we therefore lost state by taking this exception.
*/
-void unrecoverable_exception(struct pt_regs *regs)
+void __noreturn unrecoverable_exception(struct pt_regs *regs)
{
pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
regs->trap, regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
+ /* die() should not return */
+ for (;;)
+ ;
}
-NOKPROBE_SYMBOL(unrecoverable_exception);
#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
-/*
- * Default handler for a Watchdog exception,
- * spins until a reboot occurs
- */
-void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
-{
- /* Generic WatchdogHandler, implement your own */
- mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
- return;
-}
-
-void WatchdogException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
{
printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
- WatchdogHandler(regs);
+ mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
+ return 0;
}
#endif
@@ -2209,18 +2257,12 @@ void WatchdogException(struct pt_regs *regs)
* We enter here if we discover during exception entry that we are
* running in supervisor mode with a userspace value in the stack pointer.
*/
-void kernel_bad_stack(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
{
printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
regs->gpr[1], regs->nip);
die("Bad kernel stack pointer", regs, SIGABRT);
}
-NOKPROBE_SYMBOL(kernel_bad_stack);
-
-void __init trap_init(void)
-{
-}
-
#ifdef CONFIG_PPC_EMULATED_STATS
@@ -2269,35 +2311,20 @@ void ppc_warn_emulated_print(const char *type)
static int __init ppc_warn_emulated_init(void)
{
- struct dentry *dir, *d;
+ struct dentry *dir;
unsigned int i;
struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
- if (!powerpc_debugfs_root)
- return -ENODEV;
-
dir = debugfs_create_dir("emulated_instructions",
- powerpc_debugfs_root);
- if (!dir)
- return -ENOMEM;
+ arch_debugfs_dir);
- d = debugfs_create_u32("do_warn", 0644, dir,
- &ppc_warn_emulated);
- if (!d)
- goto fail;
+ debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
- for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, 0644, dir,
- (u32 *)&entries[i].val.counter);
- if (!d)
- goto fail;
- }
+ for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
+ debugfs_create_u32(entries[i].name, 0644, dir,
+ (u32 *)&entries[i].val.counter);
return 0;
-
-fail:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
device_initcall(ppc_warn_emulated_init);
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
index 07296bc39166..80a1f9a4300a 100644
--- a/arch/powerpc/kernel/ucall.S
+++ b/arch/powerpc/kernel/ucall.S
@@ -5,8 +5,8 @@
* Copyright 2019, IBM Corporation.
*
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
-#include <asm/export.h>
_GLOBAL(ucall_norets)
EXPORT_SYMBOL_GPL(ucall_norets)
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 01595e8cafe7..92b3fc258d11 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -5,7 +5,7 @@
* c 2001 PPC 64 Team, IBM Corp
*/
-#include <stdarg.h>
+#include <linux/stdarg.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/console.h>
@@ -67,6 +67,8 @@ void __init udbg_early_init(void)
udbg_init_debug_opal_raw();
#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI)
udbg_init_debug_opal_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_16550)
+ udbg_init_debug_16550();
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 9356b60d6030..a0467e528b70 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -7,7 +7,7 @@
#include <linux/types.h>
#include <asm/udbg.h>
#include <asm/io.h>
-#include <asm/reg_a2.h>
+#include <asm/early_ioremap.h>
extern u8 real_readb(volatile u8 __iomem *addr);
extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -84,7 +84,7 @@ static int udbg_uart_getc(void)
return udbg_uart_in(UART_RBR);
}
-static void udbg_use_uart(void)
+static void __init udbg_use_uart(void)
{
udbg_putc = udbg_uart_putc;
udbg_flush = udbg_uart_flush;
@@ -92,7 +92,7 @@ static void udbg_use_uart(void)
udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_uart_setup(unsigned int speed, unsigned int clock)
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
@@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)
udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(unsigned int clock)
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
@@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)
outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_uart_init_pio(unsigned long port, unsigned int stride)
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
if (!port)
return;
@@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)
}
-void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
{
if (!addr)
return;
@@ -296,3 +296,35 @@ void __init udbg_init_40x_realmode(void)
}
#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_16550
+
+static void __iomem *udbg_uart_early_addr;
+
+void __init udbg_init_debug_16550(void)
+{
+ udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+ udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
+}
+
+static int __init udbg_init_debug_16550_ioremap(void)
+{
+ void __iomem *addr;
+
+ if (!udbg_uart_early_addr)
+ return 0;
+
+ addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+ if (WARN_ON(!addr))
+ return -ENOMEM;
+
+ udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
+ early_iounmap(udbg_uart_early_addr, 0x1000);
+ udbg_uart_early_addr = NULL;
+
+ return 0;
+}
+
+early_initcall(udbg_init_debug_16550_ioremap);
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 1cfef0e5fec5..95a41ae9dfa7 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -14,6 +14,7 @@
#include <linux/kdebug.h>
#include <asm/sstep.h>
+#include <asm/inst.h>
#define UPROBE_TRAP_NR UINT_MAX
@@ -40,6 +41,18 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
if (addr & 0x03)
return -EINVAL;
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) &&
+ (addr & 0x3f) == 60) {
+ pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
+ return -EINVAL;
+ }
+
+ if (!can_single_step(ppc_inst_val(ppc_inst_read(auprobe->insn)))) {
+ pr_info_ratelimited("Cannot register a uprobe on instructions that can't be single stepped\n");
+ return -ENOTSUPP;
+ }
+
return 0;
}
@@ -54,7 +67,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
autask->saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
- regs->nip = current->utask->xol_vaddr;
+ regs_set_return_ip(regs, current->utask->xol_vaddr);
user_enable_single_step(current);
return 0;
@@ -111,7 +124,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* support doesn't exist and have to fix-up the next instruction
* to be executed.
*/
- regs->nip = utask->vaddr + MAX_UINSN_BYTES;
+ regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn));
user_disable_single_step(current);
return 0;
@@ -140,6 +153,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
case DIE_SSTEP:
if (uprobe_post_sstep_notifier(regs))
return NOTIFY_STOP;
+ break;
default:
break;
}
@@ -173,7 +187,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
* emulate_step() returns 1 if the insn was successfully emulated.
* For all other cases, we need to single-step in hardware.
*/
- ret = emulate_step(regs, auprobe->insn);
+ ret = emulate_step(regs, ppc_inst_read(auprobe->insn));
if (ret > 0)
return true;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index eae9ddaecbcf..7a2ff9010f17 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,12 +17,14 @@
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/memblock.h>
+#include <linux/syscalls.h>
+#include <linux/time_namespace.h>
+#include <vdso/datapage.h>
-#include <asm/pgtable.h>
+#include <asm/syscall.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -31,39 +33,13 @@
#include <asm/vdso_datapage.h>
#include <asm/setup.h>
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/* Max supported size for symbol names */
-#define MAX_SYMNAME 64
-
/* The alignment of the vDSO */
#define VDSO_ALIGNMENT (1 << 16)
-static unsigned int vdso32_pages;
-static void *vdso32_kbase;
-static struct page **vdso32_pagelist;
-unsigned long vdso32_sigtramp;
-unsigned long vdso32_rt_sigtramp;
-
-#ifdef CONFIG_VDSO32
extern char vdso32_start, vdso32_end;
-#endif
-
-#ifdef CONFIG_PPC64
extern char vdso64_start, vdso64_end;
-static void *vdso64_kbase = &vdso64_start;
-static unsigned int vdso64_pages;
-static struct page **vdso64_pagelist;
-unsigned long vdso64_rt_sigtramp;
-#endif /* CONFIG_PPC64 */
-static int vdso_ready;
+long sys_ni_syscall(void);
/*
* The vdso data page (aka. systemcfg for old ppc64 fans) is here.
@@ -71,116 +47,152 @@ static int vdso_ready;
* with it, it will become dynamically allocated
*/
static union {
- struct vdso_data data;
+ struct vdso_arch_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_arch_data *vdso_data = &vdso_data_store.data;
+
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
+ unsigned long text_size)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+ if (new_size != text_size)
+ return -EINVAL;
-/* Format of the patch table */
-struct vdso_patch_def
+ current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+ return 0;
+}
+
+static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
- unsigned long ftr_mask, ftr_value;
- const char *gen_name;
- const char *fix_name;
+ return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start);
+}
+
+static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+ return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
+}
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static struct vm_special_mapping vvar_spec __ro_after_init = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
};
-/* Table of functions to patch based on the CPU type/revision
- *
- * Currently, we only change sync_dicache to do nothing on processors
- * with a coherent icache
- */
-static struct vdso_patch_def vdso_patches[] = {
- {
- CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
- "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
- },
+static struct vm_special_mapping vdso32_spec __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso32_mremap,
};
+static struct vm_special_mapping vdso64_spec __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso64_mremap,
+};
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return ((struct vdso_arch_data *)vvar_page)->data;
+}
+
/*
- * Some infos carried around for each of them during parsing at
- * boot time.
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
*/
-struct lib32_elfinfo
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
- Elf32_Ehdr *hdr; /* ptr to ELF */
- Elf32_Sym *dynsym; /* ptr to .dynsym section */
- unsigned long dynsymsize; /* size of .dynsym section */
- char *dynstr; /* ptr to .dynstr section */
- unsigned long text; /* offset of .text section in .so */
-};
+ struct mm_struct *mm = task->mm;
+ VMA_ITERATOR(vmi, mm, 0);
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+ for_each_vma(vmi, vma) {
+ if (vma_is_special_mapping(vma, &vvar_spec))
+ zap_vma_pages(vma);
+ }
+ mmap_read_unlock(mm);
-struct lib64_elfinfo
+ return 0;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
{
- Elf64_Ehdr *hdr;
- Elf64_Sym *dynsym;
- unsigned long dynsymsize;
- char *dynstr;
- unsigned long text;
-};
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = virt_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = virt_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
+ unsigned long vdso_size, vdso_base, mappings_size;
+ struct vm_special_mapping *vdso_spec;
+ unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE;
struct mm_struct *mm = current->mm;
- struct page **vdso_pagelist;
- unsigned long vdso_pages;
- unsigned long vdso_base;
- int rc;
-
- if (!vdso_ready)
- return 0;
+ struct vm_area_struct *vma;
-#ifdef CONFIG_PPC64
if (is_32bit_task()) {
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- vdso_base = VDSO32_MBASE;
+ vdso_spec = &vdso32_spec;
+ vdso_size = &vdso32_end - &vdso32_start;
} else {
- vdso_pagelist = vdso64_pagelist;
- vdso_pages = vdso64_pages;
- /*
- * On 64bit we don't have a preferred map address. This
- * allows get_unmapped_area to find an area near other mmaps
- * and most likely share a SLB entry.
- */
- vdso_base = 0;
+ vdso_spec = &vdso64_spec;
+ vdso_size = &vdso64_end - &vdso64_start;
}
-#else
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- vdso_base = VDSO32_MBASE;
-#endif
- current->mm->context.vdso_base = 0;
-
- /* vDSO has a problem and was disabled, just don't "enable" it for the
- * process
- */
- if (vdso_pages == 0)
- return 0;
- /* Add a page to the vdso size for the data page */
- vdso_pages ++;
+ mappings_size = vdso_size + vvar_size;
+ mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;
/*
- * pick a base address for the vDSO in process space. We try to put it
- * at vdso_base which is the "natural" base for it, but we might fail
- * and end up putting it elsewhere.
+ * Pick a base address for the vDSO in process space.
* Add enough to the size so that the result can be aligned.
*/
- if (down_write_killable(&mm->mmap_sem))
- return -EINTR;
- vdso_base = get_unmapped_area(NULL, vdso_base,
- (vdso_pages << PAGE_SHIFT) +
- ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
- 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- rc = vdso_base;
- goto fail_mmapsem;
- }
+ vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0);
+ if (IS_ERR_VALUE(vdso_base))
+ return vdso_base;
/* Add required alignment. */
vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
@@ -188,9 +200,15 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
/*
* Put vDSO base into mm struct. We need to do this before calling
* install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ * will fail to recognise it as a vDSO.
*/
- current->mm->context.vdso_base = vdso_base;
+ mm->context.vdso = (void __user *)vdso_base + vvar_size;
+
+ vma = _install_special_mapping(mm, vdso_base, vvar_size,
+ VM_READ | VM_MAYREAD | VM_IO |
+ VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
/*
* our vma flags don't have VM_WRITE so by default, the process isn't
@@ -202,439 +220,57 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* It's fine to use that for setting breakpoints in the vDSO code
* pages though.
*/
- rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pagelist);
- if (rc) {
- current->mm->context.vdso_base = 0;
- goto fail_mmapsem;
- }
-
- up_write(&mm->mmap_sem);
- return 0;
-
- fail_mmapsem:
- up_write(&mm->mmap_sem);
- return rc;
-}
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
- return "[vdso]";
- return NULL;
-}
-
-
-
-#ifdef CONFIG_VDSO32
-static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
- unsigned long *size)
-{
- Elf32_Shdr *sechdrs;
- unsigned int i;
- char *secnames;
-
- /* Grab section headers and strings so we can tell who is who */
- sechdrs = (void *)ehdr + ehdr->e_shoff;
- secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
- /* Find the section they want */
- for (i = 1; i < ehdr->e_shnum; i++) {
- if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
- if (size)
- *size = sechdrs[i].sh_size;
- return (void *)ehdr + sechdrs[i].sh_offset;
- }
- }
- *size = 0;
- return NULL;
-}
-
-static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
- const char *symname)
-{
- unsigned int i;
- char name[MAX_SYMNAME], *c;
-
- for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
- if (lib->dynsym[i].st_name == 0)
- continue;
- strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
- MAX_SYMNAME);
- c = strchr(name, '@');
- if (c)
- *c = 0;
- if (strcmp(symname, name) == 0)
- return &lib->dynsym[i];
- }
- return NULL;
-}
-
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function32(struct lib32_elfinfo *lib,
- const char *symname)
-{
- Elf32_Sym *sym = find_symbol32(lib, symname);
-
- if (sym == NULL) {
- printk(KERN_WARNING "vDSO32: function %s not found !\n",
- symname);
- return 0;
- }
- return sym->st_value - VDSO32_LBASE;
-}
-
-static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64,
- const char *orig, const char *fix)
-{
- Elf32_Sym *sym32_gen, *sym32_fix;
-
- sym32_gen = find_symbol32(v32, orig);
- if (sym32_gen == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
- return -1;
- }
- if (fix == NULL) {
- sym32_gen->st_name = 0;
- return 0;
- }
- sym32_fix = find_symbol32(v32, fix);
- if (sym32_fix == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
- return -1;
- }
- sym32_gen->st_value = sym32_fix->st_value;
- sym32_gen->st_size = sym32_fix->st_size;
- sym32_gen->st_info = sym32_fix->st_info;
- sym32_gen->st_other = sym32_fix->st_other;
- sym32_gen->st_shndx = sym32_fix->st_shndx;
+ vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
+ VM_READ | VM_EXEC | VM_MAYREAD |
+ VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
+ if (IS_ERR(vma))
+ do_munmap(mm, vdso_base, vvar_size, NULL);
- return 0;
-}
-#else /* !CONFIG_VDSO32 */
-static unsigned long __init find_function32(struct lib32_elfinfo *lib,
- const char *symname)
-{
- return 0;
-}
-
-static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64,
- const char *orig, const char *fix)
-{
- return 0;
-}
-#endif /* CONFIG_VDSO32 */
-
-
-#ifdef CONFIG_PPC64
-
-static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
- unsigned long *size)
-{
- Elf64_Shdr *sechdrs;
- unsigned int i;
- char *secnames;
-
- /* Grab section headers and strings so we can tell who is who */
- sechdrs = (void *)ehdr + ehdr->e_shoff;
- secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
- /* Find the section they want */
- for (i = 1; i < ehdr->e_shnum; i++) {
- if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
- if (size)
- *size = sechdrs[i].sh_size;
- return (void *)ehdr + sechdrs[i].sh_offset;
- }
- }
- if (size)
- *size = 0;
- return NULL;
-}
-
-static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
- const char *symname)
-{
- unsigned int i;
- char name[MAX_SYMNAME], *c;
-
- for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
- if (lib->dynsym[i].st_name == 0)
- continue;
- strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
- MAX_SYMNAME);
- c = strchr(name, '@');
- if (c)
- *c = 0;
- if (strcmp(symname, name) == 0)
- return &lib->dynsym[i];
- }
- return NULL;
+ return PTR_ERR_OR_ZERO(vma);
}
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function64(struct lib64_elfinfo *lib,
- const char *symname)
-{
- Elf64_Sym *sym = find_symbol64(lib, symname);
-
- if (sym == NULL) {
- printk(KERN_WARNING "vDSO64: function %s not found !\n",
- symname);
- return 0;
- }
-#ifdef VDS64_HAS_DESCRIPTORS
- return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
- VDSO64_LBASE;
-#else
- return sym->st_value - VDSO64_LBASE;
-#endif
-}
-
-static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64,
- const char *orig, const char *fix)
-{
- Elf64_Sym *sym64_gen, *sym64_fix;
-
- sym64_gen = find_symbol64(v64, orig);
- if (sym64_gen == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
- return -1;
- }
- if (fix == NULL) {
- sym64_gen->st_name = 0;
- return 0;
- }
- sym64_fix = find_symbol64(v64, fix);
- if (sym64_fix == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
- return -1;
- }
- sym64_gen->st_value = sym64_fix->st_value;
- sym64_gen->st_size = sym64_fix->st_size;
- sym64_gen->st_info = sym64_fix->st_info;
- sym64_gen->st_other = sym64_fix->st_other;
- sym64_gen->st_shndx = sym64_fix->st_shndx;
-
- return 0;
-}
-
-#endif /* CONFIG_PPC64 */
-
-
-static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
- void *sect;
-
- /*
- * Locate symbol tables & text section
- */
-
-#ifdef CONFIG_VDSO32
- v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
- v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
- if (v32->dynsym == NULL || v32->dynstr == NULL) {
- printk(KERN_ERR "vDSO32: required symbol section not found\n");
- return -1;
- }
- sect = find_section32(v32->hdr, ".text", NULL);
- if (sect == NULL) {
- printk(KERN_ERR "vDSO32: the .text section was not found\n");
- return -1;
- }
- v32->text = sect - vdso32_kbase;
-#endif
-
-#ifdef CONFIG_PPC64
- v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
- v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
- if (v64->dynsym == NULL || v64->dynstr == NULL) {
- printk(KERN_ERR "vDSO64: required symbol section not found\n");
- return -1;
- }
- sect = find_section64(v64->hdr, ".text", NULL);
- if (sect == NULL) {
- printk(KERN_ERR "vDSO64: the .text section was not found\n");
- return -1;
- }
- v64->text = sect - vdso64_kbase;
-#endif /* CONFIG_PPC64 */
-
- return 0;
-}
-
-static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- /*
- * Find signal trampolines
- */
+ struct mm_struct *mm = current->mm;
+ int rc;
-#ifdef CONFIG_PPC64
- vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
-#endif
- vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
- vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
-}
+ mm->context.vdso = NULL;
-static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
-#ifdef CONFIG_VDSO32
- Elf32_Sym *sym32;
-#endif
-#ifdef CONFIG_PPC64
- Elf64_Sym *sym64;
-
- sym64 = find_symbol64(v64, "__kernel_datapage_offset");
- if (sym64 == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol "
- "__kernel_datapage_offset !\n");
- return -1;
- }
- *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
- (vdso64_pages << PAGE_SHIFT) -
- (sym64->st_value - VDSO64_LBASE);
-#endif /* CONFIG_PPC64 */
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
-#ifdef CONFIG_VDSO32
- sym32 = find_symbol32(v32, "__kernel_datapage_offset");
- if (sym32 == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol "
- "__kernel_datapage_offset !\n");
- return -1;
- }
- *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
- (vdso32_pages << PAGE_SHIFT) -
- (sym32->st_value - VDSO32_LBASE);
-#endif
+ rc = __arch_setup_additional_pages(bprm, uses_interp);
+ if (rc)
+ mm->context.vdso = NULL;
- return 0;
+ mmap_write_unlock(mm);
+ return rc;
}
+#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \
+ void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \
+ void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \
+ \
+ do_##type##_fixups((value), __start, __end); \
+} while (0)
-static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
+static void __init vdso_fixup_features(void)
{
- unsigned long size;
- void *start;
-
#ifdef CONFIG_PPC64
- start = find_section64(v64->hdr, "__ftr_fixup", &size);
- if (start)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- start, start + size);
-
- start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size);
- if (start)
- do_feature_fixups(cur_cpu_spec->mmu_features,
- start, start + size);
-
- start = find_section64(v64->hdr, "__fw_ftr_fixup", &size);
- if (start)
- do_feature_fixups(powerpc_firmware_features,
- start, start + size);
-
- start = find_section64(v64->hdr, "__lwsync_fixup", &size);
- if (start)
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start, start + size);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup);
+ VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup);
+ VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_VDSO32
- start = find_section32(v32->hdr, "__ftr_fixup", &size);
- if (start)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- start, start + size);
-
- start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size);
- if (start)
- do_feature_fixups(cur_cpu_spec->mmu_features,
- start, start + size);
-
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup);
#ifdef CONFIG_PPC64
- start = find_section32(v32->hdr, "__fw_ftr_fixup", &size);
- if (start)
- do_feature_fixups(powerpc_firmware_features,
- start, start + size);
+ VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup);
#endif /* CONFIG_PPC64 */
-
- start = find_section32(v32->hdr, "__lwsync_fixup", &size);
- if (start)
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start, start + size);
+ VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup);
#endif
-
- return 0;
-}
-
-static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
- struct vdso_patch_def *patch = &vdso_patches[i];
- int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
- == patch->ftr_value;
- if (!match)
- continue;
-
- DBG("replacing %s with %s...\n", patch->gen_name,
- patch->fix_name ? "NONE" : patch->fix_name);
-
- /*
- * Patch the 32 bits and 64 bits symbols. Note that we do not
- * patch the "." symbol on 64 bits.
- * It would be easy to do, but doesn't seem to be necessary,
- * patching the OPD symbol is enough.
- */
- vdso_do_func_patch32(v32, v64, patch->gen_name,
- patch->fix_name);
-#ifdef CONFIG_PPC64
- vdso_do_func_patch64(v32, v64, patch->gen_name,
- patch->fix_name);
-#endif /* CONFIG_PPC64 */
- }
-
- return 0;
-}
-
-
-static __init int vdso_setup(void)
-{
- struct lib32_elfinfo v32;
- struct lib64_elfinfo v64;
-
- v32.hdr = vdso32_kbase;
-#ifdef CONFIG_PPC64
- v64.hdr = vdso64_kbase;
-#endif
- if (vdso_do_find_sections(&v32, &v64))
- return -1;
-
- if (vdso_fixup_datapage(&v32, &v64))
- return -1;
-
- if (vdso_fixup_features(&v32, &v64))
- return -1;
-
- if (vdso_fixup_alt_funcs(&v32, &v64))
- return -1;
-
- vdso_setup_trampolines(&v32, &v64);
-
- return 0;
}
/*
@@ -644,26 +280,13 @@ static __init int vdso_setup(void)
static void __init vdso_setup_syscall_map(void)
{
unsigned int i;
- extern unsigned long *sys_call_table;
-#ifdef CONFIG_PPC64
- extern unsigned long *compat_sys_call_table;
-#endif
- extern unsigned long sys_ni_syscall;
-
for (i = 0; i < NR_syscalls; i++) {
-#ifdef CONFIG_PPC64
- if (sys_call_table[i] != sys_ni_syscall)
- vdso_data->syscall_map_64[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- if (compat_sys_call_table[i] != sys_ni_syscall)
- vdso_data->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
-#else /* CONFIG_PPC64 */
- if (sys_call_table[i] != sys_ni_syscall)
- vdso_data->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
-#endif /* CONFIG_PPC64 */
+ if (sys_call_table[i] != (void *)&sys_ni_syscall)
+ vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
+ if (IS_ENABLED(CONFIG_COMPAT) &&
+ compat_sys_call_table[i] != (void *)&sys_ni_syscall)
+ vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
}
}
@@ -682,7 +305,7 @@ int vdso_getcpu_init(void)
node = cpu_to_node(cpu);
WARN_ON_ONCE(node > 0xffff);
- val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+ val = (cpu & 0xffff) | ((node & 0xffff) << 16);
mtspr(SPRN_SPRG_VDSO_WRITE, val);
get_paca()->sprg_vdso = val;
@@ -694,10 +317,24 @@ int vdso_getcpu_init(void)
early_initcall(vdso_getcpu_init);
#endif
-static int __init vdso_init(void)
+static struct page ** __init vdso_setup_pages(void *start, void *end)
{
int i;
+ struct page **pagelist;
+ int pages = (end - start) >> PAGE_SHIFT;
+
+ pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pagelist)
+ panic("%s: Cannot allocate page list for VDSO", __func__);
+
+ for (i = 0; i < pages; i++)
+ pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+ return pagelist;
+}
+
+static int __init vdso_init(void)
+{
#ifdef CONFIG_PPC64
/*
* Fill up the "systemcfg" stuff for backward compatibility
@@ -722,80 +359,19 @@ static int __init vdso_init(void)
vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
-
- /*
- * Calculate the size of the 64 bits vDSO
- */
- vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
- DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
- vdso_data->dcache_block_size = L1_CACHE_BYTES;
- vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
- vdso_data->icache_block_size = L1_CACHE_BYTES;
- vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
#endif /* CONFIG_PPC64 */
-
-#ifdef CONFIG_VDSO32
- vdso32_kbase = &vdso32_start;
-
- /*
- * Calculate the size of the 32 bits vDSO
- */
- vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
- DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
-#endif
-
-
- /*
- * Setup the syscall map in the vDOS
- */
vdso_setup_syscall_map();
- /*
- * Initialize the vDSO images in memory, that is do necessary
- * fixups of vDSO symbols, locate trampolines, etc...
- */
- if (vdso_setup()) {
- printk(KERN_ERR "vDSO setup failure, not enabled !\n");
- vdso32_pages = 0;
-#ifdef CONFIG_PPC64
- vdso64_pages = 0;
-#endif
- return 0;
- }
-
-#ifdef CONFIG_VDSO32
- /* Make sure pages are in the correct state */
- vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *),
- GFP_KERNEL);
- BUG_ON(vdso32_pagelist == NULL);
- for (i = 0; i < vdso32_pages; i++) {
- struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- get_page(pg);
- vdso32_pagelist[i] = pg;
- }
- vdso32_pagelist[i++] = virt_to_page(vdso_data);
- vdso32_pagelist[i] = NULL;
-#endif
+ vdso_fixup_features();
-#ifdef CONFIG_PPC64
- vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *),
- GFP_KERNEL);
- BUG_ON(vdso64_pagelist == NULL);
- for (i = 0; i < vdso64_pages; i++) {
- struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- get_page(pg);
- vdso64_pagelist[i] = pg;
- }
- vdso64_pagelist[i++] = virt_to_page(vdso_data);
- vdso64_pagelist[i] = NULL;
-#endif /* CONFIG_PPC64 */
+ if (IS_ENABLED(CONFIG_VDSO32))
+ vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
- get_page(virt_to_page(vdso_data));
+ if (IS_ENABLED(CONFIG_PPC64))
+ vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
smp_wmb();
- vdso_ready = 1;
return 0;
}
diff --git a/arch/powerpc/kernel/vdso/.gitignore b/arch/powerpc/kernel/vdso/.gitignore
new file mode 100644
index 000000000000..dd9bdd67758b
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso32.lds
+vdso32.so.dbg
+vdso64.lds
+vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
new file mode 100644
index 000000000000..1b93655c2857
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# List of files in the vdso, has to be asm only for now
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
+obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
+
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE)
+ CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
+ # This flag is supported by clang for 64-bit but not 32-bit so it will cause
+ # an unused command line flag warning for this file.
+ ifdef CONFIG_CC_IS_CLANG
+ CFLAGS_REMOVE_vgettimeofday-32.o += -fno-stack-clash-protection
+ endif
+ CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE)
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code
+# generation is minimal, it will just use r29 instead.
+ CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30)
+endif
+
+# Build rules
+
+ifdef CROSS32_COMPILE
+ VDSOCC := $(CROSS32_COMPILE)gcc
+else
+ VDSOCC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+ccflags-y := -fno-common -fno-builtin
+ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS)
+ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
+ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+
+# Filter flags that clang will warn are unused for linking
+ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS))
+
+CC32FLAGS := -m32
+LD32FLAGS := -Wl,-soname=linux-vdso32.so.1
+AS32FLAGS := -D__VDSO32__
+
+LD64FLAGS := -Wl,-soname=linux-vdso64.so.1
+AS64FLAGS := -D__VDSO64__
+
+targets += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+targets += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE
+ $(call if_changed,vdso32ld_and_check)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
+ $(call if_changed,vdso64ld_and_check)
+
+# assembly rules for the .S files
+$(obj-vdso32): %-32.o: %.S FORCE
+ $(call if_changed_dep,vdso32as)
+$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
+$(obj-vdso64): %-64.o: %.S FORCE
+ $(call if_changed_dep,vdso64as)
+$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
+ $(call if_changed_dep,cc_o_c)
+
+# Generate VDSO offsets using helper script
+gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh
+quiet_cmd_vdso32sym = VDSO32SYM $@
+ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@
+gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh
+quiet_cmd_vdso64sym = VDSO64SYM $@
+ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
+ $(call if_changed,vdso32sym)
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
+ $(call if_changed,vdso64sym)
+
+# actual build commands
+quiet_cmd_vdso32ld_and_check = VDSO32L $@
+ cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $<
+
+quiet_cmd_vdso64ld_and_check = VDSO64L $@
+ cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $<
+
+OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S
index 526f5ba2593e..0085ae464dac 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso/cacheflush.S
@@ -8,7 +8,9 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
.text
@@ -22,59 +24,76 @@
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
+BEGIN_FTR_SECTION
+ b 3f
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#ifdef CONFIG_PPC64
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl V_LOCAL_FUNC(__get_datapage)
+ get_datapage r10
mtlr r12
- mr r10,r3
+ .cfi_restore lr
+#endif
+#ifdef CONFIG_PPC64
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+#else
+ li r5, L1_CACHE_BYTES - 1
+#endif
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
+#ifdef CONFIG_PPC64
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srd. r8,r8,r9 /* compute line count */
+ PPC_SRL. r8,r8,r9 /* compute line count */
+#else
+ srwi. r8, r8, L1_CACHE_SHIFT
+ mr r7, r6
+#endif
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
+#ifdef CONFIG_PPC64
add r6,r6,r7
+#else
+ addi r6, r6, L1_CACHE_BYTES
+#endif
bdnz 1b
sync
/* Now invalidate the instruction cache */
+#ifdef CONFIG_PPC64
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srd. r8,r8,r9 /* compute line count */
+ PPC_SRL. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
+#endif
mtctr r8
+#ifdef CONFIG_PPC64
2: icbi 0,r6
add r6,r6,r7
+#else
+2: icbi 0, r7
+ addi r7, r7, L1_CACHE_BYTES
+#endif
bdnz 2b
isync
li r3,0
blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache)
-
-
-/*
- * POWER5 version of __kernel_sync_dicache
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
- .cfi_startproc
+3:
crclr cr0*4+so
sync
+ icbi 0,r1
isync
li r3,0
blr
.cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache_p5)
+V_FUNCTION_END(__kernel_sync_dicache)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S
index 6c7401bd284e..db8e167f0166 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso/datapage.S
@@ -10,34 +10,9 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
.text
- .global __kernel_datapage_offset;
-__kernel_datapage_offset:
- .long 0
-
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
@@ -52,15 +27,18 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r4,r3
- bl __get_datapage@local
+ mr. r4,r3
+ get_datapage r3
mtlr r12
+#ifdef __powerpc64__
+ addi r3,r3,CFG_SYSCALL_MAP64
+#else
addi r3,r3,CFG_SYSCALL_MAP32
- cmpli cr0,r4,0
+#endif
+ crclr cr0*4+so
beqlr
li r0,NR_syscalls
stw r0,0(r4)
- crclr cr0*4+so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_syscall_map)
@@ -70,17 +48,17 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
-#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3
+#ifndef __powerpc64__
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
- lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+#endif
+ PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
crclr cr0*4+so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
-#endif
diff --git a/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
new file mode 100755
index 000000000000..c7b54a5dcd3e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com
+#
+
+LC_ALL=C
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
new file mode 100755
index 000000000000..4bf15ffd5933
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com
+#
+
+LC_ALL=C
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S
index 63e914539e1a..8e08ccf19062 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso/getcpu.S
@@ -15,19 +15,36 @@
* int __kernel_getcpu(unsigned *cpu, unsigned *node);
*
*/
+#if defined(CONFIG_PPC64)
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
mfspr r5,SPRN_SPRG_VDSO_READ
- cmpwi cr0,r3,0
- cmpwi cr1,r4,0
+ PPC_LCMPI cr0,r3,0
+ PPC_LCMPI cr1,r4,0
clrlwi r6,r5,16
rlwinm r7,r5,16,31-15,31-0
beq cr0,1f
stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
+1: crclr cr0*4+so
li r3,0 /* always success */
+ beqlr cr1
+ stw r7,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#elif !defined(CONFIG_SMP)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ cmpwi cr0, r3, 0
+ cmpwi cr1, r4, 0
+ li r5, 0
+ beq cr0, 1f
+ stw r5, 0(r3)
+1: li r3, 0 /* always success */
+ crclr cr0*4+so
+ beqlr cr1
+ stw r5, 0(r4)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_getcpu)
+#endif
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
new file mode 100644
index 000000000000..48fc6658053a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Userland implementation of gettimeofday() for processes
+ * for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+/*
+ * The macro sets two stack frames, one for the caller and one for the callee
+ * because there are no requirement for the caller to set a stack frame when
+ * calling VDSO so it may have omitted to set one, especially on PPC64
+ */
+
+.macro cvdso_call funct call_time=0
+ .cfi_startproc
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ mflr r0
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+ .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
+ get_datapage r5
+ .ifeq \call_time
+ addi r5, r5, VDSO_DATA_OFFSET
+ .else
+ addi r4, r5, VDSO_DATA_OFFSET
+ .endif
+#ifdef __powerpc64__
+ bl CFUNC(DOTSYM(\funct))
+#else
+ bl \funct
+#endif
+ PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_restore r2
+#endif
+ .ifeq \call_time
+ cmpwi r3, 0
+ .endif
+ mtlr r0
+ addi r1, r1, 2 * PPC_MIN_STKFRM
+ .cfi_restore lr
+ .cfi_def_cfa_offset 0
+ crclr so
+ .ifeq \call_time
+ beqlr+
+ crset so
+ neg r3, r3
+ .endif
+ blr
+ .cfi_endproc
+.endm
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ cvdso_call __c_kernel_gettimeofday
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ cvdso_call __c_kernel_clock_gettime
+V_FUNCTION_END(__kernel_clock_gettime)
+
+/*
+ * Exact prototype of clock_gettime64()
+ *
+ * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts);
+ *
+ */
+#ifndef __powerpc64__
+V_FUNCTION_BEGIN(__kernel_clock_gettime64)
+ cvdso_call __c_kernel_clock_gettime64
+V_FUNCTION_END(__kernel_clock_gettime64)
+#endif
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ cvdso_call __c_kernel_clock_getres
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * Exact prototype of time()
+ *
+ * time_t time(time *t);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_time)
+ cvdso_call __c_kernel_time call_time=1
+V_FUNCTION_END(__kernel_time)
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+#ifndef __powerpc64__
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+ lwz r0,4(r11)
+ lwz r31,-4(r11)
+ mtlr r0
+ mr r1,r11
+ blr
+#endif
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S
index 227a7327399e..227a7327399e 100644
--- a/arch/powerpc/kernel/vdso32/note.S
+++ b/arch/powerpc/kernel/vdso/note.S
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp32.S
index 0bcc5e5fe789..0bcc5e5fe789 100644
--- a/arch/powerpc/kernel/vdso32/sigtramp.S
+++ b/arch/powerpc/kernel/vdso/sigtramp32.S
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S
index a8cc0409d7d2..2d4067561293 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso/sigtramp64.S
@@ -6,6 +6,7 @@
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
*/
+#include <asm/cache.h> /* IFETCH_ALIGN_BYTES */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
@@ -14,21 +15,26 @@
.text
-/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
- the return address to get an address in the middle of the presumed
- call instruction. Since we don't have a call here, we artificially
- extend the range covered by the unwind info by padding before the
- real start. */
- nop
+/*
+ * __kernel_start_sigtramp_rt64 and __kernel_sigtramp_rt64 together
+ * are one function split in two parts. The kernel jumps to the former
+ * and the signal handler indirectly (by blr) returns to the latter.
+ * __kernel_sigtramp_rt64 needs to point to the return address so
+ * glibc can correctly identify the trampoline stack frame.
+ */
.balign 8
+ .balign IFETCH_ALIGN_BYTES
+V_FUNCTION_BEGIN(__kernel_start_sigtramp_rt64)
+.Lsigrt_start:
+ bctrl /* call the handler */
+V_FUNCTION_END(__kernel_start_sigtramp_rt64)
V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
-.Lsigrt_start = . - 4
addi r1, r1, __SIGNAL_FRAMESIZE
li r0,__NR_rt_sigreturn
sc
.Lsigrt_end:
V_FUNCTION_END(__kernel_sigtramp_rt64)
-/* The ".balign 8" above and the following zeros mimic the old stack
+/* The .balign 8 above and the following zeros mimic the old stack
trampoline layout. The last magic value is the ucontext pointer,
chosen in such a way that older libgcc unwind code returns a zero
for a sigcontext pointer. */
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
index 00c025ba4a92..426e1ccc6971 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -4,6 +4,8 @@
* library
*/
#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
#ifdef __LITTLE_ENDIAN__
OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
@@ -11,11 +13,11 @@ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
#endif
OUTPUT_ARCH(powerpc:common)
-ENTRY(_start)
SECTIONS
{
- . = VDSO32_LBASE + SIZEOF_HEADERS;
+ PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -36,17 +38,25 @@ SECTIONS
PROVIDE(etext = .);
. = ALIGN(8);
+ VDSO_ftr_fixup_start = .;
__ftr_fixup : { *(__ftr_fixup) }
+ VDSO_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_mmu_ftr_fixup_start = .;
__mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+ VDSO_mmu_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_lwsync_fixup_start = .;
__lwsync_fixup : { *(__lwsync_fixup) }
+ VDSO_lwsync_fixup_end = .;
#ifdef CONFIG_PPC64
. = ALIGN(8);
+ VDSO_fw_ftr_fixup_start = .;
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
+ VDSO_fw_ftr_fixup_end = .;
#endif
/*
@@ -68,49 +78,16 @@ SECTIONS
__end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the beginning
- * of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
+ DWARF_DEBUG
+ ELF_DETAILS
/DISCARD/ : {
*(.note.GNU-stack)
+ *(*.EMB.apuinfo)
+ *(.branch_lt)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
+ *(.got1 .glink .iplt .rela*)
}
}
@@ -138,27 +115,26 @@ VERSION
{
VDSO_VERSION_STRING {
global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_datapage_offset;
-
__kernel_get_syscall_map;
-#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
+ __kernel_clock_gettime64;
__kernel_clock_getres;
__kernel_time;
__kernel_get_tbfreq;
-#endif
__kernel_sync_dicache;
- __kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP)
__kernel_getcpu;
#endif
local: *;
};
}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp32 = __kernel_sigtramp32;
+VDSO_sigtramp_rt32 = __kernel_sigtramp_rt32;
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 256fb9720298..bda6c8cdd459 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -4,6 +4,8 @@
* library
*/
#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
#ifdef __LITTLE_ENDIAN__
OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
@@ -11,11 +13,11 @@ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
#endif
OUTPUT_ARCH(powerpc:common64)
-ENTRY(_start)
SECTIONS
{
- . = VDSO64_LBASE + SIZEOF_HEADERS;
+ PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -30,23 +32,31 @@ SECTIONS
. = ALIGN(16);
.text : {
*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
- *(.sfpr .glink)
+ *(.sfpr)
} :text
PROVIDE(__etext = .);
PROVIDE(_etext = .);
PROVIDE(etext = .);
. = ALIGN(8);
+ VDSO_ftr_fixup_start = .;
__ftr_fixup : { *(__ftr_fixup) }
+ VDSO_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_mmu_ftr_fixup_start = .;
__mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+ VDSO_mmu_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_lwsync_fixup_start = .;
__lwsync_fixup : { *(__lwsync_fixup) }
+ VDSO_lwsync_fixup_end = .;
. = ALIGN(8);
+ VDSO_fw_ftr_fixup_start = .;
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
+ VDSO_fw_ftr_fixup_end = .;
/*
* Other stuff is appended to the text segment:
@@ -61,56 +71,22 @@ SECTIONS
.gcc_except_table : { *(.gcc_except_table) }
.rela.dyn ALIGN(8) : { *(.rela.dyn) }
- .opd ALIGN(8) : { KEEP (*(.opd)) }
.got ALIGN(8) : { *(.got .toc) }
_end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the beginning
- * of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
+ DWARF_DEBUG
+ ELF_DETAILS
/DISCARD/ : {
*(.note.GNU-stack)
+ *(*.EMB.apuinfo)
*(.branch_lt)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
+ *(.opd)
+ *(.glink .iplt .plt .rela*)
}
}
@@ -138,18 +114,12 @@ VERSION
{
VDSO_VERSION_STRING {
global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_datapage_offset;
-
__kernel_get_syscall_map;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_get_tbfreq;
__kernel_sync_dicache;
- __kernel_sync_dicache_p5;
__kernel_sigtramp_rt64;
__kernel_getcpu;
__kernel_time;
@@ -157,3 +127,8 @@ VERSION
local: *;
};
}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp_rt64 = __kernel_start_sigtramp_rt64;
diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..55a287c9a736
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_getres_data(vd, clock_id, res);
+}
+#else
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime32_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_getres_time32_data(vd, clock_id, res);
+}
+#endif
+
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_data *vd)
+{
+ return __cvdso_gettimeofday_data(vd, tv, tz);
+}
+
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd)
+{
+ return __cvdso_time_data(vd, time);
+}
diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso32/.gitignore
deleted file mode 100644
index fea5809857a5..000000000000
--- a/arch/powerpc/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-vdso32.lds
-vdso32.so.dbg
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
deleted file mode 100644
index 06f54d947057..000000000000
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
- $(obj-vdso32-y)
-
-# Build rules
-
-ifdef CROSS32_COMPILE
- VDSOCC := $(CROSS32_COMPILE)gcc
-else
- VDSOCC := $(CC)
-endif
-
-CC32FLAGS :=
-ifdef CONFIG_PPC64
-CC32FLAGS += -m32
-endif
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-GCOV_PROFILE := n
-KCOV_INSTRUMENT := n
-UBSAN_SANITIZE := n
-
-ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
- -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
-asflags-y := -D__VDSO32__ -s
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -Upowerpc
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
deleted file mode 100644
index 7f882e7b9f43..000000000000
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * vDSO provided cache flush routines
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
- * IBM Corp.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-
- .text
-
-/*
- * Default "generic" version of __kernel_sync_dicache.
- *
- * void __kernel_sync_dicache(unsigned long start, unsigned long end)
- *
- * Flushes the data cache & invalidate the instruction cache for the
- * provided range [start, end[
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
- mtlr r12
- mr r10,r3
-
- lwz r7,CFG_DCACHE_BLOCKSZ(r10)
- addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
- crclr cr0*4+so
- beqlr /* nothing to do? */
- mtctr r8
-1: dcbst 0,r6
- add r6,r6,r7
- bdnz 1b
- sync
-
-/* Now invalidate the instruction cache */
-
- lwz r7,CFG_ICACHE_BLOCKSZ(r10)
- addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5
- lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
- crclr cr0*4+so
- beqlr /* nothing to do? */
- mtctr r8
-2: icbi 0,r6
- add r6,r6,r7
- bdnz 2b
- isync
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache)
-
-
-/*
- * POWER5 version of __kernel_sync_dicache
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
- .cfi_startproc
- crclr cr0*4+so
- sync
- isync
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache_p5)
-
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index 3306672f57a9..000000000000
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,290 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * ppc64 kernel for use in the vDSO
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
- * IBM Corp.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-/* Offset for the low 32-bit part of a field of long type */
-#ifdef CONFIG_PPC64
-#define LOPART 4
-#else
-#define LOPART 0
-#endif
-
- .text
-/*
- * Exact prototype of gettimeofday
- *
- * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
- *
- */
-V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r10,r3 /* r10 saves tv */
- mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
- cmplwi r10,0 /* check if tv is NULL */
- beq 3f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l /* so we get microseconds in r4 */
- bl __do_get_tspec@local /* get sec/usec from tb & kernel */
- stw r3,TVAL32_TV_SEC(r10)
- stw r4,TVAL32_TV_USEC(r10)
-
-3: cmplwi r11,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r9)
- stw r4,TZONE_TZ_MINWEST(r11)
- stw r5,TZONE_TZ_DSTTIME(r11)
-
-1: mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_gettimeofday)
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpli cr0,r3,CLOCK_REALTIME
- cmpli cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r3,r4 contain our sec/nsec values, r5 and r6
- * can be used, r7 contains NSEC_PER_SEC.
- */
-
- lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* We now have our offset in r5,r6. We create a fake dependency
- * on that value and re-check the counter
- */
- or r0,r6,r5
- xor r0,r0,r0
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
-
- /* Calculate and store result. Note that this mimics the C code,
- * which may cause funny results if nsec goes negative... is that
- * possible at all ?
- */
- add r3,r3,r5
- add r4,r4,r6
- cmpw cr0,r4,r7
- cmpwi cr1,r4,0
- blt 1f
- subf r4,r7,r4
- addi r3,r3,1
-1: bge cr1,80f
- addi r3,r3,-1
- add r4,r4,r7
-
-80: stw r3,TSPC32_TV_SEC(r11)
- stw r4,TSPC32_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_gettime)
-
-
-/*
- * Exact prototype of clock_getres()
- *
- * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12
- .cfi_register lr,r12
- bl __get_datapage@local /* get data page */
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
- li r3,0
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- stw r3,TSPC32_TV_SEC(r4)
- stw r5,TSPC32_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_getres)
-
-
-/*
- * Exact prototype of time()
- *
- * time_t time(time *t);
- *
- */
-V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
-
- lwz r3,STAMP_XTIME_SEC+LOPART(r9)
-
- cmplwi r11,0 /* check if t is NULL */
- beq 2f
- stw r3,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_time)
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r3 (seconds) and r4.
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
- * It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5 and r6.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-__do_get_tspec:
- .cfi_startproc
- /* Check for update count & load values. We use the low
- * order 32 bits of the update count
- */
-1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r9,r9,r0
-
- /* Load orig stamp (offset to TB) */
- lwz r5,CFG_TB_ORIG_STAMP(r9)
- lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
- /* Get a stable TB value */
-2: MFTBU(r3)
- MFTBL(r4)
- MFTBU(r0)
- cmplw cr0,r3,r0
- bne- 2b
-
- /* Subtract tb orig stamp and shift left 12 bits.
- */
- subfc r4,r6,r4
- subfe r0,r5,r3
- slwi r0,r0,12
- rlwimi. r0,r4,12,20,31
- slwi r4,r4,12
-
- /*
- * Load scale factor & do multiplication.
- * We only use the high 32 bits of the tb_to_xs value.
- * Even with a 1GHz timebase clock, the high 32 bits of
- * tb_to_xs will be at least 4 million, so the error from
- * ignoring the low 32 bits will be no more than 0.25ppm.
- * The error will just make the clock run very very slightly
- * slow until the next time the kernel updates the VDSO data,
- * at which point the clock will catch up to the kernel's value,
- * so there is no long-term error accumulation.
- */
- lwz r5,CFG_TB_TO_XS(r9) /* load values */
- mulhwu r4,r4,r5
- li r3,0
-
- beq+ 4f /* skip high part computation if 0 */
- mulhwu r3,r0,r5
- mullw r5,r0,r5
- addc r4,r4,r5
- addze r3,r3
-4:
- /* At this point, we have seconds since the xtime stamp
- * as a 32.32 fixed-point number in r3 and r4.
- * Load & add the xtime stamp.
- */
- lwz r5,STAMP_XTIME_SEC+LOPART(r9)
- lwz r6,STAMP_SEC_FRAC(r9)
- addc r4,r4,r6
- adde r3,r3,r5
-
- /* We create a fake dependency on the result in r3/r4
- * and re-check the counter
- */
- or r6,r4,r3
- xor r0,r6,r6
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmplw cr0,r8,r0 /* check if updated */
- bne- 1b
-
- mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
-
- blr
- .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S
index 3f5ef035b0a9..10f92f265d51 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32_wrapper.S
@@ -7,7 +7,7 @@
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
vdso32_start:
- .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"
+ .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg"
.balign PAGE_SIZE
vdso32_end:
diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore
deleted file mode 100644
index 77a0b423642c..000000000000
--- a/arch/powerpc/kernel/vdso64/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-vdso64.lds
-vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
deleted file mode 100644
index 32ebb3522ea1..000000000000
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
-
-GCOV_PROFILE := n
-KCOV_INSTRUMENT := n
-UBSAN_SANITIZE := n
-
-ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
- -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
-asflags-y := -D__VDSO64__ -s
-
-obj-y += vdso64_wrapper.o
-extra-y += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-
-# Force dependency (incbin is bad)
-$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
- $(call if_changed,vdso64ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso64.so
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
deleted file mode 100644
index dc84f5ae3802..000000000000
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Access to the shared data page by the vDSO & syscall map
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
- */
-
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/vdso.h>
-
- .text
-.global __kernel_datapage_offset;
-__kernel_datapage_offset:
- .long 0
-
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
-/*
- * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
- *
- * returns a pointer to the syscall map. the map is agnostic to the
- * size of "long", unlike kernel bitops, it stores bits from top to
- * bottom so that memory actually contains a linear bitmap
- * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
- * 32 bits int at N >> 5.
- */
-V_FUNCTION_BEGIN(__kernel_get_syscall_map)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- mr r4,r3
- bl V_LOCAL_FUNC(__get_datapage)
- mtlr r12
- addi r3,r3,CFG_SYSCALL_MAP64
- cmpldi cr0,r4,0
- crclr cr0*4+so
- beqlr
- li r0,NR_syscalls
- stw r0,0(r4)
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_get_syscall_map)
-
-
-/*
- * void unsigned long __kernel_get_tbfreq(void);
- *
- * returns the timebase frequency in HZ
- */
-V_FUNCTION_BEGIN(__kernel_get_tbfreq)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- bl V_LOCAL_FUNC(__get_datapage)
- ld r3,CFG_TB_TICKS_PER_SEC(r3)
- mtlr r12
- crclr cr0*4+so
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S
deleted file mode 100644
index 12bbf236cdc4..000000000000
--- a/arch/powerpc/kernel/vdso64/getcpu.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Author: Anton Blanchard <anton@au.ibm.com>
- */
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-
- .text
-/*
- * Exact prototype of getcpu
- *
- * int __kernel_getcpu(unsigned *cpu, unsigned *node);
- *
- */
-V_FUNCTION_BEGIN(__kernel_getcpu)
- .cfi_startproc
- mfspr r5,SPRN_SPRG_VDSO_READ
- cmpdi cr0,r3,0
- cmpdi cr1,r4,0
- clrlwi r6,r5,16
- rlwinm r7,r5,16,31-15,31-0
- beq cr0,1f
- stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index 1c9a04703250..000000000000
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,289 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * ppc64 kernel for use in the vDSO
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
- * IBM Corp.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
-/*
- * Exact prototype of gettimeofday
- *
- * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
- *
- */
-V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds tv */
- mr r10,r4 /* r10 holds tz */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- cmpldi r11,0 /* check if tv is NULL */
- beq 2f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l
- bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
- std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
- std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
-2: cmpldi r10,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r3)
- stw r4,TZONE_TZ_MINWEST(r10)
- stw r5,TZONE_TZ_DSTTIME(r10)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_gettimeofday)
-
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
-
- cmpwi cr5,r3,CLOCK_REALTIME_COARSE
- cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
- cror cr5*4+eq,cr5*4+eq,cr6*4+eq
-
- cror cr0*4+eq,cr0*4+eq,cr5*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
- beq cr5,70f
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
- b 78f
-
- /*
- * For coarse clocks we get data directly from the vdso data page, so
- * we don't need to call __do_get_tspec, but we still need to do the
- * counter trick.
- */
-70: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 70b
- add r3,r3,r0 /* r0 is already 0 */
-
- /*
- * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
- * too
- */
- ld r4,STAMP_XTIME_SEC(r3)
- ld r5,STAMP_XTIME_NSEC(r3)
- bne cr6,75f
-
- /* CLOCK_MONOTONIC_COARSE */
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* check if counter has updated */
- or r0,r6,r9
-75: or r0,r0,r4
- or r0,r0,r5
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 70b
-
- /* Counter has not updated, so continue calculating proper values for
- * sec and nsec if monotonic coarse, or just return with the proper
- * values for realtime.
- */
- bne cr6,80f
-
- /* Add wall->monotonic offset and check for overflow or underflow */
-78: add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 79f
- subf r5,r7,r5
- addi r4,r4,1
-79: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_gettime)
-
-
-/*
- * Exact prototype of clock_getres()
- *
- * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12
- .cfi_register lr,r12
- bl V_LOCAL_FUNC(__get_datapage)
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
- li r3,0
- cmpldi cr0,r4,0
- crclr cr0*4+so
- beqlr
- std r3,TSPC64_TV_SEC(r4)
- std r5,TSPC64_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_getres)
-
-/*
- * Exact prototype of time()
- *
- * time_t time(time *t);
- *
- */
-V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- bl V_LOCAL_FUNC(__get_datapage)
-
- ld r4,STAMP_XTIME_SEC(r3)
-
- cmpldi r11,0 /* check if t is NULL */
- beq 2f
- std r4,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- mr r3,r4
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_time)
-
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r4 (seconds) and r5.
- * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
- * It expects the datapage ptr in r3 and doesn't clobber it.
- * It clobbers r0, r6 and r9.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-V_FUNCTION_BEGIN(__do_get_tspec)
- .cfi_startproc
- /* check for update count & load values */
-1: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r3,r3,r0
-
- /* Get TB & offset it. We use the MFTB macro which will generate
- * workaround code for Cell.
- */
- MFTB(r6)
- ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r6,r9,r6
-
- /* Scale result */
- ld r5,CFG_TB_TO_XS(r3)
- sldi r6,r6,12 /* compute time since stamp_xtime */
- mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
-
- /* Add stamp since epoch */
- ld r4,STAMP_XTIME_SEC(r3)
- lwz r5,STAMP_SEC_FRAC(r3)
- or r0,r4,r5
- or r0,r0,r6
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld r0,r8 /* check if updated */
- bne- 1b /* reload if so */
-
- /* convert to seconds & nanoseconds and add to stamp */
- add r6,r6,r5 /* add on fractional seconds of xtime */
- mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
- srdi r6,r6,32 /* seconds since stamp_xtime */
- clrldi r5,r5,32
- add r4,r4,r6
- blr
- .cfi_endproc
-V_FUNCTION_END(__do_get_tspec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
deleted file mode 100644
index dc2a509f7e8a..000000000000
--- a/arch/powerpc/kernel/vdso64/note.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S
index 1d56d81fe3b3..839d1a61411d 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64_wrapper.S
@@ -7,7 +7,7 @@
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
vdso64_start:
- .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"
+ .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg"
.balign PAGE_SIZE
vdso64_end:
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index 4acd3fb2b38e..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -10,6 +10,7 @@
#include <asm/processor.h>
#include <asm/switch_to.h>
#include <linux/uaccess.h>
+#include <asm/inst.h>
/* Functions in vector.S */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
@@ -260,21 +261,24 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- unsigned int instr, i;
+ ppc_inst_t instr;
+ unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
- if (get_user(instr, (unsigned int __user *) regs->nip))
+ if (get_user_instr(instr, (void __user *)regs->nip))
return -EFAULT;
- if ((instr >> 26) != 4)
+
+ word = ppc_inst_val(instr);
+ if (ppc_inst_primary_opcode(instr) != 4)
return -EINVAL; /* not an altivec instruction */
- vd = (instr >> 21) & 0x1f;
- va = (instr >> 16) & 0x1f;
- vb = (instr >> 11) & 0x1f;
- vc = (instr >> 6) & 0x1f;
+ vd = (word >> 21) & 0x1f;
+ va = (word >> 16) & 0x1f;
+ vb = (word >> 11) & 0x1f;
+ vc = (word >> 6) & 0x1f;
vrs = current->thread.vr_state.vr;
- switch (instr & 0x3f) {
+ switch (word & 0x3f) {
case 10:
switch (vc) {
case 0: /* vaddfp */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..80b3f6e476b6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
+#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
@@ -7,7 +9,6 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
#include <asm/asm-compat.h>
/*
@@ -32,6 +33,7 @@ _GLOBAL(store_vr_state)
mfvscr v0
li r4, VRSTATE_VSCR
stvx v0, r4, r3
+ lvx v0, 0, r3
blr
EXPORT_SYMBOL(store_vr_state)
@@ -47,6 +49,10 @@ EXPORT_SYMBOL(store_vr_state)
*/
_GLOBAL(load_up_altivec)
mfmsr r5 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_RI
+#endif
oris r5,r5,MSR_VEC@h
MTMSRD(r5) /* enable use of AltiVec now */
isync
@@ -65,20 +71,21 @@ _GLOBAL(load_up_altivec)
1:
/* enable use of VMX after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ addi r5,r2,THREAD
oris r9,r9,MSR_VEC@h
#else
ld r4,PACACURRENT(r13)
addi r5,r4,THREAD /* Get THREAD */
oris r12,r12,MSR_VEC@h
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
- /* Don't care if r4 overflows, this is desired behaviour */
- lbz r4,THREAD_LOAD_VEC(r5)
- addi r4,r4,1
+ li r4,1
stb r4,THREAD_LOAD_VEC(r5)
addi r6,r5,THREAD_VRSTATE
- li r4,1
li r10,VRSTATE_VSCR
stw r4,THREAD_USED_VR(r5)
lvx v0,r10,r6
@@ -86,6 +93,7 @@ _GLOBAL(load_up_altivec)
REST_32VRS(0,r4,r6)
/* restore registers and return */
blr
+_ASM_NOKPROBE_SYMBOL(load_up_altivec)
/*
* save_altivec(tsk)
@@ -102,6 +110,7 @@ _GLOBAL(save_altivec)
mfvscr v0
li r4,VRSTATE_VSCR
stvx v0,r4,r7
+ lvx v0,0,r7
blr
#ifdef CONFIG_VSX
@@ -124,6 +133,12 @@ _GLOBAL(load_up_vsx)
andis. r5,r12,MSR_VEC@h
beql+ load_up_altivec /* skip if already loaded */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ li r5,MSR_RI
+ mtmsrd r5,1
+#endif
+
ld r4,PACACURRENT(r13)
addi r4,r4,THREAD /* Get THREAD */
li r6,1
@@ -131,7 +146,9 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
- b fast_exception_return
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+ b fast_interrupt_return_srr
#endif /* CONFIG_VSX */
@@ -141,8 +158,8 @@ _GLOBAL(load_up_vsx)
* usage of floating-point registers. These routines must be called
* with preempt disabled.
*/
-#ifdef CONFIG_PPC32
.data
+#ifdef CONFIG_PPC32
fpzero:
.long 0
fpone:
@@ -155,24 +172,29 @@ fphalf:
lfs fr,name@l(r11)
#else
- .section ".toc","aw"
fpzero:
- .tc FD_0_0[TC],0
+ .quad 0
fpone:
- .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
+ .quad 0x3ff0000000000000 /* 1.0 */
fphalf:
- .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
+ .quad 0x3fe0000000000000 /* 0.5 */
-#define LDCONST(fr, name) \
- lfd fr,name@toc(r2)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name) \
+ pla r11,name@pcrel; \
+ lfd fr,0(r11)
+#else
+#define LDCONST(fr, name) \
+ addis r11,r2,name@toc@ha; \
+ lfd fr,name@toc@l(r11)
+#endif
#endif
-
.text
/*
* Internal routine to enable floating point and set FPSCR to 0.
* Don't call it from C; it doesn't use the normal calling convention.
*/
-fpenable:
+SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
stwu r1,-64(r1)
#else
@@ -189,6 +211,7 @@ fpenable:
mffs fr31
MTFSF_L(fr1)
blr
+SYM_FUNC_END(fpenable)
fpdisable:
mtlr r12
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8834220036a5..1c5970df3233 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,6 +8,23 @@
#define BSS_FIRST_SECTIONS *(.bss.prominit)
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN 0
+#define RUNTIME_DISCARD_EXIT
+
+#define SOFT_MASK_TABLE(align) \
+ . = ALIGN(align); \
+ __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \
+ __start___soft_mask_table = .; \
+ KEEP(*(__soft_mask_table)) \
+ __stop___soft_mask_table = .; \
+ }
+
+#define RESTART_TABLE(align) \
+ . = ALIGN(align); \
+ __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \
+ __start___restart_table = .; \
+ KEEP(*(__restart_table)) \
+ __stop___restart_table = .; \
+ }
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
@@ -15,7 +32,10 @@
#include <asm/thread_info.h>
#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
-#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT)
+
+#if STRICT_ALIGN_SIZE < PAGE_SIZE
+#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT"
+#endif
ENTRY(_stext)
@@ -52,7 +72,7 @@ SECTIONS
.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
#ifdef CONFIG_PPC64
KEEP(*(.head.text.first_256B));
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
#else
KEEP(*(.head.text.real_vectors));
*(.head.text.real_trampolines);
@@ -86,12 +106,10 @@ SECTIONS
ALIGN_FUNCTION();
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
-#ifdef CONFIG_PPC64
+ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
*(.tramp.ftrace.text);
-#endif
+ NOINSTR_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
@@ -104,26 +122,76 @@ SECTIONS
* included with the main text sections, so put it by itself.
*/
*(.sfpr);
+ *(.text.asan.* .text.tsan.*)
MEM_KEEP(init.text)
MEM_KEEP(exit.text)
+ } :text
+
+ . = ALIGN(PAGE_SIZE);
+ _etext = .;
+ PROVIDE32 (etext = .);
+
+ /* Read-only data */
+ RO_DATA(PAGE_SIZE)
#ifdef CONFIG_PPC32
+ .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
+ *(.sdata2)
+ }
+#endif
+
+ .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+
+ .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
+ *(.branch_lt)
+ }
+
+#ifdef CONFIG_PPC32
+ .got1 : AT(ADDR(.got1) - LOAD_OFFSET) {
*(.got1)
+ }
+ .got2 : AT(ADDR(.got2) - LOAD_OFFSET) {
__got2_start = .;
*(.got2)
__got2_end = .;
-#endif /* CONFIG_PPC32 */
+ }
+ .got : AT(ADDR(.got) - LOAD_OFFSET) {
+ *(.got)
+ *(.got.plt)
+ }
+ .plt : AT(ADDR(.plt) - LOAD_OFFSET) {
+ /* XXX: is .plt (and .got.plt) required? */
+ *(.plt)
+ }
- } :text
+#else /* CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ .toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
+ *(.toc1)
+ }
+#endif
- . = ALIGN(ETEXT_ALIGN_SIZE);
- _etext = .;
- PROVIDE32 (etext = .);
+ .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ *(.got)
+#else
+ *(.got .toc)
+#endif
+ }
- /* Read-only data */
- RO_DATA(PAGE_SIZE)
+ SOFT_MASK_TABLE(8)
+ RESTART_TABLE(8)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+ __start_opd = .;
+ KEEP(*(.opd))
+ __end_opd = .;
+ }
+#endif
-#ifdef CONFIG_PPC64
. = ALIGN(8);
__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
__start___stf_entry_barrier_fixup = .;
@@ -132,6 +200,27 @@ SECTIONS
}
. = ALIGN(8);
+ __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+ __start___uaccess_flush_fixup = .;
+ *(__uaccess_flush_fixup)
+ __stop___uaccess_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+ __start___entry_flush_fixup = .;
+ *(__entry_flush_fixup)
+ __stop___entry_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __scv_entry_flush_fixup : AT(ADDR(__scv_entry_flush_fixup) - LOAD_OFFSET) {
+ __start___scv_entry_flush_fixup = .;
+ *(__scv_entry_flush_fixup)
+ __stop___scv_entry_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
*(__stf_exit_barrier_fixup)
@@ -144,7 +233,7 @@ SECTIONS
*(__rfi_flush_fixup)
__stop___rfi_flush_fixup = .;
}
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC_BARRIER_NOSPEC
. = ALIGN(8);
@@ -155,7 +244,7 @@ SECTIONS
}
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
. = ALIGN(8);
__spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {
__start__btb_flush_fixup = .;
@@ -164,19 +253,28 @@ SECTIONS
}
#endif
+ /*
+ * Various code relies on __init_begin being at the strict RWX boundary.
+ */
+ . = ALIGN(STRICT_ALIGN_SIZE);
+ __srwx_boundary = .;
+ __end_rodata = .;
+ __init_begin = .;
+
/*
* Init sections discarded at runtime
*/
- . = ALIGN(STRICT_ALIGN_SIZE);
- __init_begin = .;
- . = ALIGN(PAGE_SIZE);
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
+
+ /*
+ *.init.text might be RO so we must ensure this section ends on
+ * a page boundary.
+ */
+ . = ALIGN(PAGE_SIZE);
_einittext = .;
-#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
-#endif
} :text
/* .exit.text is discarded at runtime, not link time,
@@ -186,21 +284,9 @@ SECTIONS
EXIT_TEXT
}
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
- INIT_DATA
- }
-
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
- INIT_SETUP(16)
- }
-
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- INIT_CALLS
- }
+ . = ALIGN(PAGE_SIZE);
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- CON_INITCALL
- }
+ INIT_DATA_SECTION(16)
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
@@ -228,9 +314,6 @@ SECTIONS
__stop___fw_ftr_fixup = .;
}
#endif
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- INIT_RAM_FS
- }
PERCPU_SECTION(L1_CACHE_BYTES)
@@ -244,9 +327,7 @@ SECTIONS
. = ALIGN(8);
.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
{
-#ifdef CONFIG_PPC32
__dynamic_symtab = .;
-#endif
*(.dynsym)
}
.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
@@ -256,6 +337,7 @@ SECTIONS
*(.dynamic)
}
.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+ .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) }
.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
{
@@ -281,49 +363,16 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_sdata = .;
-#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
+#ifdef CONFIG_PPC32
*(SDATA_MAIN)
- *(.sdata2)
- *(.got.plt) *(.got)
- *(.plt)
- *(.branch_lt)
- }
-#else
- .data : AT(ADDR(.data) - LOAD_OFFSET) {
- DATA_DATA
- *(.data.rel*)
- *(.toc1)
- *(.branch_lt)
- }
-
- .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
- __start_opd = .;
- KEEP(*(.opd))
- __end_opd = .;
- }
-
- . = ALIGN(256);
- .got : AT(ADDR(.got) - LOAD_OFFSET) {
- __toc_start = .;
-#ifndef CONFIG_RELOCATABLE
- __prom_init_toc_start = .;
- arch/powerpc/kernel/prom_init.o*(.toc .got)
- __prom_init_toc_end = .;
#endif
- *(.got)
- *(.toc)
}
-#endif
/* The initial task and kernel stack */
- INIT_TASK_DATA_SECTION(THREAD_SIZE)
+ INIT_TASK_DATA_SECTION(THREAD_ALIGN)
.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)
@@ -358,16 +407,18 @@ SECTIONS
_end = . ;
PROVIDE32 (end = .);
- STABS_DEBUG
-
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
- *(.glink .iplt .plt .rela* .comment)
+ *(.glink .iplt .plt)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+ *(.rela*)
+#endif
}
}
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index af3c15a1d41e..8c464a5d8246 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -24,9 +24,12 @@
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/delay.h>
+#include <linux/processor.h>
#include <linux/smp.h>
+#include <asm/interrupt.h>
#include <asm/paca.h>
+#include <asm/nmi.h>
/*
* The powerpc watchdog ensures that each CPU is able to service timers.
@@ -53,7 +56,7 @@
* solved by also having a SMP watchdog where all CPUs check all other
* CPUs heartbeat.
*
- * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * The SMP checker can detect lockups on other CPUs. A global "pending"
* cpumask is kept, containing all CPUs which enable the watchdog. Each
* CPU clears their pending bit in their heartbeat timer. When the bitmask
* becomes empty, the last CPU to clear its pending bit updates a global
@@ -82,10 +85,41 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;
+#ifdef CONFIG_PPC_PSERIES
+static u64 wd_timeout_pct;
+#endif
+
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+ if (__wd_reporting)
+ return false;
+ __wd_reporting = 1;
+ return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+ smp_mb(); /* End printing "critical section" */
+ WARN_ON_ONCE(__wd_reporting == 0);
+ WRITE_ONCE(__wd_reporting, 0);
+}
+
static inline void wd_smp_lock(unsigned long *flags)
{
/*
@@ -125,108 +159,182 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
+ /*
+ * __wd_nmi_output must be set after we printk from NMI context.
+ *
+ * printk from NMI context defers printing to the console to irq_work.
+ * If that NMI was taken in some code that is hard-locked, then irqs
+ * are disabled so irq_work will never fire. That can result in the
+ * hard lockup messages being delayed (indefinitely, until something
+ * else kicks the console drivers).
+ *
+ * Setting __wd_nmi_output will cause another CPU to notice and kick
+ * the console drivers for us.
+ *
+ * xchg is not needed here (it could be a smp_mb and store), but xchg
+ * gives the memory ordering and atomicity required.
+ */
+ xchg(&__wd_nmi_output, 1);
+
/* Do not panic from here because that can recurse into NMI IPI layer */
}
-static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
+static bool set_cpu_stuck(int cpu)
{
- cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
- cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
+ return true;
}
-}
-static void set_cpu_stuck(int cpu, u64 tb)
-{
- set_cpumask_stuck(cpumask_of(cpu), tb);
+ return false;
}
-static void watchdog_smp_panic(int cpu, u64 tb)
+static void watchdog_smp_panic(int cpu)
{
+ static cpumask_t wd_smp_cpus_ipi; // protected by reporting
unsigned long flags;
+ u64 tb, last_reset;
int c;
wd_smp_lock(&flags);
/* Double check some things under lock */
- if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ tb = get_tb();
+ last_reset = wd_smp_last_reset_tb;
+ if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
goto out;
if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
goto out;
- if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ if (!wd_try_report())
+ goto out;
+ for_each_online_cpu(c) {
+ if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+ continue;
+ if (c == cpu)
+ continue; // should not happen
+
+ __cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+ if (set_cpu_stuck(c))
+ break;
+ }
+ if (cpumask_empty(&wd_smp_cpus_ipi)) {
+ wd_end_reporting();
goto out;
+ }
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
- cpu, tb, wd_smp_last_reset_tb,
- tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);
+ cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
* Try to trigger the stuck CPUs, unless we are going to
* get a backtrace on all of them anyway.
*/
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
+ for_each_cpu(c, &wd_smp_cpus_ipi) {
smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
}
+ } else {
+ trigger_allbutcpu_cpu_backtrace(cpu);
+ cpumask_clear(&wd_smp_cpus_ipi);
}
- /* Take the stuck CPUs out of the watch group */
- set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-
- wd_smp_unlock(&flags);
-
- printk_safe_flush();
- /*
- * printk_safe_flush() seems to require another print
- * before anything actually goes out to console.
- */
- if (sysctl_hardlockup_all_cpu_backtrace)
- trigger_allbutself_cpu_backtrace();
-
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+ wd_end_reporting();
+
return;
out:
wd_smp_unlock(&flags);
}
-static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+static void wd_smp_clear_cpu_pending(int cpu)
{
if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
struct pt_regs *regs = get_irq_regs();
unsigned long flags;
- wd_smp_lock(&flags);
-
pr_emerg("CPU %d became unstuck TB:%lld\n",
- cpu, tb);
+ cpu, get_tb());
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
}
return;
}
+
+ /*
+ * All other updates to wd_smp_cpus_pending are performed under
+ * wd_smp_lock. All of them are atomic except the case where the
+ * mask becomes empty and is reset. This will not happen here because
+ * cpu was tested to be in the bitmap (above), and a CPU only clears
+ * its own bit. _Except_ in the case where another CPU has detected a
+ * hard lockup on our CPU and takes us out of the pending mask. So in
+ * normal operation there will be no race here, no problem.
+ *
+ * In the lockup case, this atomic clear-bit vs a store that refills
+ * other bits in the accessed word wll not be a problem. The bit clear
+ * is atomic so it will not cause the store to get lost, and the store
+ * will never set this bit so it will not overwrite the bit clear. The
+ * only way for a stuck CPU to return to the pending bitmap is to
+ * become unstuck itself.
+ */
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) to check all
+ * words in the pending mask to check they are all empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
unsigned long flags;
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
wd_smp_lock(&flags);
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
@@ -241,33 +349,60 @@ static void watchdog_timer_interrupt(int cpu)
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
- watchdog_smp_panic(cpu, tb);
+ watchdog_smp_panic(cpu);
+
+ if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+ /*
+ * Something has called printk from NMI context. It might be
+ * stuck, so this triggers a flush that will get that
+ * printk output to the console.
+ *
+ * See wd_lockup_ipi.
+ */
+ printk_trigger_flush();
+ }
}
-void soft_nmi_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
unsigned long flags;
int cpu = raw_smp_processor_id();
u64 tb;
- if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return;
+ /* should only arrive from kernel, with irqs disabled */
+ WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
- nmi_enter();
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return 0;
__this_cpu_inc(irq_stat.soft_nmi_irqs);
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ /*
+ * Taking wd_smp_lock here means it is a soft-NMI lock, which
+ * means we can't take any regular or irqsafe spin locks while
+ * holding this lock. This is why timers can't printk while
+ * holding the lock.
+ */
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
- goto out;
+ return 0;
}
- set_cpu_stuck(cpu, tb);
+ if (!wd_try_report()) {
+ wd_smp_unlock(&flags);
+ /* Couldn't report, try again in 100ms */
+ mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+ return 0;
+ }
+
+ set_cpu_stuck(cpu);
+
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
cpu, (void *)regs->nip);
@@ -278,26 +413,32 @@ void soft_nmi_interrupt(struct pt_regs *regs)
print_irqtrace_events(current);
show_regs(regs);
- wd_smp_unlock(&flags);
+ xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
if (sysctl_hardlockup_all_cpu_backtrace)
- trigger_allbutself_cpu_backtrace();
+ trigger_allbutcpu_cpu_backtrace(cpu);
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
+
+ wd_end_reporting();
}
+ /*
+ * We are okay to change DEC in soft_nmi_interrupt because the masked
+ * handler has marked a DEC as pending, so the timer interrupt will be
+ * replayed as soon as local irqs are enabled again.
+ */
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
-out:
- nmi_exit();
+ return 0;
}
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
return HRTIMER_NORESTART;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -314,11 +455,15 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- u64 tb = get_tb();
+ u64 tb;
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -334,7 +479,7 @@ static void start_watchdog(void *arg)
return;
}
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -376,7 +521,7 @@ static void stop_watchdog(void *arg)
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
- wd_smp_clear_cpu_pending(cpu, get_tb());
+ wd_smp_clear_cpu_pending(cpu);
}
static int stop_watchdog_on_cpu(unsigned int cpu)
@@ -386,7 +531,13 @@ static int stop_watchdog_on_cpu(unsigned int cpu)
static void watchdog_calc_timeouts(void)
{
- wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
+ u64 threshold = watchdog_thresh;
+
+#ifdef CONFIG_PPC_PSERIES
+ threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100;
+#endif
+
+ wd_panic_timeout_tb = threshold * ppc_tb_freq;
/* Have the SMP detector trigger a bit later */
wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
@@ -395,7 +546,7 @@ static void watchdog_calc_timeouts(void)
wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}
-void watchdog_nmi_stop(void)
+void watchdog_hardlockup_stop(void)
{
int cpu;
@@ -403,7 +554,7 @@ void watchdog_nmi_stop(void)
stop_watchdog_on_cpu(cpu);
}
-void watchdog_nmi_start(void)
+void watchdog_hardlockup_start(void)
{
int cpu;
@@ -415,7 +566,7 @@ void watchdog_nmi_start(void)
/*
* Invoked from core watchdog init.
*/
-int __init watchdog_nmi_probe(void)
+int __init watchdog_hardlockup_probe(void)
{
int err;
@@ -429,3 +580,12 @@ int __init watchdog_nmi_probe(void)
}
return 0;
}
+
+#ifdef CONFIG_PPC_PSERIES
+void watchdog_hardlockup_set_timeout_pct(u64 pct)
+{
+ pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
+ WRITE_ONCE(wd_timeout_pct, pct);
+ lockup_detector_reconfigure();
+}
+#endif