summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-05 11:12:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-05 11:12:59 -0700
commitd38c07afc356ddebaa3ed8ecb3f553340e05c969 (patch)
tree04eb76124d4d5af7518e6e513fb86169051d179c /arch/powerpc/kernel
parent31c0aa87ec8a30b1e9e4cf862905a369560f7705 (diff)
parentc17eb4dca5a353a9dbbb8ad6934fe57af7165e91 (diff)
Merge tag 'powerpc-5.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Slightly late as I had to rebase mid-week to insert a bug fix: - A large series from Nick for 64-bit to further rework our exception vectors, and rewrite portions of the syscall entry/exit and interrupt return in C. The result is much easier to follow code that is also faster in general. - Cleanup of our ptrace code to split various parts out that had become badly intertwined with #ifdefs over the years. - Changes to our NUMA setup under the PowerVM hypervisor which should hopefully avoid non-sensical topologies which can lead to warnings from the workqueue code and other problems. - MAINTAINERS updates to remove some of our old orphan entries and update the status of others. - Quite a few other small changes and fixes all over the map. Thanks to: Abdul Haleem, afzal mohammed, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Balamuruhan S, Cédric Le Goater, Chen Zhou, Christophe JAILLET, Christophe Leroy, Christoph Hellwig, Clement Courbet, Daniel Axtens, David Gibson, Douglas Miller, Fabiano Rosas, Fangrui Song, Ganesh Goudar, Gautham R. Shenoy, Greg Kroah-Hartman, Greg Kurz, Gustavo Luiz Duarte, Hari Bathini, Ilie Halip, Jan Kara, Joe Lawrence, Joe Perches, Kajol Jain, Larry Finger, Laurentiu Tudor, Leonardo Bras, Libor Pechacek, Madhavan Srinivasan, Mahesh Salgaonkar, Masahiro Yamada, Masami Hiramatsu, Mauricio Faria de Oliveira, Michael Neuling, Michal Suchanek, Mike Rapoport, Nageswara R Sastry, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Nick Desaulniers, Oliver O'Halloran, Po-Hsu Lin, Pratik Rajesh Sampat, Rasmus Villemoes, Ravi Bangoria, Roman Bolshakov, Sam Bobroff, Sandipan Das, Santosh S, Sedat Dilek, Segher Boessenkool, Shilpasri G Bhat, Sourabh Jain, Srikar Dronamraju, Stephen Rothwell, Tyrel Datwyler, Vaibhav Jain, YueHaibing" * tag 'powerpc-5.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (158 commits) powerpc: Make setjmp/longjmp signature standard powerpc/cputable: Remove unnecessary copy of cpu_spec->oprofile_type powerpc: Suppress .eh_frame generation powerpc: Drop -fno-dwarf2-cfi-asm powerpc/32: drop unused ISA_DMA_THRESHOLD powerpc/powernv: Add documentation for the opal sensor_groups sysfs interfaces selftests/powerpc: Fix try-run when source tree is not writable powerpc/vmlinux.lds: Explicitly retain .gnu.hash powerpc/ptrace: move ptrace_triggered() into hw_breakpoint.c powerpc/ptrace: create ppc_gethwdinfo() powerpc/ptrace: create ptrace_get_debugreg() powerpc/ptrace: split out ADV_DEBUG_REGS related functions. powerpc/ptrace: move register viewing functions out of ptrace.c powerpc/ptrace: split out TRANSACTIONAL_MEM related functions. powerpc/ptrace: split out SPE related functions. powerpc/ptrace: split out ALTIVEC related functions. powerpc/ptrace: split out VSX related functions. powerpc/ptrace: drop PARAMETER_SAVE_AREA_OFFSET powerpc/ptrace: drop unnecessary #ifdefs CONFIG_PPC64 powerpc/ptrace: remove unused header includes ...
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile11
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cputable.c1
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c1
-rw-r--r--arch/powerpc/kernel/eeh.c145
-rw-r--r--arch/powerpc/kernel/entry_32.S38
-rw-r--r--arch/powerpc/kernel/entry_64.S895
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S287
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2043
-rw-r--r--arch/powerpc/kernel/fadump.c134
-rw-r--r--arch/powerpc/kernel/head_32.S9
-rw-r--r--arch/powerpc/kernel/head_32.h28
-rw-r--r--arch/powerpc/kernel/head_64.S4
-rw-r--r--arch/powerpc/kernel/head_booke.h5
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c16
-rw-r--r--arch/powerpc/kernel/irq.c192
-rw-r--r--arch/powerpc/kernel/kprobes.c84
-rw-r--r--arch/powerpc/kernel/mce.c14
-rw-r--r--arch/powerpc/kernel/mce_power.c8
-rw-r--r--arch/powerpc/kernel/misc.S4
-rw-r--r--arch/powerpc/kernel/of_platform.c12
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/pci-common.c6
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c2
-rw-r--r--arch/powerpc/kernel/process.c124
-rw-r--r--arch/powerpc/kernel/prom_init.c4
-rw-r--r--arch/powerpc/kernel/ptrace.c3468
-rw-r--r--arch/powerpc/kernel/ptrace/Makefile20
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-adv.c492
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-altivec.c128
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-decl.h184
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c265
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-novsx.c57
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-spe.c68
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-tm.c851
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-view.c904
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-vsx.c151
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace.c481
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace32.c (renamed from arch/powerpc/kernel/ptrace32.c)11
-rw-r--r--arch/powerpc/kernel/setup-common.c3
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c32
-rw-r--r--arch/powerpc/kernel/signal.h2
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c31
-rw-r--r--arch/powerpc/kernel/stacktrace.c6
-rw-r--r--arch/powerpc/kernel/syscall_64.c379
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl22
-rw-r--r--arch/powerpc/kernel/sysfs.c381
-rw-r--r--arch/powerpc/kernel/systbl.S9
-rw-r--r--arch/powerpc/kernel/time.c9
-rw-r--r--arch/powerpc/kernel/traps.c25
-rw-r--r--arch/powerpc/kernel/vdso.c5
-rw-r--r--arch/powerpc/kernel/vector.S2
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
56 files changed, 6565 insertions, 5516 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 78a1b22d4fd8..570660efbb3d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -41,16 +39,17 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y := cputable.o ptrace.o syscalls.o \
+obj-y := cputable.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
-obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
- signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o note.o
+obj-y += ptrace/
+obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
+ paca.o nvram_64.o firmware.o note.o \
+ syscall_64.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6dfceaa820e4..f57712a55815 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -26,7 +26,7 @@
static void scrollscreen(void);
#endif
-#define __force_data __attribute__((__section__(".data")))
+#define __force_data __section(.data)
static int g_loc_X __force_data;
static int g_loc_Y __force_data;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 245be4fafe13..13eba2eb46fe 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2198,7 +2198,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
*/
if (old.oprofile_cpu_type != NULL) {
t->oprofile_cpu_type = old.oprofile_cpu_type;
- t->oprofile_type = old.oprofile_type;
t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
}
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 182b4047c1ef..36bc0d5c4f3a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void)
/* Initialize the base environment -- clear FSCR/HFSCR. */
hv_mode = !!(mfmsr() & MSR_HV);
if (hv_mode) {
- /* CPU_FTR_HVMODE is used early in PACA setup */
cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
mtspr(SPRN_HFSCR, 0);
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..7cdcb413bb44 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1107,87 +1107,43 @@ static int eeh_init(void)
core_initcall_sync(eeh_init);
/**
- * eeh_add_device_early - Enable EEH for the indicated device node
- * @pdn: PCI device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct pci_dn *pdn)
-{
- struct pci_controller *phb = pdn ? pdn->phb : NULL;
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
- if (!edev)
- return;
-
- if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
- return;
-
- /* USB Bus children of PCI devices will not have BUID's */
- if (NULL == phb ||
- (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
- return;
-
- eeh_ops->probe(pdn, NULL);
-}
-
-/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @pdn: PCI device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct pci_dn *pdn)
-{
- struct pci_dn *n;
-
- if (!pdn)
- return;
-
- list_for_each_entry(n, &pdn->child_list, list)
- eeh_add_device_tree_early(n);
- eeh_add_device_early(pdn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
* devices that were added after system boot (e.g. hotplug, dlpar).
*/
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev)
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ /*
+ * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+ * already called for this device.
+ */
+ if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+ pci_dbg(dev, "Already bound to an eeh_dev!\n");
return;
+ }
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- edev = pdn_to_eeh_dev(pdn);
- eeh_edev_dbg(edev, "Adding device\n");
- if (edev->pdev == dev) {
- eeh_edev_dbg(edev, "Device already referenced!\n");
+ edev = eeh_ops->probe(dev);
+ if (!edev) {
+ pr_debug("EEH: Adding device failed\n");
return;
}
/*
- * The EEH cache might not be removed correctly because of
- * unbalanced kref to the device during unplug time, which
- * relies on pcibios_release_device(). So we have to remove
- * that here explicitly.
+ * FIXME: We rely on pcibios_release_device() to remove the
+ * existing EEH state. The release function is only called if
+ * the pci_dev's refcount drops to zero so if something is
+ * keeping a ref to a device (e.g. a filesystem) we need to
+ * remove the old EEH state.
+ *
+ * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
*/
- if (edev->pdev) {
+ if (edev->pdev && edev->pdev != dev) {
eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
@@ -1198,69 +1154,16 @@ void eeh_add_device_late(struct pci_dev *dev)
* into error handler afterwards.
*/
edev->mode |= EEH_DEV_NO_HANDLER;
-
- edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
}
- if (eeh_has_flag(EEH_PROBE_MODE_DEV))
- eeh_ops->probe(pdn, NULL);
-
+ /* bind the pdev and the edev together */
edev->pdev = dev;
dev->dev.archdata.edev = edev;
-
eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
}
/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- if (eeh_has_flag(EEH_FORCE_DISABLED))
- return;
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_add_device_late(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_device_tree_late(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_sysfs_add_device(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_sysfs_files(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 16af0d8d90a8..a6371fb8f761 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -246,9 +246,8 @@ reenable_mmu:
* r3 can be different from GPR3(r1) at this point, r9 and r11
* contains the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
- * along as well. r12, CCR, CTR, XER etc... are left clobbered as
- * they aren't useful past this point (aren't syscall arguments),
- * the rest is restored from the exception frame.
+ * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left
+ * clobbered as they aren't useful past this point.
*/
stwu r1,-32(r1)
@@ -262,16 +261,12 @@ reenable_mmu:
* lockdep
*/
1: bl trace_hardirqs_off
-2: lwz r5,24(r1)
+ lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
lwz r9,8(r1)
addi r1,r1,32
- lwz r0,GPR0(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
mtctr r11
mtlr r9
bctr /* jump to handler */
@@ -575,6 +570,33 @@ syscall_exit_work:
bl do_syscall_trace_leave
b ret_from_except_full
+ /*
+ * System call was called from kernel. We get here with SRR1 in r9.
+ * Mark the exception as recoverable once we have retrieved SRR0,
+ * trap a warning and return ENOSYS with CR[SO] set.
+ */
+ .globl ret_from_kernel_syscall
+ret_from_kernel_syscall:
+ mfspr r9, SPRN_SRR0
+ mfspr r10, SPRN_SRR1
+#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE)
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+ mtmsr r11
+#endif
+
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
+ li r3, ENOSYS
+ crset so
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r10
+ SYNC
+ RFI
+
/*
* The fork/clone functions need to copy the full register set into
* the child process. Therefore we need to save all the nonvolatile
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6ba675b0cf7d..63f0a4414618 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -69,6 +70,7 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
+_ASM_NOKPROBE_SYMBOL(system_call_common)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
@@ -76,342 +78,122 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+ std r2,GPR2(r1)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
- ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
- std r2,GPR2(r1)
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
std r3,GPR3(r1)
- mfcr r2
std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
std r7,GPR7(r1)
std r8,GPR8(r1)
- li r11,0
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
std r11,_XER(r1)
std r11,_CTR(r1)
- std r9,GPR13(r1)
mflr r10
+
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
- rldimi r2,r11,28,(63-28)
- li r11,0xc01
+ rldimi r12,r11,28,(63-28)
+ li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
+ std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
+ addi r10,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-
- kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- /* see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd r11,r10
- beq+ 33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- lbz r10,PACAIRQSOFTMASK(r13)
-1: tdnei r10,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r11,MSR_RI
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call: /* label this so stack traces look sane */
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,IRQS_ENABLED
- std r10,SOFTE(r1)
-
- ld r11, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne .Lsyscall_dotrace /* does not return */
- cmpldi 0,r0,NR_syscalls
- bge- .Lsyscall_enosys
+ std r11,-16(r10) /* "regshere" marker */
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andis. r10,r10,_TIF_32BIT@h
- beq 15f
- ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,3
-
- barrier_nospec_asm
/*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .Lsyscall_enosys above has
- * committed.
+ * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
+ * would clobber syscall parameters. Also we always enter with IRQs
+ * enabled and nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
*/
+ li r11,IRQS_ALL_DISABLED
+ li r12,PACA_IRQ_HARD_DIS
+ stb r11,PACAIRQSOFTMASK(r13)
+ stb r12,PACAIRQHAPPENED(r13)
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
- /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
- std r3,RESULT(r1)
-
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- ld r3,RESULT(r1)
-#endif
-
- ld r12, PACA_THREAD_INFO(r13)
-
- ld r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- *
- * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
- * could fault on the load of the TI_FLAGS below.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r11,MSR_RI
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ bl syscall_exit_prepare
- ld r9,TI_FLAGS(r12)
- li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- .Lsyscall_exit_work
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ ld r6,_LINK(r1)
- andi. r0,r8,MSR_FP
- beq 2f
-#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
-
-3: cmpld r3,r11
- ld r5,_CCR(r1)
- bge- .Lsyscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
- kuap_check_amr r10, r11
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+ mtlr r6
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
- * this later, but testing shows that doing it here causes less slow
- * down than doing it closer to the rfid.
- */
+ cmpdi r3,0
+ bne .Lsyscall_restore_regs
+ /* Zero volatile regs that may contain sensitive kernel data */
+ li r0,0
+ li r4,0
+ li r5,0
+ li r6,0
+ li r7,0
+ li r8,0
+ li r9,0
+ li r10,0
li r11,0
- mtmsrd r11,1
-#endif
-
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
+ li r12,0
+ mtctr r0
+ mtspr SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- std r8, PACATMSCRATCH(r13)
-#endif
-
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ mtcr r2
ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
RFI_TO_USER
b . /* prevent speculative execution */
-1: /* exit to kernel */
- kuap_restore_amr r2
-
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-.Lsyscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-.Lsyscall_dotrace:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
-
- /*
- * We use the return value of do_syscall_trace_enter() as the syscall
- * number. If the syscall was rejected for any reason do_syscall_trace_enter()
- * returns an invalid syscall number and the test below against
- * NR_syscalls will fail.
- */
- mr r0,r3
-
- /* Restore argument registers just clobbered and/or possibly changed. */
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
-
- /* Repopulate r9 and r10 for the syscall path */
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r10, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r10)
-
- cmpldi r0,NR_syscalls
- blt+ .Lsyscall
-
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- b .Lsyscall_exit
-
-
-.Lsyscall_enosys:
- li r3,-ENOSYS
- b .Lsyscall_exit
-
-.Lsyscall_exit_work:
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
+.Lsyscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
-BEGIN_FTR_SECTION
- lis r3,DEFAULT_PPR@highest /* Set default PPR */
- sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,_PPR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r10,MSR_RI
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
+ mtctr r3
+ mtspr SPRN_XER,r4
+ ld r0,GPR0(r1)
+ REST_8GPRS(4, r1)
+ ld r12,GPR12(r1)
+ b .Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
.Ltabort_syscall:
@@ -439,64 +221,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common);
-_ASM_NOKPROBE_SYMBOL(system_call_exit);
-
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
_GLOBAL(ret_from_fork)
bl schedule_tail
@@ -516,6 +240,19 @@ _GLOBAL(ret_from_kernel_thread)
li r3,0
b .Lsyscall_exit
+#ifdef CONFIG_PPC_BOOK3E
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ beqlr-
+ SAVE_NVGPRS(r1)
+ clrrdi r0,r11,1
+ std r0,_TRAP(r1)
+ blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
@@ -578,7 +315,7 @@ flush_count_cache:
* state of one is saved on its kernel stack. Then the state
* of the other is restored from its kernel stack. The memory
* management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
+ * Finally, we can return to the second process, via interrupt_return.
* On entry, r3 points to the THREAD for the current task, r4
* points to the THREAD for the new task.
*
@@ -730,408 +467,146 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addi r1,r1,SWITCH_FRAME_SIZE
blr
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
- /*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9, PACA_THREAD_INFO(r13)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne 1f
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3S
/*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, AMR not set, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+ .globl fast_interrupt_return
+fast_interrupt_return:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ bne .Lfast_user_interrupt_return
+ andi. r0,r4,MSR_RI
+ li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+ bne+ .Lfast_kernel_interrupt_return
addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- b restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- /*
- * Use a non volatile GPR to save and restore our thread_info flags
- * across the call to restore_interrupts.
- */
- mr r30,r4
- bl restore_interrupts
- mr r4,r30
+ bl unrecoverable_exception
+ b . /* should not get here */
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl interrupt_return
+interrupt_return:
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq .Lkernel_interrupt_return
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
-
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
+ bl interrupt_exit_user_prepare
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+.Lfast_user_interrupt_return:
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- ld r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stdu */
- ld r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPTION
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr0,r8,0
- bne restore
- ld r0,SOFTE(r1)
- andi. r0,r0,IRQS_DISABLED
- bne restore
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
- bl preempt_schedule_irq
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
+ REST_GPR(13, r1)
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupt but we really should disable interrupts
- * when we return from the interrupt, and so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPTION */
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACAIRQSOFTMASK(r13)
- andi. r5,r5,IRQS_DISABLED
- bne .Lrestore_irq_off
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- /* We are enabling, were we already enabled ? Yes, just return */
- andi. r6,r6,IRQS_DISABLED
- beq cr0,.Ldo_restore
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- .Lrestore_check_irq_replay
+ .balign IFETCH_ALIGN_BYTES
+.Lkernel_interrupt_return:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl interrupt_exit_kernel_prepare
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-.Lrestore_no_replay:
- TRACE_ENABLE_INTS
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13);
+.Lfast_kernel_interrupt_return:
+ cmpdi cr1,r3,0
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- /*
- * Final return path. BookE is handled in a different file
- */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
FTR_SECTION_ELSE
- ldarx r4,0,r1
+ ldarx r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
+ ld r3,_LINK(r1)
ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- kuap_check_amr r5, r6
-
- REST_8GPRS(5, r1)
-
- andi. r0,r3,MSR_RI
- beq- .Lunrecov_restore
-
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- li r4,0
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- /* Restore PPR */
- ld r2,_PPR(r1)
- mtspr SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
- REST_GPR(13, r1)
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
+ ld r5,_XER(r1)
+ ld r6,_CCR(r1)
+ li r0,0
-1: mtspr SPRN_SRR1,r3
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
/*
* Leaving a stale exception_marker on the stack can confuse
* the reliable stack unwinder later on. Clear it.
*/
- li r2,0
- std r2,STACK_FRAME_OVERHEAD-16(r1)
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
- kuap_restore_amr r4
+ REST_4GPRS(2, r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
RFI_TO_KERNEL
b . /* prevent speculative execution */
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-.Lrestore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- /* The interrupt should not have soft enabled. */
- lbz r7,PACAIRQSOFTMASK(r13)
-1: tdeqi r7,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
- b .Ldo_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-.Lrestore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq .Lrestore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * PACA_IRQ_HARD_DIS won't always be set here, so set it now
- * to reconcile the IRQ state. Tracing is already accounted for.
- */
- lbz r4,PACAIRQHAPPENED(r13)
- ori r4,r4,PACA_IRQ_HARD_DIS
- stb r4,PACAIRQHAPPENED(r13)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- cmpwi cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-.Lunrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b .Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * PACA_EXGEN as temporary storage to hold the store data, as
+ * interrupts are disabled here so it won't be clobbered.
+ */
+ mtcr r6
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ std r9,0(r1) /* perform store component of stdu */
+ ld r9,PACA_EXGEN+0(r13)
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_RTAS
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..d9ed79415100 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -1003,38 +1004,6 @@ masked_interrupt_book3e_0x2c0:
masked_interrupt_book3e PACA_IRQ_DBELL 0
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
- * to indicate the kind of interrupt. MSR:EE is already off.
- * We generate a stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about.
- */
- mflr r10
- mfmsr r11
- mfcr r4
- mtspr SPRN_SPRG_GEN_SCRATCH,r13;
- std r1,PACA_EXGEN+EX_R1(r13);
- stw r4,PACA_EXGEN+EX_CR(r13);
- ori r11,r11,MSR_EE
- subi r1,r1,INT_FRAME_SIZE;
- cmpwi cr0,r3,0x500
- beq exc_0x500_common
- cmpwi cr0,r3,0x900
- beq exc_0x900_common
- cmpwi cr0,r3,0x280
- beq exc_0x280_common
- blr
-
-
-/*
* This is called from 0x300 and 0x400 handlers after the prologs with
* r14 and r15 containing the fault address and error code, with the
* original values stashed away in the PACA
@@ -1073,17 +1042,161 @@ alignment_more:
bl alignment_exception
b ret_from_except
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
+ .align 7
+_GLOBAL(ret_from_except)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ bne ret_from_except_lite
+ REST_NVGPRS(r1)
+
+_GLOBAL(ret_from_except_lite)
+ /*
+ * Disable interrupts so that current_thread_info()->flags
+ * can't change between when we test it and when we return
+ * from the interrupt.
+ */
+ wrteei 0
+
+ ld r9, PACA_THREAD_INFO(r13)
+ ld r3,_MSR(r1)
+ ld r10,PACACURRENT(r13)
+ ld r4,TI_FLAGS(r9)
+ andi. r3,r3,MSR_PR
+ beq resume_kernel
+ lwz r3,(THREAD+THREAD_DBCR0)(r10)
+
+ /* Check current_thread_info()->flags */
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ bne 1f
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ andis. r0,r3,DBCR0_IDM@h
+ beq restore
+ mfmsr r0
+ rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
+ mtmsr r0
+ mtspr SPRN_DBCR0,r3
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+1: andi. r0,r4,_TIF_NEED_RESCHED
+ beq 2f
+ bl restore_interrupts
+ SCHEDULE_USER
+ b ret_from_except_lite
+2:
+ bl save_nvgprs
+ /*
+ * Use a non volatile GPR to save and restore our thread_info flags
+ * across the call to restore_interrupts.
+ */
+ mr r30,r4
+ bl restore_interrupts
+ mr r4,r30
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_notify_resume
+ b ret_from_except
+
+resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ ld r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stdu */
+ ld r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr0,r8,0
+ bne restore
+ ld r0,SOFTE(r1)
+ andi. r0,r0,IRQS_DISABLED
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first and reconcile irq state.
+ */
+ RECONCILE_IRQ_STATE(r3,r4)
+ bl preempt_schedule_irq
+
+ /*
+ * arch_local_irq_restore() from preempt_schedule_irq above may
+ * enable hard interrupt but we really should disable interrupts
+ * when we return from the interrupt, and so that we don't get
+ * interrupted after loading SRR0/1.
+ */
+ wrteei 0
+#endif /* CONFIG_PREEMPT */
+
+restore:
+ /*
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
+ */
+ ld r5,SOFTE(r1)
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
+
+ /* We are enabling, were we already enabled ? Yes, just return */
+ andi. r6,r6,IRQS_DISABLED
+ beq cr0,fast_exception_return
+
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- .Lrestore_check_irq_replay
+
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move-on. We will hard-enable as a side
+ * effect of rfi
+ */
+.Lrestore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/* This is the return from load_up_fpu fast path which could do with
* less GPR restores in fact, but for now we have a single return path
*/
- .globl fast_exception_return
fast_exception_return:
wrteei 0
1: mr r0,r13
@@ -1124,6 +1237,102 @@ fast_exception_return:
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+.Lrestore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+ b fast_exception_return
+
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+.Lrestore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
+ *
+ * Still, this might be useful for things like hash_page
+ */
+ bl __check_irq_replay
+ cmpwi cr0,r3,0
+ beq .Lrestore_no_replay
+
+ /*
+ * We need to re-emit an interrupt. We do so by re-using our
+ * existing exception frame. We first change the trap value,
+ * but we need to ensure we preserve the low nibble of it
+ */
+ ld r4,_TRAP(r1)
+ clrldi r4,r4,60
+ or r4,r4,r3
+ std r4,_TRAP(r1)
+
+ /*
+ * PACA_IRQ_HARD_DIS won't always be set here, so set it now
+ * to reconcile the IRQ state. Tracing is already accounted for.
+ */
+ lbz r4,PACAIRQHAPPENED(r13)
+ ori r4,r4,PACA_IRQ_HARD_DIS
+ stb r4,PACAIRQHAPPENED(r13)
+
+ /*
+ * Then find the right handler and call it. Interrupts are
+ * still soft-disabled and we keep them that way.
+ */
+ cmpwi cr0,r3,0x500
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl do_IRQ
+ b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0xe60
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl handle_hmi_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0x900
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl timer_interrupt
+ b ret_from_except
+#ifdef CONFIG_PPC_DOORBELL
+1:
+ cmpwi cr0,r3,0x280
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl doorbell_exception
+#endif /* CONFIG_PPC_DOORBELL */
+1: b ret_from_except /* What else to do here ? */
+
+_ASM_NOKPROBE_SYMBOL(ret_from_except);
+_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
+_ASM_NOKPROBE_SYMBOL(resume_kernel);
+_ASM_NOKPROBE_SYMBOL(restore);
+_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+
/*
* Trampolines used when spotting a bad kernel stack pointer in
* the exception entry code.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ffc15f4f079d..18bbce143084 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -32,16 +32,10 @@
#define EX_CCR 52
#define EX_CFAR 56
#define EX_PPR 64
-#if defined(CONFIG_RELOCATABLE)
#define EX_CTR 72
.if EX_SIZE != 10
.error "EX_SIZE is wrong"
.endif
-#else
-.if EX_SIZE != 9
- .error "EX_SIZE is wrong"
-.endif
-#endif
/*
* Following are fixed section helper macros.
@@ -50,7 +44,6 @@
* EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
* TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
* TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
* EXC_COMMON - After switching to virtual, relocated mode.
*/
@@ -80,13 +73,6 @@ name:
#define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
#define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
@@ -119,67 +105,6 @@ name:
ori reg,reg,(ABS_ADDR(label))@l; \
addis reg,reg,(ABS_ADDR(label))@h
-/* Exception register prefixes */
-#define EXC_HV_OR_STD 2 /* depends on HVMODE */
-#define EXC_HV 1
-#define EXC_STD 0
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -193,89 +118,199 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
- TRAMP_KVM_BEGIN(\name\()_kvm)
- KVM_HANDLER \vec, \hsrr, \area, \skip
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
+#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
+#define __ISTACK(name) .L_ISTACK_ ## name
+#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
+ .endif
+ .ifndef IHSRR
+ IHSRR=0
+ .endif
+ .ifndef IHSRR_IF_HVMODE
+ IHSRR_IF_HVMODE=0
+ .endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
+ .endif
+ .ifndef IVIRT
+ IVIRT=1
+ .endif
+ .ifndef IISIDE
+ IISIDE=0
+ .endif
+ .ifndef IDAR
+ IDAR=0
+ .endif
+ .ifndef IDSISR
+ IDSISR=0
+ .endif
+ .ifndef ISET_RI
+ ISET_RI=1
+ .endif
+ .ifndef IBRANCH_TO_COMMON
+ IBRANCH_TO_COMMON=1
+ .endif
+ .ifndef IREALMODE_COMMON
+ IREALMODE_COMMON=0
+ .else
+ .if ! IBRANCH_TO_COMMON
+ .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+ .endif
+ .endif
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_SKIP
+ IKVM_SKIP=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+ .ifndef ISTACK
+ ISTACK=1
+ .endif
+ .ifndef IRECONCILE
+ IRECONCILE=1
+ .endif
+ .ifndef IKUAP
+ IKUAP=1
+ .endif
.endm
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
+
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
+ * to kvmppc_interrupt_hv, which handles the PR guest case.
*/
#define kvmppc_interrupt kvmppc_interrupt_hv
#else
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
bne \name\()_kvm
.endm
-.macro KVM_HANDLER vec, hsrr, area, skip
- .if \skip
+.macro GEN_KVM name
+ .balign IFETCH_ALIGN_BYTES
+\name\()_kvm:
+
+ .if IKVM_SKIP
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
-BEGIN_FTR_SECTION_NESTED(947)
- ld r10,\area+EX_CFAR(r13)
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.endif
-BEGIN_FTR_SECTION_NESTED(948)
- ld r10,\area+EX_PPR(r13)
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
- ld r10,\area+EX_R10(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r11,IAREA+EX_R11(r13)
+ ld r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
+ ld r9,IAREA+EX_R9(r13)
+ ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr == EXC_HV_OR_STD
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- ori r12,r12,(\vec + 0x2)
+ ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- ori r12,r12,(\vec + 0x2)
+ .elseif IHSRR
+ ori r12,r12,(IVEC+ 0x2)
.else
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
.endif
-
-#ifdef CONFIG_RELOCATABLE
- /*
- * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
- * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
- * to be saved in HSTATE_SCRATCH1.
- */
- mfctr r9
- std r9,HSTATE_SCRATCH1(r13)
- __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
- mtctr r9
- ld r9,\area+EX_R9(r13)
- bctr
-#else
- ld r9,\area+EX_R9(r13)
b kvmppc_interrupt
-#endif
-
- .if \skip
+ .if IKVM_SKIP
89: mtocrf 0x80,r9
- ld r9,\area+EX_R9(r13)
- ld r10,\area+EX_R10(r13)
- .if \hsrr == EXC_HV_OR_STD
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ ld r9,IAREA+EX_R9(r13)
+ ld r10,IAREA+EX_R10(r13)
+ ld r11,IAREA+EX_R11(r13)
+ ld r12,IAREA+EX_R12(r13)
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
b kvmppc_skip_Hinterrupt
FTR_SECTION_ELSE
b kvmppc_skip_interrupt
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
+ .elseif IHSRR
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -284,107 +319,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endm
#else
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
.endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
+.macro GEN_KVM name
.endm
#endif
-.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- FTR_SECTION_ELSE
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
-.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- .endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
-#endif
-.endm
-
/*
* This is the BOOK3S interrupt entry code macro.
*
@@ -405,14 +345,41 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* - Fall through and continue executing in real, unrelocated mode.
* This is done if early=2.
*/
-.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+
+.macro GEN_BRANCH_TO_COMMON name, virt
+ .if IREALMODE_COMMON
+ LOAD_HANDLER(r10, \name\()_common)
+ mtctr r10
+ bctr
+ .else
+ .if \virt
+#ifndef CONFIG_RELOCATABLE
+ b \name\()_common_virt
+#else
+ LOAD_HANDLER(r10, \name\()_common_virt)
+ mtctr r10
+ bctr
+#endif
+ .else
+ LOAD_HANDLER(r10, \name\()_common_real)
+ mtctr r10
+ bctr
+ .endif
+ .endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0
SET_SCRATCH0(r13) /* save r13 */
GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ std r9,IAREA+EX_R9(r13) /* save r9 */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if \ool
.if !\virt
b tramp_real_\name
@@ -425,47 +392,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
+ mfctr r10
+ std r10,IAREA+EX_CTR(r13)
mfcr r9
- .if \kvm
- KVMTEST \name \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- bne masked_Hinterrupt
- FTR_SECTION_ELSE
- bne masked_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
+ std r11,IAREA+EX_R11(r13)
+ std r12,IAREA+EX_R12(r13)
/*
* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
@@ -473,49 +411,134 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* not recoverable if they are live.
*/
GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- .if \hsrr
+ std r10,IAREA+EX_R13(r13)
+ .if IDAR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDAR
.else
mfspr r10,SPRN_DAR
.endif
- std r10,\area\()+EX_DAR(r13)
+ std r10,IAREA+EX_DAR(r13)
.endif
- .if \dsisr
- .if \hsrr
+ .if IDSISR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDSISR
.else
mfspr r10,SPRN_DSISR
.endif
- stw r10,\area\()+EX_DSISR(r13)
+ stw r10,IAREA+EX_DSISR(r13)
.endif
- .if \early == 2
- /* nothing more */
- .elseif \early
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, \name\()_early_common)
- .elseif !\virt
- INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
.else
- INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+
+ .if IBRANCH_TO_COMMON
+ GEN_BRANCH_TO_COMMON \name \virt
.endif
+
.if \ool
.popsection
.endif
.endm
/*
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
*
- * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
- * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ * This switches to virtual mode and sets MSR[RI].
+ */
+.macro __GEN_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name
+ .endif
+
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ /* MSR[RI] is clear iff using SRR regs */
+ .if IHSRR == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ xori r10,r10,MSR_RI
+ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+ .elseif ! IHSRR
+ xori r10,r10,MSR_RI
+ .endif
+ mtmsrd r10
+
+ .if IVIRT
+ .if IKVM_VIRT
+ b 1f /* skip the virt test coming from real */
+ .endif
+
+ .balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+\name\()_common_virt:
+ .if IKVM_VIRT
+ KVMTEST \name
+1:
+ .endif
+ .endif /* IVIRT */
+.endm
+
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
*/
-.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
- .if \stack
+.macro __GEN_REALMODE_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name
+ .endif
+.endm
+
+.macro __GEN_COMMON_BODY name
+ .if IMASK
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,IMASK
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if IVEC == 0x500 || IVEC == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif IVEC == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif IVEC == 0xa00 || IVEC == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif IVEC == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif IVEC == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ .if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
@@ -532,54 +555,67 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
- .if \stack
- .if \kuap
+ .if ISET_RI
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set MSR_RI */
+ .endif
+
+ .if ISTACK
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
- SAVE_PPR(\area, r9)
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
101:
.else
- .if \kuap
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1
.endif
.endif
/* Save original regs values from save area to stack frame. */
- ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
- ld r10,\area+EX_R10(r13)
+ ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
- ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
- ld r10,\area+EX_R12(r13)
- ld r11,\area+EX_R13(r13)
+ ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,IAREA+EX_R12(r13)
+ ld r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
- .if \dar
- .if \dar == 2
+
+ SAVE_NVGPRS(r1)
+
+ .if IDAR
+ .if IISIDE
ld r10,_NIP(r1)
.else
- ld r10,\area+EX_DAR(r13)
+ ld r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
- .if \dsisr
- .if \dsisr == 2
+
+ .if IDSISR
+ .if IISIDE
ld r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
.else
- lwz r10,\area+EX_DSISR(r13)
+ lwz r10,IAREA+EX_DSISR(r13)
.endif
std r10,_DSISR(r1)
.endif
-BEGIN_FTR_SECTION_NESTED(66)
- ld r10,\area+EX_CFAR(r13)
+
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
- GET_CTR(r10, \area)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
@@ -591,32 +627,42 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
mfspr r11,SPRN_XER /* save XER in stackframe */
std r10,SOFTE(r1)
std r11,_XER(r1)
- li r9,(\vec)+1
+ li r9,IVEC
std r9,_TRAP(r1) /* set trap number */
li r10,0
ld r11,exception_marker@toc(r2)
std r10,RESULT(r1) /* clear regs->result */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
- .if \stack
+ .if ISTACK
ACCOUNT_STOLEN_TIME
.endif
- .if \reconcile
+ .if IRECONCILE
RECONCILE_IRQ_STATE(r10, r11)
.endif
.endm
/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro GEN_COMMON name
+ __GEN_COMMON_ENTRY \name
+ __GEN_COMMON_BODY \name
+.endm
+
+/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
* standard exception.
*/
-.macro EXCEPTION_RESTORE_REGS hsrr
+.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
- .if \hsrr == EXC_HV_OR_STD
- .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
- .endif
.if \hsrr
mtspr SPRN_HSRR1,r9
.else
@@ -670,28 +716,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
#define FINISH_NAP
#endif
-#define EXC_COMMON(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- bl save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except
-
-/*
- * Like EXC_COMMON, but for exceptions that can occur in the idle task and
- * therefore need the special idle handling (finish nap and runlatch)
- */
-#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- FINISH_NAP; \
- RUNLATCH_ON; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except_lite
-
-
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -778,6 +802,53 @@ __start_interrupts:
EXC_VIRT_NONE(0x4000, 0x100)
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+ IVEC=0x100
+ IAREA=PACA_EXNMI
+ IVIRT=0 /* no virt entry point */
+ /*
+ * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+ * being used, so a nested NMI exception would corrupt it.
+ */
+ ISET_RI=0
+ ISTACK=0
+ IRECONCILE=0
+ IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -815,11 +886,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
+ GEN_INT_ENTRY system_reset, virt=0
/*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- *
* In theory, we should not enable relocation here if it was disabled
* in SRR1, because the MMU may not be configured to support it (e.g.,
* SLB may have been cleared). In practice, there should only be a few
@@ -828,7 +896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -843,12 +910,14 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
- /* See comment at system_reset exception, don't turn on RI */
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+ /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */
+ __IKVM_REAL(system_reset)=0
+ GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
+ __GEN_COMMON_ENTRY system_reset
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
* to recover, but nested NMI will notice in_nmi and not recover
@@ -864,21 +933,21 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
- bl save_nvgprs
+ __GEN_COMMON_BODY system_reset
/*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
* the right thing. We do not want to reconcile because that goes
* through irq tracing which we don't want in NMI.
*
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
+ * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS
+ * as we are running with MSR[EE]=0.
*/
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
lbz r10,PACAIRQHAPPENED(r13)
std r10,_DAR(r1)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
addi r3,r1,STACK_FRAME_OVERHEAD
bl system_reset_exception
@@ -902,28 +971,95 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
+ GEN_KVM system_reset
-EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IREALMODE_COMMON=1
/*
* MSR_RI is not enabled, because PACA_EXMC is being used, so a
* nested machine check corrupts it. machine_check_common enables
* MSR_RI.
*/
+ ISET_RI=0
+ ISTACK=0
+ IDAR=1
+ IDSISR=1
+ IRECONCILE=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ ISET_RI=0
+ IDAR=1
+ IDSISR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+ GEN_INT_ENTRY machine_check_early, virt=0
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
#ifdef CONFIG_PPC_PSERIES
TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ GEN_INT_ENTRY machine_check_early, virt=0
#endif
-INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
-
#define MACHINE_CHECK_HANDLER_WINDUP \
/* Clear MSR_RI before setting SRR0 and SRR1. */\
li r9,0; \
@@ -932,12 +1068,10 @@ INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
lhz r12,PACA_IN_MCE(r13); \
subi r12,r12,1; \
sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
EXC_COMMON_BEGIN(machine_check_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
+ __GEN_REALMODE_COMMON_ENTRY machine_check_early
/*
* Switch to mc_emergency stack and handle re-entrancy (we limit
@@ -974,8 +1108,7 @@ EXC_COMMON_BEGIN(machine_check_early_common)
bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+ __GEN_COMMON_BODY machine_check_early
BEGIN_FTR_SECTION
bl enable_machine_check
@@ -983,7 +1116,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1063,23 +1195,25 @@ BEGIN_FTR_SECTION
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MACHINE_CHECK_HANDLER_WINDUP
- /* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY machine_check, virt=0
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
+ GEN_COMMON machine_check
+
FINISH_NAP
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM machine_check
+
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1144,21 +1278,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
b .
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load orstore which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_page first to see if the HPT can be filled from an entry in
+ * the Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page tables.
+ *
+ * If none is found, do a Linux page fault. Linux page faults can happen in
+ * kernel mode due to user copy operations of course.
+ *
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
+ */
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(data_access)
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY data_access, virt=0
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY data_access, virt=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
- /*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- * EX_DAR and EX_DSISR have saved DAR/DSISR
- */
- INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ GEN_COMMON data_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
@@ -1169,16 +1330,46 @@ MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ GEN_KVM data_access
+
+
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * A dedicated save area EXSLB is used (XXX: but it actually need not be
+ * these days, we could use EXGEN).
+ */
+INT_DEFINE_BEGIN(data_access_slb)
+ IVEC=0x380
+ IAREA=PACA_EXSLB
+ IRECONCILE=0
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(data_access_slb)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
+ GEN_INT_ENTRY data_access_slb, virt=0
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
+ GEN_INT_ENTRY data_access_slb, virt=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ GEN_COMMON data_access_slb
ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
@@ -1186,31 +1377,50 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM data_access_slb
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)
+ IVEC=0x400
+ IISIDE=1
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access)
+
EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
- INT_HANDLER instruction_access, 0x400, kvm=1
+ GEN_INT_ENTRY instruction_access, virt=0
EXC_REAL_END(instruction_access, 0x400, 0x80)
EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
- INT_HANDLER instruction_access, 0x400, virt=1
+ GEN_INT_ENTRY instruction_access, virt=1
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
-INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ GEN_COMMON instruction_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
@@ -1221,16 +1431,37 @@ MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ GEN_KVM instruction_access
+
+
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)
+ IVEC=0x480
+ IAREA=PACA_EXSLB
+ IRECONCILE=0
+ IISIDE=1
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access_slb)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+ GEN_INT_ENTRY instruction_access_slb, virt=0
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+ GEN_INT_ENTRY instruction_access_slb, virt=1
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ GEN_COMMON instruction_access_slb
ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
@@ -1238,54 +1469,125 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM instruction_access_slb
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers, guests use SRRs, which
+ * reqiures IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
+ * because registers at the time of the interrupt are not so important as it is
+ * asynchronous.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)
+ IVEC=0x500
+ IHSRR_IF_HVMODE=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=0
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+ GEN_COMMON hardware_interrupt
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ b interrupt_return
+ GEN_KVM hardware_interrupt
+
+
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)
+ IVEC=0x600
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(alignment)
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY alignment, virt=0
EXC_REAL_END(alignment, 0x600, 0x100)
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY alignment, virt=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON alignment
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+
+ GEN_KVM alignment
+
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)
+ IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(program_check)
EXC_REAL_BEGIN(program_check, 0x700, 0x100)
- INT_HANDLER program_check, 0x700, kvm=1
+ GEN_INT_ENTRY program_check, virt=0
EXC_REAL_END(program_check, 0x700, 0x100)
EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
- INT_HANDLER program_check, 0x700, virt=1
+ GEN_INT_ENTRY program_check, virt=1
EXC_VIRT_END(program_check, 0x4700, 0x100)
-INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
+ __GEN_COMMON_ENTRY program_check
+
/*
* It's possible to receive a TM Bad Thing type program check with
* userspace register values (in particular r1), but with SRR1 reporting
@@ -1310,28 +1612,47 @@ EXC_COMMON_BEGIN(program_check_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ __ISTACK(program_check)=0
+ __GEN_COMMON_BODY program_check
b 3f
2:
- INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+ __ISTACK(program_check)=1
+ __GEN_COMMON_BODY program_check
3:
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+ GEN_KVM program_check
+
+
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)
+ IVEC=0x800
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(fp_unavailable)
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, kvm=1
+ GEN_INT_ENTRY fp_unavailable, virt=0
EXC_REAL_END(fp_unavailable, 0x800, 0x100)
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, virt=1
+ GEN_INT_ENTRY fp_unavailable, virt=1
EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
-INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
@@ -1348,64 +1669,168 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
bl load_up_fpu
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
+ GEN_KVM fp_unavailable
+
+
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ */
+INT_DEFINE_BEGIN(decrementer)
+ IVEC=0x900
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(decrementer)
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
- INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY decrementer, virt=0
EXC_REAL_END(decrementer, 0x900, 0x80)
EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
- INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY decrementer, virt=1
EXC_VIRT_END(decrementer, 0x4900, 0x80)
-INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+EXC_COMMON_BEGIN(decrementer_common)
+ GEN_COMMON decrementer
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl timer_interrupt
+ b interrupt_return
+
+ GEN_KVM decrementer
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)
+ IVEC=0x980
+ IHSRR=1
+ ISTACK=0
+ IRECONCILE=0
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
+
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
- INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=0
EXC_REAL_END(hdecrementer, 0x980, 0x80)
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
- INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
-INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+EXC_COMMON_BEGIN(hdecrementer_common)
+ __GEN_COMMON_ENTRY hdecrementer
+ /*
+ * Hypervisor decrementer interrupts not caught by the KVM test
+ * shouldn't occur but are sometimes left pending on exit from a KVM
+ * guest. We don't need to do anything to clear them, as they are
+ * edge-triggered.
+ *
+ * Be careful to avoid touching the kernel stack.
+ */
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_KERNEL
+
+ GEN_KVM hdecrementer
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRS are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ */
+INT_DEFINE_BEGIN(doorbell_super)
+ IVEC=0xa00
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(doorbell_super)
+
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY doorbell_super, virt=0
EXC_REAL_END(doorbell_super, 0xa00, 0x100)
EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY doorbell_super, virt=1
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
-INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(doorbell_super_common)
+ GEN_COMMON doorbell_super
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+
+ GEN_KVM doorbell_super
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * system call / hypercall (0xc00, 0x4c00)
- *
- * The system call exception is invoked with "sc 0" and does not alter HV bit.
- *
- * The hypercall is invoked with "sc 1" and sets HV=1.
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
*
* In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
* 0x4c00 virtual mode.
*
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
* Call convention:
*
* syscall and hypercalls register conventions are documented in
@@ -1417,6 +1842,12 @@ EXC_VIRT_NONE(0x4b00, 0x100)
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(system_call)
+
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1431,7 +1862,7 @@ EXC_VIRT_NONE(0x4b00, 0x100)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call /* uses r10, branch to system_call_kvm */
mfctr r9
#else
mr r9,r13
@@ -1490,6 +1921,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+TRAMP_REAL_BEGIN(system_call_kvm)
/*
* This is a hcall, so register convention is as above, with these
* differences:
@@ -1497,43 +1929,95 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
- OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_PPR
+ std r10,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
mfctr r10
SET_SCRATCH0(r10)
- std r9,PACA_EXGEN+EX_R9(r13)
- mfcr r9
- KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+ mfcr r10
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r10,32
+ ori r12,r12,0xc00
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * outside the head section.
+ */
+ __LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+ mtctr r10
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ bctr
+#else
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ b kvmppc_interrupt
+#endif
#endif
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+ IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
+
EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
- INT_HANDLER single_step, 0xd00, kvm=1
+ GEN_INT_ENTRY single_step, virt=0
EXC_REAL_END(single_step, 0xd00, 0x100)
EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
- INT_HANDLER single_step, 0xd00, virt=1
+ GEN_INT_ENTRY single_step, virt=1
EXC_VIRT_END(single_step, 0x4d00, 0x100)
-INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+EXC_COMMON_BEGIN(single_step_common)
+ GEN_COMMON single_step
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl single_step_exception
+ b interrupt_return
+ GEN_KVM single_step
+
+
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+ IVEC=0xe00
+ IHSRR=1
+ IDAR=1
+ IDSISR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=0, ool=1
EXC_REAL_END(h_data_storage, 0xe00, 0x20)
EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=1, ool=1
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
-INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON h_data_storage
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
ld r4,_DAR(r1)
@@ -1542,56 +2026,125 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b ret_from_except
+ b interrupt_return
+ GEN_KVM h_data_storage
+
+
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+ IVEC=0xe20
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
-INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+ GEN_COMMON h_instr_storage
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return
+ GEN_KVM h_instr_storage
+
+
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+ IVEC=0xe40
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=0, ool=1
EXC_REAL_END(emulation_assist, 0xe40, 0x20)
EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=1, ool=1
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
-INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
+EXC_COMMON_BEGIN(emulation_assist_common)
+ GEN_COMMON emulation_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+ GEN_KVM emulation_assist
-/*
- * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
- * first, and then eventaully from there to the trampoline to get into virtual
- * mode.
+
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case, this is handled similarly to machine checks, with an
+ * initial real mode handler that is not soft-masked, which attempts to fix the
+ * problem. Then a regular handler which is soft-maskable and reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
*/
+INT_DEFINE_BEGIN(hmi_exception_early)
+ IVEC=0xe60
+ IHSRR=1
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IRECONCILE=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+ IVEC=0xe60
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+ GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+
EXC_COMMON_BEGIN(hmi_exception_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ __GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+ __GEN_COMMON_BODY hmi_exception_early
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
bne 1f
- EXCEPTION_RESTORE_REGS EXC_HV
+ EXCEPTION_RESTORE_REGS hsrr=1
HRFI_TO_USER_OR_KERNEL
1:
@@ -1599,41 +2152,84 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- EXCEPTION_RESTORE_REGS EXC_HV
- INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ EXCEPTION_RESTORE_REGS hsrr=1
+ GEN_INT_ENTRY hmi_exception, virt=0
+
+ GEN_KVM hmi_exception_early
EXC_COMMON_BEGIN(hmi_exception_common)
- INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
+ GEN_COMMON hmi_exception
FINISH_NAP
RUNLATCH_ON
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM hmi_exception
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+ IVEC=0xe80
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_doorbell)
+
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=0, ool=1
EXC_REAL_END(h_doorbell, 0xe80, 0x20)
EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=1, ool=1
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
-INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(h_doorbell_common)
+ GEN_COMMON h_doorbell
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+ GEN_KVM h_doorbell
+
+
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+ IVEC=0xea0
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
-INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+ GEN_COMMON h_virt_irq
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ b interrupt_return
+
+ GEN_KVM h_virt_irq
EXC_REAL_NONE(0xec0, 0x20)
@@ -1642,25 +2238,69 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears an NMI interrupt to Linux, in that it
+ * runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+ IVEC=0xf00
+ IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(performance_monitor)
+
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+ GEN_INT_ENTRY performance_monitor, virt=0, ool=1
EXC_REAL_END(performance_monitor, 0xf00, 0x20)
EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+ GEN_INT_ENTRY performance_monitor, virt=1, ool=1
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
-INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
+EXC_COMMON_BEGIN(performance_monitor_common)
+ GEN_COMMON performance_monitor
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl performance_monitor_exception
+ b interrupt_return
+ GEN_KVM performance_monitor
+
+
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+ IVEC=0xf20
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_unavailable)
EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+ GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+ GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
-INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON altivec_unavailable
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1674,34 +2314,47 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
+ GEN_KVM altivec_unavailable
+
+
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+ IVEC=0xf40
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(vsx_unavailable)
EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+ GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+ GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
-INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON vsx_unavailable
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1717,40 +2370,78 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM vsx_unavailable
+
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+ IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+ GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+ GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
-INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
+EXC_COMMON_BEGIN(facility_unavailable_common)
+ GEN_COMMON facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ b interrupt_return
+
+ GEN_KVM facility_unavailable
+/**
+ * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+ IVEC=0xf80
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
+
EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
-INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+ GEN_COMMON h_facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ b interrupt_return
+
+ GEN_KVM h_facility_unavailable
EXC_REAL_NONE(0xfa0, 0x20)
@@ -1766,56 +2457,95 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_system_error)
+ IVEC=0x1200
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_system_error)
+
EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
- INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_system_error, virt=0
EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
+EXC_COMMON_BEGIN(cbe_system_error_common)
+ GEN_COMMON cbe_system_error
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_system_error_exception
+ b interrupt_return
+
+ GEN_KVM cbe_system_error
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
#endif
+INT_DEFINE_BEGIN(instruction_breakpoint)
+ IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
+
EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=0
EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=1
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
-INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
-EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+ GEN_COMMON instruction_breakpoint
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl instruction_breakpoint_exception
+ b interrupt_return
+
+ GEN_KVM instruction_breakpoint
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
-EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+ IVEC=0x1500
+ IHSRR=1
+ IBRANCH_COMMON=0
+ IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=0
#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
- KVMTEST denorm_exception_hv, EXC_HV 0x1500
- INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
-EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
-
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ GEN_INT_ENTRY denorm_exception, virt=1
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
- INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=1
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
-
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
BEGIN_FTR_SECTION
@@ -1872,7 +2602,10 @@ denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
@@ -1885,43 +2618,88 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON_BEGIN(denorm_exception_common)
+ GEN_COMMON denorm_exception
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return
+
+ GEN_KVM denorm_exception
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_maintenance)
+ IVEC=0x1600
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_maintenance)
+
EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
- INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_maintenance, virt=0
EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
+EXC_COMMON_BEGIN(cbe_maintenance_common)
+ GEN_COMMON cbe_maintenance
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_maintenance_exception
+ b interrupt_return
+
+ GEN_KVM cbe_maintenance
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
#endif
+INT_DEFINE_BEGIN(altivec_assist)
+ IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_assist)
+
EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, kvm=1
+ GEN_INT_ENTRY altivec_assist, virt=0
EXC_REAL_END(altivec_assist, 0x1700, 0x100)
EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, virt=1
+ GEN_INT_ENTRY altivec_assist, virt=1
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
-INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(altivec_assist_common)
+ GEN_COMMON altivec_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
-EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
+ bl altivec_assist_exception
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
#else
-EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+
+ GEN_KVM altivec_assist
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_thermal)
+ IVEC=0x1800
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_thermal)
+
EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
- INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_thermal, virt=0
EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
+EXC_COMMON_BEGIN(cbe_thermal_common)
+ GEN_COMMON cbe_thermal
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_thermal_exception
+ b interrupt_return
+
+ GEN_KVM cbe_thermal
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -1930,14 +2708,11 @@ EXC_VIRT_NONE(0x5800, 0x100)
#ifdef CONFIG_PPC_WATCHDOG
-#define MASKED_DEC_HANDLER_LABEL 3f
-
-#define MASKED_DEC_HANDLER(_H) \
-3: /* soft-nmi */ \
- std r12,PACA_EXGEN+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,PACA_EXGEN+EX_R13(r13); \
- INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
+INT_DEFINE_BEGIN(soft_nmi)
+ IVEC=0x900
+ ISTACK=0
+ IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */
+INT_DEFINE_END(soft_nmi)
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1949,18 +2724,42 @@ EXC_VIRT_NONE(0x5800, 0x100)
* and run it entirely with interrupts hard disabled.
*/
EXC_COMMON_BEGIN(soft_nmi_common)
+ mfspr r11,SPRN_SRR0
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
- bl save_nvgprs
+ __GEN_COMMON_BODY soft_nmi
+
+ /*
+ * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
+ * system_reset_common)
+ */
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ lbz r10,PACAIRQHAPPENED(r13)
+ std r10,_DAR(r1)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
- b ret_from_except
-#else /* CONFIG_PPC_WATCHDOG */
-#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
-#define MASKED_DEC_HANDLER(_H)
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ /*
+ * Restore soft mask settings.
+ */
+ ld r10,_DAR(r1)
+ stb r10,PACAIRQHAPPENED(r13)
+ ld r10,SOFTE(r1)
+ stb r10,PACAIRQSOFTMASK(r13)
+
+ kuap_restore_amr r10
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
#endif /* CONFIG_PPC_WATCHDOG */
/*
@@ -1973,13 +2772,12 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-.macro MASKED_INTERRUPT hsrr
+.macro MASKED_INTERRUPT hsrr=0
.if \hsrr
masked_Hinterrupt:
.else
masked_interrupt:
.endif
- std r11,PACA_EXGEN+EX_R11(r13)
lbz r11,PACAIRQHAPPENED(r13)
or r11,r11,r10
stb r11,PACAIRQHAPPENED(r13)
@@ -1988,26 +2786,30 @@ masked_interrupt:
lis r10,0x7fff
ori r10,r10,0xffff
mtspr SPRN_DEC,r10
- b MASKED_DEC_HANDLER_LABEL
+#ifdef CONFIG_PPC_WATCHDOG
+ b soft_nmi_common
+#else
+ b 2f
+#endif
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
beq 2f
+ xori r12,r12,MSR_EE /* clear MSR_EE */
.if \hsrr
- mfspr r10,SPRN_HSRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_HSRR1,r10
+ mtspr SPRN_HSRR1,r12
.else
- mfspr r10,SPRN_SRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r12
.endif
ori r11,r11,PACA_IRQ_HARD_DIS
stb r11,PACAIRQHAPPENED(r13)
2: /* done */
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
mtcrf 0x80,r9
std r1,PACAR1(r13)
ld r9,PACA_EXGEN+EX_R9(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
/* returns to kernel where r13 must be set up, so don't restore it */
.if \hsrr
HRFI_TO_KERNEL
@@ -2015,7 +2817,6 @@ masked_interrupt:
RFI_TO_KERNEL
.endif
b .
- MASKED_DEC_HANDLER(\hsrr\())
.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
@@ -2117,17 +2918,12 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
-/*
- * Real mode exceptions actually use this too, but alternate
- * instruction code patches (which end up in the common .text area)
- * cannot reach these if they are put there.
- */
-USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT EXC_STD
- MASKED_INTERRUPT EXC_HV
+USE_TEXT_SECTION()
+ MASKED_INTERRUPT
+ MASKED_INTERRUPT hsrr=1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
+kvmppc_skip_interrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
@@ -2139,7 +2935,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
RFI_TO_KERNEL
b .
-TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
+kvmppc_skip_Hinterrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
@@ -2152,16 +2948,6 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
b .
#endif
-/*
- * Ensure that any handlers that get invoked from the exception prologs
- * above are below the first 64KB (0x10000) of the kernel image because
- * the prologs assemble the addresses of these handlers using the
- * LOAD_HANDLER macro, which uses an ori instruction.
- */
-
-/*** Common interrupt handlers ***/
-
-
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
@@ -2275,7 +3061,7 @@ do_hash_page:
cmpdi r3,0 /* see if __hash_page succeeded */
/* Success */
- beq fast_exc_return_irq /* Return from exception on success */
+ beq interrupt_return /* Return from exception on success */
/* Error */
blt- 13f
@@ -2292,39 +3078,36 @@ handle_page_fault:
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
- beq+ ret_from_except_lite
- bl save_nvgprs
+ beq+ interrupt_return
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
- bl save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_break
/*
* do_break() may have changed the NV GPRS while handling a breakpoint.
- * If so, we need to restore them with their updated values. Don't use
- * ret_from_except_lite here.
+ * If so, we need to restore them with their updated values.
*/
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
-13: bl save_nvgprs
- mr r5,r3
+13: mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl low_hash_fault
- b ret_from_except
+ b interrupt_return
#endif
/*
@@ -2334,11 +3117,10 @@ handle_dabr_fault:
* were soft-disabled. We want to invoke the exception handler for
* the access, or panic if there isn't a handler.
*/
-77: bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+77: addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/*
* When doorbell is triggered from system reset wakeup, the message is
@@ -2352,56 +3134,9 @@ handle_dabr_fault:
h_doorbell_common_msgclr:
LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
PPC_MSGCLR(3)
- b h_doorbell_common
+ b h_doorbell_common_virt
doorbell_super_common_msgclr:
LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
PPC_MSGCLRP(3)
- b doorbell_super_common
-
-/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- *
- * Note that we don't specify LR as the NIP (return address) for
- * the interrupt because that would unbalance the return branch
- * predictor.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
- */
- mfmsr r12
- LOAD_REG_ADDR(r11, replay_interrupt_return)
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
-BEGIN_FTR_SECTION
- beq h_virt_irq_common
-FTR_SECTION_ELSE
- beq hardware_interrupt_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
- cmpwi r3,0xf00
- beq performance_monitor_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xa00
- beq h_doorbell_common_msgclr
- cmpwi r3,0xe60
- beq hmi_exception_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common_msgclr
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-replay_interrupt_return:
- blr
-
-_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
+ b doorbell_super_common_virt
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ff0114aeba9b..59e60a9a9f5c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -36,6 +36,8 @@ static struct fw_dump fw_dump;
static void __init fadump_reserve_crash_area(u64 base);
+struct kobject *fadump_kobj;
+
#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
@@ -1323,9 +1325,9 @@ static void fadump_invalidate_release_mem(void)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t release_mem_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int input = -1;
@@ -1350,23 +1352,40 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
return count;
}
-static ssize_t fadump_enabled_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/* Release the reserved memory and disable the FADump */
+static void unregister_fadump(void)
+{
+ fadump_cleanup();
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
+ fw_dump.reserve_dump_area_size);
+ fw_dump.fadump_enabled = 0;
+ kobject_put(fadump_kobj);
+}
+
+static ssize_t enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
-static ssize_t fadump_register_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+static ssize_t mem_reserved_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+static ssize_t registered_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
-static ssize_t fadump_register_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t registered_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int ret = 0;
int input = -1;
@@ -1418,45 +1437,82 @@ static int fadump_region_show(struct seq_file *m, void *private)
return 0;
}
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
- 0200, NULL,
- fadump_release_memory_store);
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
- 0444, fadump_enabled_show,
- NULL);
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
- 0644, fadump_register_show,
- fadump_register_store);
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+
+static struct attribute *fadump_attrs[] = {
+ &enable_attr.attr,
+ &register_attr.attr,
+ &mem_reserved_attr.attr,
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
static void fadump_init_files(void)
{
- struct dentry *debugfs_file;
int rc = 0;
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_enabled (%d)\n", rc);
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+ if (!fadump_kobj) {
+ pr_err("failed to create fadump kobject\n");
+ return;
+ }
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_registered (%d)\n", rc);
+ debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL,
+ &fadump_region_fops);
- debugfs_file = debugfs_create_file("fadump_region", 0444,
- powerpc_debugfs_root, NULL,
- &fadump_region_fops);
- if (!debugfs_file)
- printk(KERN_ERR "fadump: unable to create debugfs file"
- " fadump_region\n");
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
+ if (rc)
+ pr_err("unable to create release_mem sysfs file (%d)\n",
+ rc);
+ }
+
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+ if (rc) {
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
+ rc);
+ unregister_fadump();
+ return;
+ }
+
+ /*
+ * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
+ * create symlink at old location to maintain backward compatibility.
+ *
+ * - fadump_enabled -> fadump/enabled
+ * - fadump_registered -> fadump/registered
+ * - fadump_release_mem -> fadump/release_mem
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "enabled", "fadump_enabled");
+ if (rc) {
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
+ return;
+ }
+
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "registered",
+ "fadump_registered");
+ if (rc) {
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
+ return;
+ }
if (fw_dump.dump_active) {
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+ fadump_kobj,
+ "release_mem",
+ "fadump_release_mem");
if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_release_mem (%d)\n", rc);
+ pr_err("unable to create fadump_release_mem symlink (%d)",
+ rc);
}
return;
}
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 97c887950c3c..daaa153950c2 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -348,7 +348,7 @@ BEGIN_MMU_FTR_SECTION
andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
#endif
bne handle_page_fault_tramp_2 /* if not, try to put a PTE */
- rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r3, r5, 32 - 24, 30, 30 /* DSISR_STORE -> _PAGE_RW */
bl hash_page
b handle_page_fault_tramp_1
FTR_SECTION_ELSE
@@ -497,7 +497,6 @@ InstructionTLBMiss:
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
ori r1, r1, 0xe06 /* clear out reserved bits */
andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
@@ -565,9 +564,8 @@ DataLoadTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwinm r1,r0,0,30,30 /* _PAGE_RW -> PP msb */
+ rlwimi r0,r0,1,30,30 /* _PAGE_USER -> PP msb */
ori r1,r1,0xe04 /* clear out reserved bits */
andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
@@ -645,7 +643,6 @@ DataStoreTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
li r1,0xe06 /* clear out reserved bits & PP msb */
andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9db162f79fe6..9abec6cd099c 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -130,37 +130,36 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
.macro SYSCALL_ENTRY trapno
mfspr r12,SPRN_SPRG_THREAD
+ mfspr r9, SPRN_SRR1
#ifdef CONFIG_VMAP_STACK
- mfspr r9, SPRN_SRR0
- mfspr r11, SPRN_SRR1
- stw r9, SRR0(r12)
- stw r11, SRR1(r12)
+ mfspr r11, SPRN_SRR0
+ mtctr r11
#endif
- mfcr r10
+ andi. r11, r9, MSR_PR
lwz r11,TASK_STACK-THREAD(r12)
- rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ beq- 99f
addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
#ifdef CONFIG_VMAP_STACK
- li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r9
+ li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r10
isync
#endif
tovirt_vmstack r12, r12
tophys_novmstack r11, r11
- mflr r9
- stw r10,_CCR(r11) /* save registers */
- stw r9, _LINK(r11)
+ mflr r10
+ stw r10, _LINK(r11)
#ifdef CONFIG_VMAP_STACK
- lwz r10, SRR0(r12)
- lwz r9, SRR1(r12)
+ mfctr r10
#else
mfspr r10,SPRN_SRR0
- mfspr r9,SPRN_SRR1
#endif
stw r1,GPR1(r11)
stw r1,0(r11)
tovirt_novmstack r1, r11 /* set new kernel sp */
stw r10,_NIP(r11)
+ mfcr r10
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ stw r10,_CCR(r11) /* save registers */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
@@ -228,6 +227,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99: b ret_from_kernel_syscall
.endm
.macro save_dar_dsisr_on_stack reg1, reg2, sp
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..ddfbd02140d9 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -537,6 +537,7 @@ __start_initialization_multiplatform:
b __after_prom_start
#endif /* CONFIG_PPC_BOOK3E */
+__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
/* Save parameters */
@@ -574,6 +575,7 @@ __boot_from_prom:
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
+ .previous
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
@@ -977,7 +979,6 @@ start_here_multiplatform:
RFI
b . /* prevent speculative execution */
- .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -1001,6 +1002,7 @@ start_here_common:
/* Not reached */
trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+ .previous
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 37fc84ed90e3..bd2e5ed8dd50 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -104,16 +104,18 @@ FTR_SECTION_ELSE
#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#endif
+ mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
+ andi. r11, r9, MSR_PR
lwz r11, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ beq- 99f
ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
stw r12, _CCR(r11) /* save various registers */
mflr r12
stw r12,_LINK(r11)
mfspr r12,SPRN_SRR0
stw r1, GPR1(r11)
- mfspr r9,SPRN_SRR1
stw r1, 0(r11)
mr r1, r11
stw r12,_NIP(r11)
@@ -176,6 +178,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99: b ret_from_kernel_syscall
.endm
/* To handle the additional exception priority levels on 40x and Book-E
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index d0854320bb50..72f461bd70fb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -429,3 +429,19 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c9b11878555..a25ed47087ee 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -70,6 +70,7 @@
#include <asm/paca.h>
#include <asm/firmware.h>
#include <asm/lv1call.h>
+#include <asm/dbell.h>
#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
@@ -109,6 +110,8 @@ static inline notrace int decrementer_check_overflow(void)
return now >= *next_tb;
}
+#ifdef CONFIG_PPC_BOOK3E
+
/* This is called whenever we are re-enabling interrupts
* and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
* there's an EE, DEC or DBELL to generate.
@@ -168,6 +171,67 @@ notrace unsigned int __check_irq_replay(void)
}
}
+ if (happened & PACA_IRQ_DEC) {
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
+ return 0x900;
+ }
+
+ if (happened & PACA_IRQ_EE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
+ return 0x500;
+ }
+
+ /*
+ * Check if an EPR external interrupt happened this bit is typically
+ * set if we need to handle another "edge" interrupt from within the
+ * MPIC "EPR" handler.
+ */
+ if (happened & PACA_IRQ_EE_EDGE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+ return 0x500;
+ }
+
+ if (happened & PACA_IRQ_DBELL) {
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+ return 0x280;
+ }
+
+ /* There should be nothing left ! */
+ BUG_ON(local_paca->irq_happened != 0);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_BOOK3E */
+
+void replay_soft_interrupts(void)
+{
+ /*
+ * We use local_paca rather than get_paca() to avoid all
+ * the debug_smp_processor_id() business in this low level
+ * function
+ */
+ unsigned char happened = local_paca->irq_happened;
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.softe = IRQS_ALL_DISABLED;
+
+again:
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (happened & PACA_IRQ_HARD_DIS) {
+ /*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+ if (!(happened & PACA_IRQ_DEC)) {
+ if (decrementer_check_overflow())
+ happened |= PACA_IRQ_DEC;
+ }
+ }
+
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
* Any HV call will have this side effect.
@@ -182,58 +246,78 @@ notrace unsigned int __check_irq_replay(void)
* This is a higher priority interrupt than the others, so
* replay it first.
*/
- if (happened & PACA_IRQ_HMI) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) {
local_paca->irq_happened &= ~PACA_IRQ_HMI;
- return 0xe60;
+ regs.trap = 0xe60;
+ handle_hmi_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
if (happened & PACA_IRQ_DEC) {
local_paca->irq_happened &= ~PACA_IRQ_DEC;
- return 0x900;
- }
-
- if (happened & PACA_IRQ_PMI) {
- local_paca->irq_happened &= ~PACA_IRQ_PMI;
- return 0xf00;
+ regs.trap = 0x900;
+ timer_interrupt(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
- return 0x500;
+ regs.trap = 0x500;
+ do_IRQ(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
-#ifdef CONFIG_PPC_BOOK3E
/*
* Check if an EPR external interrupt happened this bit is typically
* set if we need to handle another "edge" interrupt from within the
* MPIC "EPR" handler.
*/
- if (happened & PACA_IRQ_EE_EDGE) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) {
local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- return 0x500;
+ regs.trap = 0x500;
+ do_IRQ(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
- if (happened & PACA_IRQ_DBELL) {
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0x280;
- }
-#else
- if (happened & PACA_IRQ_DBELL) {
+ if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) {
local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0xa00;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E))
+ regs.trap = 0x280;
+ else
+ regs.trap = 0xa00;
+ doorbell_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
-#endif /* CONFIG_PPC_BOOK3E */
- /* There should be nothing left ! */
- BUG_ON(local_paca->irq_happened != 0);
+ /* Book3E does not support soft-masking PMI interrupts */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ regs.trap = 0xf00;
+ performance_monitor_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
+ }
- return 0;
+ happened = local_paca->irq_happened;
+ if (happened & ~PACA_IRQ_HARD_DIS) {
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+ goto again;
+ }
}
notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
- unsigned int replay;
/* Write the new soft-enabled value */
irq_soft_mask_set(mask);
@@ -255,24 +339,16 @@ notrace void arch_local_irq_restore(unsigned long mask)
*/
irq_happened = get_irq_happened();
if (!irq_happened) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
return;
}
- /*
- * We need to hard disable to get a trusted value from
- * __check_irq_replay(). We also need to soft-disable
- * again to avoid warnings in there due to the use of
- * per-cpu variables.
- */
+ /* We need to hard disable to replay. */
if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
__hard_irq_disable();
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
} else {
/*
* We should already be hard disabled here. We had bugs
@@ -280,35 +356,26 @@ notrace void arch_local_irq_restore(unsigned long mask)
* warn if we are wrong. Only do that when IRQ tracing
* is enabled as mfmsr() can be costly.
*/
- if (WARN_ON_ONCE(mfmsr() & MSR_EE))
- __hard_irq_disable();
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+
+ if (irq_happened == PACA_IRQ_HARD_DIS) {
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ return;
+ }
}
irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
- /*
- * Check if anything needs to be re-emitted. We haven't
- * soft-enabled yet to avoid warnings in decrementer_check_overflow
- * accessing per-cpu variables
- */
- replay = __check_irq_replay();
+ replay_soft_interrupts();
+ local_paca->irq_happened = 0;
- /* We can soft-enable now */
trace_hardirqs_on();
irq_soft_mask_set(IRQS_ENABLED);
-
- /*
- * And replay if we have to. This will return with interrupts
- * hard-enabled.
- */
- if (replay) {
- __replay_interrupt(replay);
- return;
- }
-
- /* Finally, let's ensure we are hard enabled */
__hard_irq_enable();
}
EXPORT_SYMBOL(arch_local_irq_restore);
@@ -599,17 +666,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
static inline void check_stack_overflow(void)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
long sp;
- sp = current_stack_pointer() & (THREAD_SIZE-1);
+ if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+ return;
+
+ sp = current_stack_pointer & (THREAD_SIZE - 1);
/* check for stack overflow: is there less than 2KB free? */
if (unlikely(sp < 2048)) {
pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
-#endif
}
void __do_irq(struct pt_regs *regs)
@@ -647,7 +715,7 @@ void do_IRQ(struct pt_regs *regs)
void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 2d27ec4feee4..81efb605113e 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -264,6 +264,9 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
+ if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -271,54 +274,6 @@ int kprobe_handler(struct pt_regs *regs)
preempt_disable();
kcb = get_kprobe_ctlblk();
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- kprobe_opcode_t insn = *p->ainsn.insn;
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- is_trap(insn)) {
- /* Turn off 'trace' bits */
- regs->msr &= ~MSR_SINGLESTEP;
- regs->msr |= kcb->kprobe_saved_msr;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- kcb->kprobe_status = KPROBE_REENTER;
- if (p->ainsn.boostable >= 0) {
- ret = try_to_emulate(p, regs);
-
- if (ret > 0) {
- restore_previous_kprobe(kcb);
- preempt_enable_no_resched();
- return 1;
- }
- }
- prepare_singlestep(p, regs);
- return 1;
- } else if (*addr != BREAKPOINT_INSTRUCTION) {
- /* If trap variant, then it belongs not to us */
- kprobe_opcode_t cur_insn = *addr;
-
- if (is_trap(cur_insn))
- goto no_kprobe;
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- ret = 1;
- }
- goto no_kprobe;
- }
-
p = get_kprobe(addr);
if (!p) {
if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -343,6 +298,39 @@ int kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
+ /* Turn off 'trace' bits */
+ regs->msr &= ~MSR_SINGLESTEP;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+
+ /*
+ * We have reentered the kprobe_handler(), since another probe
+ * was hit while within the handler. We here save the original
+ * kprobes variables and just single step on the instruction of
+ * the new probe without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ }
+ prepare_singlestep(p, regs);
+ return 1;
+ }
+
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
set_current_kprobe(p, regs, kcb);
if (p->pre_handler && p->pre_handler(p, regs)) {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 34c1001e9e8b..8077b5fb18a7 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -15,6 +15,7 @@
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/extable.h>
#include <asm/machdep.h>
#include <asm/mce.h>
@@ -251,6 +252,19 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
+
+void mce_common_process_ue(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
+{
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ }
+}
+
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 1cbf7f1a4e3d..067b094bfeff 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -579,14 +579,10 @@ static long mce_handle_ue_error(struct pt_regs *regs,
struct mce_error_info *mce_err)
{
long handled = 0;
- const struct exception_table_entry *entry;
- entry = search_kernel_exception_table(regs->nip);
- if (entry) {
- mce_err->ignore_event = true;
- regs->nip = extable_fixup(entry);
+ mce_common_process_ue(regs, mce_err);
+ if (mce_err->ignore_event)
return 1;
- }
/*
* On specific SCOM read via MMIO we may get a machine check
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 974f65f79a8e..65f9f731c229 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -110,7 +110,7 @@ _GLOBAL(longjmp)
li r3, 1
blr
-_GLOBAL(current_stack_pointer)
+_GLOBAL(current_stack_frame)
PPC_LL r3,0(r1)
blr
-EXPORT_SYMBOL(current_stack_pointer)
+EXPORT_SYMBOL(current_stack_frame)
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 427fc22f72b6..71a3f97dc988 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -62,13 +62,9 @@ static int of_pci_phb_probe(struct platform_device *dev)
/* Init pci_dn data structures */
pci_devs_phb_init_dynamic(phb);
- /* Create EEH devices for the PHB */
+ /* Create EEH PEs for the PHB */
eeh_dev_phb_init_dynamic(phb);
- /* Register devices with EEH */
- if (dev->dev.of_node->child)
- eeh_add_device_tree_early(PCI_DN(dev->dev.of_node));
-
/* Scan the bus */
pcibios_scan_phb(phb);
if (phb->bus == NULL)
@@ -80,15 +76,9 @@ static int of_pci_phb_probe(struct platform_device *dev)
*/
pcibios_claim_one_bus(phb->bus);
- /* Finish EEH setup */
- eeh_add_device_tree_late(phb->bus);
-
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(phb->bus);
-
return 0;
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 949eceb254d8..3f91ccaa9c74 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
struct paca_struct **paca_ptrs __read_mostly;
EXPORT_SYMBOL(paca_ptrs);
-void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu)
{
#ifdef CONFIG_PPC_PSERIES
new_paca->lppaca_ptr = NULL;
@@ -205,7 +205,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
}
/* Put the paca pointer into r13 and SPRG_PACA */
-void setup_paca(struct paca_struct *new_paca)
+void __nostackprotector setup_paca(struct paca_struct *new_paca)
{
/* Setup r13 */
local_paca = new_paca;
@@ -214,11 +214,15 @@ void setup_paca(struct paca_struct *new_paca)
/* On Book3E, initialize the TLB miss exception frames */
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
#else
- /* In HV mode, we setup both HPACA and PACA to avoid problems
+ /*
+ * In HV mode, we setup both HPACA and PACA to avoid problems
* if we do a GET_PACA() before the feature fixups have been
- * applied
+ * applied.
+ *
+ * Normally you should test against CPU_FTR_HVMODE, but CPU features
+ * are not yet set up when we first reach here.
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ if (mfmsr() & MSR_HV)
mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index d0074ad73aa3..be108616a721 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1399,14 +1399,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
pci_assign_unassigned_bus_resources(bus);
}
- /* Fixup EEH */
- eeh_add_device_tree_late(bus);
-
/* Add new devices to global lists. Register in proc, sysfs. */
pci_bus_add_devices(bus);
-
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(bus);
}
EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..bf83f76563a3 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
- eeh_add_device_tree_early(PCI_DN(dn));
-
phb = pci_bus_to_host(bus);
mode = PCI_PROBE_NORMAL;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index fad50db9dcf2..9c21288f8645 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -236,23 +236,9 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
-
-static int restore_fp(struct task_struct *tsk)
-{
- if (tsk->thread.load_fp) {
- load_fp_state(&current->thread.fp_state);
- current->thread.load_fp++;
- return 1;
- }
- return 0;
-}
-#else
-static int restore_fp(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
-#define loadvec(thr) ((thr).load_vec)
-
static void __giveup_altivec(struct task_struct *tsk)
{
unsigned long msr;
@@ -318,21 +304,6 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
-
-static int restore_altivec(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
- load_vr_state(&tsk->thread.vr_state);
- tsk->thread.used_vr = 1;
- tsk->thread.load_vec++;
-
- return 1;
- }
- return 0;
-}
-#else
-#define loadvec(thr) 0
-static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
@@ -400,18 +371,6 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
-
-static int restore_vsx(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_VSX)) {
- tsk->thread.used_vsr = 1;
- return 1;
- }
-
- return 0;
-}
-#else
-static inline int restore_vsx(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -511,6 +470,53 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+static int restore_fp(struct task_struct *tsk)
+{
+ if (tsk->thread.load_fp) {
+ load_fp_state(&current->thread.fp_state);
+ current->thread.load_fp++;
+ return 1;
+ }
+ return 0;
+}
+#else
+static int restore_fp(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+#define loadvec(thr) ((thr).load_vec)
+static int restore_altivec(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
+ load_vr_state(&tsk->thread.vr_state);
+ tsk->thread.used_vr = 1;
+ tsk->thread.load_vec++;
+
+ return 1;
+ }
+ return 0;
+}
+#else
+#define loadvec(thr) 0
+static inline int restore_altivec(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static int restore_vsx(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ tsk->thread.used_vsr = 1;
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_VSX */
+
/*
* The exception exit path calls restore_math() with interrupts hard disabled
* but the soft irq state not "reconciled". ftrace code that calls
@@ -551,6 +557,7 @@ void notrace restore_math(struct pt_regs *regs)
regs->msr = msr;
}
+#endif
static void save_all(struct task_struct *tsk)
{
@@ -1634,11 +1641,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
p->thread.regs = childregs;
childregs->gpr[3] = 0; /* Result from fork() */
if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_PPC64
if (!is_32bit_task())
childregs->gpr[13] = tls;
else
-#endif
childregs->gpr[2] = tls;
}
@@ -1976,6 +1981,32 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
return 0;
}
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+#ifdef CONFIG_PPC64
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+# endif
+#endif
+
+ return 0;
+}
+
+
int validate_sp(unsigned long sp, struct task_struct *p,
unsigned long nbytes)
{
@@ -1987,7 +2018,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
- return valid_irq_stack(sp, p, nbytes);
+ if (valid_irq_stack(sp, p, nbytes))
+ return 1;
+
+ return valid_emergency_stack(sp, p, nbytes);
}
EXPORT_SYMBOL(validate_sp);
@@ -2053,7 +2087,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 577345382b23..806be751c336 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1773,6 +1773,9 @@ static void __init prom_rtas_os_term(char *str)
if (token == 0)
prom_panic("Could not get token for ibm,os-term\n");
os_term_args.token = cpu_to_be32(token);
+ os_term_args.nargs = cpu_to_be32(1);
+ os_term_args.nret = cpu_to_be32(1);
+ os_term_args.args[0] = cpu_to_be32(__pa(str));
prom_rtas_hcall((uint64_t)&os_term_args);
}
#endif /* CONFIG_PPC_SVM */
@@ -3474,7 +3477,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
hdr = dt_header_start;
- /* Don't print anything after quiesce under OPAL, it crashes OFW */
prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%lx\n", hdr);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
deleted file mode 100644
index 25c0424e8868..000000000000
--- a/arch/powerpc/kernel/ptrace.c
+++ /dev/null
@@ -1,3468 +0,0 @@
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Derived from "arch/m68k/kernel/ptrace.c"
- * Copyright (C) 1994 by Hamish Macdonald
- * Taken from linux/kernel/ptrace.c and modified for M680x0.
- * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@hq.fsmlabs.com)
- * and Paul Mackerras (paulus@samba.org).
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/regset.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <trace/syscall.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
-#include <linux/context_tracking.h>
-#include <linux/nospec.h>
-
-#include <linux/uaccess.h>
-#include <linux/pkeys.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/switch_to.h>
-#include <asm/tm.h>
-#include <asm/asm-prototypes.h>
-#include <asm/debug.h>
-#include <asm/hw_breakpoint.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-/*
- * The parameter save area on the stack is used to store arguments being passed
- * to callee function and is located at fixed offset from stack pointer.
- */
-#ifdef CONFIG_PPC32
-#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
-#else /* CONFIG_PPC32 */
-#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
-#endif
-
-struct pt_regs_offset {
- const char *name;
- int offset;
-};
-
-#define STR(s) #s /* convert to string */
-#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
-#define GPR_OFFSET_NAME(num) \
- {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
- {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-#define TVSO(f) (offsetof(struct thread_vr_state, f))
-#define TFSO(f) (offsetof(struct thread_fp_state, f))
-#define TSO(f) (offsetof(struct thread_struct, f))
-
-static const struct pt_regs_offset regoffset_table[] = {
- GPR_OFFSET_NAME(0),
- GPR_OFFSET_NAME(1),
- GPR_OFFSET_NAME(2),
- GPR_OFFSET_NAME(3),
- GPR_OFFSET_NAME(4),
- GPR_OFFSET_NAME(5),
- GPR_OFFSET_NAME(6),
- GPR_OFFSET_NAME(7),
- GPR_OFFSET_NAME(8),
- GPR_OFFSET_NAME(9),
- GPR_OFFSET_NAME(10),
- GPR_OFFSET_NAME(11),
- GPR_OFFSET_NAME(12),
- GPR_OFFSET_NAME(13),
- GPR_OFFSET_NAME(14),
- GPR_OFFSET_NAME(15),
- GPR_OFFSET_NAME(16),
- GPR_OFFSET_NAME(17),
- GPR_OFFSET_NAME(18),
- GPR_OFFSET_NAME(19),
- GPR_OFFSET_NAME(20),
- GPR_OFFSET_NAME(21),
- GPR_OFFSET_NAME(22),
- GPR_OFFSET_NAME(23),
- GPR_OFFSET_NAME(24),
- GPR_OFFSET_NAME(25),
- GPR_OFFSET_NAME(26),
- GPR_OFFSET_NAME(27),
- GPR_OFFSET_NAME(28),
- GPR_OFFSET_NAME(29),
- GPR_OFFSET_NAME(30),
- GPR_OFFSET_NAME(31),
- REG_OFFSET_NAME(nip),
- REG_OFFSET_NAME(msr),
- REG_OFFSET_NAME(ctr),
- REG_OFFSET_NAME(link),
- REG_OFFSET_NAME(xer),
- REG_OFFSET_NAME(ccr),
-#ifdef CONFIG_PPC64
- REG_OFFSET_NAME(softe),
-#else
- REG_OFFSET_NAME(mq),
-#endif
- REG_OFFSET_NAME(trap),
- REG_OFFSET_NAME(dar),
- REG_OFFSET_NAME(dsisr),
- REG_OFFSET_END,
-};
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * If task is not current, it will have been flushed already to
- * it's thread_struct during __switch_to().
- *
- * A reclaim flushes ALL the state or if not in TM save TM SPRs
- * in the appropriate thread structures from live.
- */
-
- if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
- return;
-
- if (MSR_TM_SUSPENDED(mfmsr())) {
- tm_reclaim_current(TM_CAUSE_SIGNAL);
- } else {
- tm_enable();
- tm_save_sprs(&(tsk->thread));
- }
-}
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
-#endif
-
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name: the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
-}
-
-/**
- * regs_query_register_name() - query register name from its offset
- * @offset: the offset of a register in struct pt_regs.
- *
- * regs_query_register_name() returns the name of a register from its
- * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
- */
-const char *regs_query_register_name(unsigned int offset)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (roff->offset == offset)
- return roff->name;
- return NULL;
-}
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Set of msr bits that gdb can change on behalf of a process.
- */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-#define MSR_DEBUGCHANGE 0
-#else
-#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
-#endif
-
-/*
- * Max register writeable via put_reg
- */
-#ifdef CONFIG_PPC32
-#define PT_MAX_PUT_REG PT_MQ
-#else
-#define PT_MAX_PUT_REG PT_CCR
-#endif
-
-static unsigned long get_user_msr(struct task_struct *task)
-{
- return task->thread.regs->msr | task->thread.fpexc_mode;
-}
-
-static int set_user_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
- task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static unsigned long get_user_ckpt_msr(struct task_struct *task)
-{
- return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
-}
-
-static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
- task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.ckpt_regs.trap = trap & 0xfff0;
- return 0;
-}
-#endif
-
-#ifdef CONFIG_PPC64
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- *data = task->thread.dscr;
- return 0;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- task->thread.dscr = dscr;
- task->thread.dscr_inherit = 1;
- return 0;
-}
-#else
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- return -EIO;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- return -EIO;
-}
-#endif
-
-/*
- * We prevent mucking around with the reserved area of trap
- * which are used internally by the kernel.
- */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.regs->trap = trap & 0xfff0;
- return 0;
-}
-
-/*
- * Get contents of register REGNO in task TASK.
- */
-int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
-{
- unsigned int regs_max;
-
- if ((task->thread.regs == NULL) || !data)
- return -EIO;
-
- if (regno == PT_MSR) {
- *data = get_user_msr(task);
- return 0;
- }
-
- if (regno == PT_DSCR)
- return get_user_dscr(task, data);
-
-#ifdef CONFIG_PPC64
- /*
- * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
- * no more used as a flag, lets force usr to alway see the softe value as 1
- * which means interrupts are not soft disabled.
- */
- if (regno == PT_SOFTE) {
- *data = 1;
- return 0;
- }
-#endif
-
- regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
- if (regno < regs_max) {
- regno = array_index_nospec(regno, regs_max);
- *data = ((unsigned long *)task->thread.regs)[regno];
- return 0;
- }
-
- return -EIO;
-}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
-int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
- if (task->thread.regs == NULL)
- return -EIO;
-
- if (regno == PT_MSR)
- return set_user_msr(task, data);
- if (regno == PT_TRAP)
- return set_user_trap(task, data);
- if (regno == PT_DSCR)
- return set_user_dscr(task, data);
-
- if (regno <= PT_MAX_PUT_REG) {
- regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
- ((unsigned long *)task->thread.regs)[regno] = data;
- return 0;
- }
- return -EIO;
-}
-
-static int gpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i, ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_msr(target);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-static int gpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- *
- */
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
-
- for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
- return 0;
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-#ifdef CONFIG_ALTIVEC
-/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
- * The transfer totals 34 quadword. Quadwords 0-31 contain the
- * corresponding vector registers. Quadword 32 contains the vscr as the
- * last word (offset 12) within that quadword. Quadword 33 contains the
- * vrsave as the first word (offset 0) within the quadword.
- *
- * This definition of the VMX state is compatible with the current PPC32
- * ptrace interface. This allows signal handling and ptrace to use the
- * same structures. This also simplifies the implementation of a bi-arch
- * (combined (32- and 64-bit) gdb.
- */
-
-static int vr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_altivec_to_thread(target);
- return target->thread.used_vr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- }
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the first word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- if (!ret)
- target->thread.vrsave = vrsave.word;
- }
-
- return ret;
-}
-#endif /* CONFIG_ALTIVEC */
-
-#ifdef CONFIG_VSX
-/*
- * Currently to set and and get all the vsx state, you need to call
- * the fp and VMX calls as well. This only get/sets the lower 32
- * 128bit VSX registers.
- */
-
-static int vsr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret,i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
-
-/*
- * For get_evrregs/set_evrregs functions 'data' has the following layout:
- *
- * struct {
- * u32 evr[32];
- * u64 acc;
- * u32 spefscr;
- * }
- */
-
-static int evr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_spe_to_thread(target);
- return target->thread.used_spe ? regset->n : 0;
-}
-
-static int evr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-
-static int evr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-#endif /* CONFIG_SPE */
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/**
- * tm_cgpr_active - get active number of registers in CGPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed GPR category.
- */
-static int tm_cgpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cgpr_get - get CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds all the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function gets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_ckpt_msr(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-/*
- * tm_cgpr_set - set the CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function sets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/**
- * tm_cfpr_active - get active number of registers in CFPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed FPR category.
- */
-static int tm_cfpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cfpr_get - get CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed FPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-}
-
-/**
- * tm_cfpr_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * FPR register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- for (i = 0; i < 32; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
- for (i = 0; i < 32 ; i++)
- target->thread.TS_CKFPR(i) = buf[i];
- target->thread.ckfp_state.fpscr = buf[32];
- return 0;
-}
-
-/**
- * tm_cvmx_active - get active number of registers in CVMX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in checkpointed VMX category.
- */
-static int tm_cvmx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cvmx_get - get CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- }
-
- return ret;
-}
-
-/**
- * tm_cvmx_set - set CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- if (!ret)
- target->thread.ckvrsave = vrsave.word;
- }
-
- return ret;
-}
-
-/**
- * tm_cvsx_active - get active number of registers in CVSX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed VSX category.
- */
-static int tm_cvsx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/**
- * tm_cvsx_get - get CVSX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed VSX registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/**
- * tm_cvsx_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * VSX register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-
-/**
- * tm_spr_active - get active number of registers in TM SPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks the active number of available
- * regisers in the transactional memory SPR category.
- */
-static int tm_spr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- return regset->n;
-}
-
-/**
- * tm_spr_get - get the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-/**
- * tm_spr_set - set the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-static int tm_tar_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-
-static int tm_ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-#ifdef CONFIG_PPC64
-static int ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-static int dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static int tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-static int tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-
-static int ebb_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return regset->n;
-
- return 0;
-}
-
-static int ebb_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (!target->thread.used_ebb)
- return -ENODATA;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, 3 * sizeof(unsigned long));
-}
-
-static int ebb_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbhr, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.bescr,
- 2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
-
- return ret;
-}
-static int pmu_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pmu_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- 5 * sizeof(unsigned long));
-}
-
-static int pmu_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sdar, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sier, 2 * sizeof(unsigned long),
- 3 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr2, 3 * sizeof(unsigned long),
- 4 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr0, 4 * sizeof(unsigned long),
- 5 * sizeof(unsigned long));
- return ret;
-}
-#endif
-
-#ifdef CONFIG_PPC_MEM_KEYS
-static int pkey_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pkey_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
- BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.amr, 0,
- ELF_NPKEY * sizeof(unsigned long));
-}
-
-static int pkey_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 new_amr;
- int ret;
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- /* Only the AMR can be set from userspace */
- if (pos != 0 || count != sizeof(new_amr))
- return -EINVAL;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &new_amr, 0, sizeof(new_amr));
- if (ret)
- return ret;
-
- /* UAMOR determines which bits of the AMR can be set from userspace. */
- target->thread.amr = (new_amr & target->thread.uamor) |
- (target->thread.amr & ~target->thread.uamor);
-
- return 0;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
-/*
- * These are our native regset flavors.
- */
-enum powerpc_regset {
- REGSET_GPR,
- REGSET_FPR,
-#ifdef CONFIG_ALTIVEC
- REGSET_VMX,
-#endif
-#ifdef CONFIG_VSX
- REGSET_VSX,
-#endif
-#ifdef CONFIG_SPE
- REGSET_SPE,
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- REGSET_TM_CGPR, /* TM checkpointed GPR registers */
- REGSET_TM_CFPR, /* TM checkpointed FPR registers */
- REGSET_TM_CVMX, /* TM checkpointed VMX registers */
- REGSET_TM_CVSX, /* TM checkpointed VSX registers */
- REGSET_TM_SPR, /* TM specific SPR registers */
- REGSET_TM_CTAR, /* TM checkpointed TAR register */
- REGSET_TM_CPPR, /* TM checkpointed PPR register */
- REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
-#endif
-#ifdef CONFIG_PPC64
- REGSET_PPR, /* PPR register */
- REGSET_DSCR, /* DSCR register */
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- REGSET_TAR, /* TAR register */
- REGSET_EBB, /* EBB registers */
- REGSET_PMR, /* Performance Monitor Registers */
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- REGSET_PKEY, /* AMR register */
-#endif
-};
-
-static const struct user_regset native_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .get = gpr_get, .set = gpr_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_VSX
- [REGSET_VSX] = {
- .core_note_type = NT_PPC_VSX, .n = 32,
- .size = sizeof(double), .align = sizeof(double),
- .active = vsr_active, .get = vsr_get, .set = vsr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
- [REGSET_PMR] = {
- .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pmu_active, .get = pmu_get, .set = pmu_set
- },
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- [REGSET_PKEY] = {
- .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pkey_active, .get = pkey_get, .set = pkey_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_native_view = {
- .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
-
-#ifdef CONFIG_PPC64
-#include <linux/compat.h>
-
-static int gpr32_get_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- unsigned long *regs)
-{
- compat_ulong_t *k = kbuf;
- compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_MSR; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- if (count > 0 && pos == PT_MSR) {
- reg = get_user_msr(target);
- if (kbuf)
- *k++ = reg;
- else if (__put_user(reg, u++))
- return -EFAULT;
- ++pos;
- --count;
- }
-
- if (kbuf)
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- PT_REGS_COUNT * sizeof(reg), -1);
-}
-
-static int gpr32_set_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf,
- unsigned long *regs)
-{
- const compat_ulong_t *k = kbuf;
- const compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- regs[pos++] = *k++;
- else
- for (; count > 0 && pos < PT_MSR; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
-
-
- if (count > 0 && pos == PT_MSR) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_msr(target, reg);
- ++pos;
- --count;
- }
-
- if (kbuf) {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
- regs[pos++] = *k++;
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- ++k;
- } else {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- if (__get_user(reg, u++))
- return -EFAULT;
- }
-
- if (count > 0 && pos == PT_TRAP) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_trap(target, reg);
- ++pos;
- --count;
- }
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static int tm_cgpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-
-static int tm_cgpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-static int gpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /*
- * We have a partial register set.
- * Fill 14-31 with bogus values.
- */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-static int gpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-/*
- * These are the regset flavors matching the CONFIG_PPC32 native set.
- */
-static const struct user_regset compat_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
- .get = gpr32_get, .set = gpr32_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active,
- .get = tm_cgpr32_get, .set = tm_cgpr32_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_compat_view = {
- .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
- .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
-};
-#endif /* CONFIG_PPC64 */
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
-{
-#ifdef CONFIG_PPC64
- if (test_tsk_thread_flag(task, TIF_32BIT))
- return &user_ppc_compat_view;
-#endif
- return &user_ppc_native_view;
-}
-
-
-void user_enable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_BT;
- task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_BE;
- regs->msr |= MSR_SE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_enable_block_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_IC;
- task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_SE;
- regs->msr |= MSR_BE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_disable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * The logic to disable single stepping should be as
- * simple as turning off the Instruction Complete flag.
- * And, after doing so, if all debug flags are off, turn
- * off DBCR0(IDM) and MSR(DE) .... Torez
- */
- task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);
- /*
- * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
- */
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- /*
- * All debug events were off.....
- */
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- regs->msr &= ~MSR_DE;
- }
-#else
- regs->msr &= ~(MSR_SE | MSR_BE);
-#endif
- }
- clear_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp,
- struct perf_sample_data *data, struct pt_regs *regs)
-{
- struct perf_event_attr attr;
-
- /*
- * Disable the breakpoint request here since ptrace has defined a
- * one-shot behaviour for breakpoint exceptions in PPC64.
- * The SIGTRAP signal is generated automatically for us in do_dabr().
- * We don't have to do anything about that here
- */
- attr = bp->attr;
- attr.disabled = true;
- modify_user_hw_breakpoint(bp, &attr);
-}
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
- unsigned long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret;
- struct thread_struct *thread = &(task->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- bool set_bp = true;
- struct arch_hw_breakpoint hw_brk;
-#endif
-
- /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
- * For embedded processors we support one DAC and no IAC's at the
- * moment.
- */
- if (addr > 0)
- return -EINVAL;
-
- /* The bottom 3 bits in dabr are flags */
- if ((data & ~0x7UL) >= TASK_SIZE)
- return -EIO;
-
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
- * It was assumed, on previous implementations, that 3 bits were
- * passed together with the data address, fitting the design of the
- * DABR register, as follows:
- *
- * bit 0: Read flag
- * bit 1: Write flag
- * bit 2: Breakpoint translation
- *
- * Thus, we use them here as so.
- */
-
- /* Ensure breakpoint translation bit is set */
- if (data && !(data & HW_BRK_TYPE_TRANSLATE))
- return -EIO;
- hw_brk.address = data & (~HW_BRK_TYPE_DABR);
- hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = DABR_MAX_LEN;
- hw_brk.hw_len = DABR_MAX_LEN;
- set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (!set_bp) {
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- }
- return 0;
- }
- if (bp) {
- attr = bp->attr;
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
-
- /* Enable breakpoint */
- attr.disabled = false;
-
- ret = modify_user_hw_breakpoint(bp, &attr);
- if (ret) {
- return ret;
- }
- thread->ptrace_bps[0] = bp;
- thread->hw_brk = hw_brk;
- return 0;
- }
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type,
- &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, task);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
-#else /* !CONFIG_HAVE_HW_BREAKPOINT */
- if (set_bp && (!ppc_breakpoint_available()))
- return -ENODEV;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
- task->thread.hw_brk = hw_brk;
-#else /* CONFIG_PPC_ADV_DEBUG_REGS */
- /* As described above, it was assumed 3 bits were passed with the data
- * address, but we will assume only the mode bits will be passed
- * as to not cause alignment restrictions for DAC-based processors.
- */
-
- /* DAC's hold the whole address without any mode flags */
- task->thread.debug.dac1 = data & ~0x3UL;
-
- if (task->thread.debug.dac1 == 0) {
- dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- task->thread.regs->msr &= ~MSR_DE;
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- }
- return 0;
- }
-
- /* Read or Write bits must be set */
-
- if (!(data & 0x3UL))
- return -EINVAL;
-
- /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
- register */
- task->thread.debug.dbcr0 |= DBCR0_IDM;
-
- /* Check for write and read flags and set DBCR0
- accordingly */
- dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
- if (data & 0x1UL)
- dbcr_dac(task) |= DBCR_DAC1R;
- if (data & 0x2UL)
- dbcr_dac(task) |= DBCR_DAC1W;
- task->thread.regs->msr |= MSR_DE;
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
- return 0;
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- /* make sure the single step bit is not set. */
- user_disable_single_step(child);
-}
-
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_instruction_bp(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int slot;
- int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
- int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
- int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
- int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- slot2_in_use = 1;
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- slot4_in_use = 1;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
-
- /* Make sure range is valid. */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
-
- /* We need a pair of IAC regsisters */
- if ((!slot1_in_use) && (!slot2_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.iac2 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC12X;
- else
- dbcr_iac_range(child) |= DBCR_IAC12I;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if ((!slot3_in_use) && (!slot4_in_use)) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.iac4 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC34X;
- else
- dbcr_iac_range(child) |= DBCR_IAC34I;
-#endif
- } else
- return -ENOSPC;
- } else {
- /* We only need one. If possible leave a pair free in
- * case a range is needed later
- */
- if (!slot1_in_use) {
- /*
- * Don't use iac1 if iac1-iac2 are free and either
- * iac3 or iac4 (but not both) are free
- */
- if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- goto out;
- }
- }
- if (!slot2_in_use) {
- slot = 2;
- child->thread.debug.iac2 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC2;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if (!slot3_in_use) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- } else if (!slot4_in_use) {
- slot = 4;
- child->thread.debug.iac4 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC4;
-#endif
- } else
- return -ENOSPC;
- }
-out:
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot;
-}
-
-static int del_instruction_bp(struct task_struct *child, int slot)
-{
- switch (slot) {
- case 1:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
- /* address range - clear slots 1 & 2 */
- child->thread.debug.iac2 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
- }
- child->thread.debug.iac1 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
- break;
- case 2:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- /* used in a range */
- return -EINVAL;
- child->thread.debug.iac2 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
- break;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- case 3:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
- /* address range - clear slots 3 & 4 */
- child->thread.debug.iac4 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
- }
- child->thread.debug.iac3 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
- break;
- case 4:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- /* Used in a range */
- return -EINVAL;
- child->thread.debug.iac4 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
- break;
-#endif
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
-{
- int byte_enable =
- (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
- & 0xf;
- int condition_mode =
- bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
- int slot;
-
- if (byte_enable && (condition_mode == 0))
- return -EINVAL;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
- slot = 1;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC1R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC1W;
- child->thread.debug.dac1 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc1 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC1BE_SHIFT) |
- (condition_mode << DBCR2_DVC1M_SHIFT));
- }
-#endif
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- /* Both dac1 and dac2 are part of a range */
- return -ENOSPC;
-#endif
- } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
- slot = 2;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC2R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC2W;
- child->thread.debug.dac2 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc2 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC2BE_SHIFT) |
- (condition_mode << DBCR2_DVC2M_SHIFT));
- }
-#endif
- } else
- return -ENOSPC;
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot + 4;
-}
-
-static int del_dac(struct task_struct *child, int slot)
-{
- if (slot == 1) {
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
- return -ENOENT;
-
- child->thread.debug.dac1 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- child->thread.debug.dac2 = 0;
- child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
- }
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc1 = 0;
-#endif
- } else if (slot == 2) {
- if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
- return -ENOENT;
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
- /* Part of a range */
- return -EINVAL;
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc2 = 0;
-#endif
- child->thread.debug.dac2 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
- } else
- return -EINVAL;
-
- return 0;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-static int set_dac_range(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
-
- /* We don't allow range watchpoints to be used with DVC */
- if (bp_info->condition_mode)
- return -EINVAL;
-
- /*
- * Best effort to verify the address range. The user/supervisor bits
- * prevent trapping in kernel space, but let's fail on an obvious bad
- * range. The simple test on the mask is not fool-proof, and any
- * exclusive range will spill over into kernel space.
- */
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
- if (mode == PPC_BREAKPOINT_MODE_MASK) {
- /*
- * dac2 is a bitmask. Don't allow a mask that makes a
- * kernel space address from a valid dac1 value
- */
- if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
- return -EIO;
- } else {
- /*
- * For range breakpoints, addr2 must also be a valid address
- */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
- }
-
- if (child->thread.debug.dbcr0 &
- (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
- return -ENOSPC;
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
- child->thread.debug.dac1 = bp_info->addr;
- child->thread.debug.dac2 = bp_info->addr2;
- if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12M;
- else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
- else /* PPC_BREAKPOINT_MODE_MASK */
- child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
- child->thread.regs->msr |= MSR_DE;
-
- return 5;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
-
-static long ppc_set_hwdebug(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int len = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- struct arch_hw_breakpoint brk;
-#endif
-
- if (bp_info->version != 1)
- return -ENOTSUPP;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * Check for invalid flags and combinations
- */
- if ((bp_info->trigger_type == 0) ||
- (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
- PPC_BREAKPOINT_TRIGGER_RW)) ||
- (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
- (bp_info->condition_mode &
- ~(PPC_BREAKPOINT_CONDITION_MODE |
- PPC_BREAKPOINT_CONDITION_BE_ALL)))
- return -EINVAL;
-#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
- if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-#endif
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
- if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
- (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
- return -EINVAL;
- return set_instruction_bp(child, bp_info);
- }
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- return set_dac(child, bp_info);
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- return set_dac_range(child, bp_info);
-#else
- return -EINVAL;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */
- /*
- * We only support one data breakpoint
- */
- if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
- (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
- bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-
- if ((unsigned long)bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
- brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = DABR_MAX_LEN;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- brk.type |= HW_BRK_TYPE_READ;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- brk.type |= HW_BRK_TYPE_WRITE;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- len = bp_info->addr2 - bp_info->addr;
- else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- len = 1;
- else
- return -EINVAL;
- bp = thread->ptrace_bps[0];
- if (bp)
- return -ENOSPC;
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr;
- attr.bp_len = len;
- arch_bp_generic_fields(brk.type, &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, child);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
- return 1;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
- return -EINVAL;
-
- if (child->thread.hw_brk.address)
- return -ENOSPC;
-
- if (!ppc_breakpoint_available())
- return -ENODEV;
-
- child->thread.hw_brk = brk;
-
- return 1;
-#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
-}
-
-static long ppc_del_hwdebug(struct task_struct *child, long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- int rc;
-
- if (data <= 4)
- rc = del_instruction_bp(child, (int)data);
- else
- rc = del_dac(child, (int)data - 4);
-
- if (!rc) {
- if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
- child->thread.debug.dbcr1)) {
- child->thread.debug.dbcr0 &= ~DBCR0_IDM;
- child->thread.regs->msr &= ~MSR_DE;
- }
- }
- return rc;
-#else
- if (data != 1)
- return -EINVAL;
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- } else
- ret = -ENOENT;
- return ret;
-#else /* CONFIG_HAVE_HW_BREAKPOINT */
- if (child->thread.hw_brk.address == 0)
- return -ENOENT;
-
- child->thread.hw_brk.address = 0;
- child->thread.hw_brk.type = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- return 0;
-#endif
-}
-
-long arch_ptrace(struct task_struct *child, long request,
- unsigned long addr, unsigned long data)
-{
- int ret = -EPERM;
- void __user *datavp = (void __user *) data;
- unsigned long __user *datalp = datavp;
-
- switch (request) {
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long index, tmp;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_get_reg(child, (int) index, &tmp);
- if (ret)
- break;
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&tmp, &child->thread.TS_FPR(fpidx),
- sizeof(long));
- else
- tmp = child->thread.fp_state.fpscr;
- }
- ret = put_user(tmp, datalp);
- break;
- }
-
- /* write the word at location addr in the USER area */
- case PTRACE_POKEUSR: {
- unsigned long index;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_put_reg(child, index, data);
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data,
- sizeof(long));
- else
- child->thread.fp_state.fpscr = data;
- ret = 0;
- }
- break;
- }
-
- case PPC_PTRACE_GETHWDBGINFO: {
- struct ppc_debug_info dbginfo;
-
- dbginfo.version = 1;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
- dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
- dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
- dbginfo.data_bp_alignment = 4;
- dbginfo.sizeof_condition = 4;
- dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
- PPC_DEBUG_FEATURE_INSN_BP_MASK;
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- dbginfo.features |=
- PPC_DEBUG_FEATURE_DATA_BP_RANGE |
- PPC_DEBUG_FEATURE_DATA_BP_MASK;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
- dbginfo.num_instruction_bps = 0;
- if (ppc_breakpoint_available())
- dbginfo.num_data_bps = 1;
- else
- dbginfo.num_data_bps = 0;
- dbginfo.num_condition_regs = 0;
-#ifdef CONFIG_PPC64
- dbginfo.data_bp_alignment = 8;
-#else
- dbginfo.data_bp_alignment = 4;
-#endif
- dbginfo.sizeof_condition = 0;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (dawr_enabled())
- dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
-#else
- dbginfo.features = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
- if (copy_to_user(datavp, &dbginfo,
- sizeof(struct ppc_debug_info)))
- return -EFAULT;
- return 0;
- }
-
- case PPC_PTRACE_SETHWDEBUG: {
- struct ppc_hw_breakpoint bp_info;
-
- if (copy_from_user(&bp_info, datavp,
- sizeof(struct ppc_hw_breakpoint)))
- return -EFAULT;
- return ppc_set_hwdebug(child, &bp_info);
- }
-
- case PPC_PTRACE_DELHWDEBUG: {
- ret = ppc_del_hwdebug(child, data);
- break;
- }
-
- case PTRACE_GET_DEBUGREG: {
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- unsigned long dabr_fake;
-#endif
- ret = -EINVAL;
- /* We only support one DABR and no IABRS at the moment */
- if (addr > 0)
- break;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- ret = put_user(child->thread.debug.dac1, datalp);
-#else
- dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
- ret = put_user(dabr_fake, datalp);
-#endif
- break;
- }
-
- case PTRACE_SET_DEBUGREG:
- ret = ptrace_set_debugreg(child, addr, data);
- break;
-
-#ifdef CONFIG_PPC64
- case PTRACE_GETREGS64:
-#endif
- case PTRACE_GETREGS: /* Get all pt_regs from the child. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
-#ifdef CONFIG_PPC64
- case PTRACE_SETREGS64:
-#endif
- case PTRACE_SETREGS: /* Set all gp regs in the child. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
- case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
- case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
-#ifdef CONFIG_ALTIVEC
- case PTRACE_GETVRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-
- case PTRACE_SETVRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-#endif
-#ifdef CONFIG_VSX
- case PTRACE_GETVSRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-
- case PTRACE_SETVSRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-#endif
-#ifdef CONFIG_SPE
- case PTRACE_GETEVRREGS:
- /* Get the child spe register state. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-
- case PTRACE_SETEVRREGS:
- /* Set the child spe register state. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-#endif
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-#ifdef CONFIG_SECCOMP
-static int do_seccomp(struct pt_regs *regs)
-{
- if (!test_thread_flag(TIF_SECCOMP))
- return 0;
-
- /*
- * The ABI we present to seccomp tracers is that r3 contains
- * the syscall return value and orig_gpr3 contains the first
- * syscall parameter. This is different to the ptrace ABI where
- * both r3 and orig_gpr3 contain the first syscall parameter.
- */
- regs->gpr[3] = -ENOSYS;
-
- /*
- * We use the __ version here because we have already checked
- * TIF_SECCOMP. If this fails, there is nothing left to do, we
- * have already loaded -ENOSYS into r3, or seccomp has put
- * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
- */
- if (__secure_computing(NULL))
- return -1;
-
- /*
- * The syscall was allowed by seccomp, restore the register
- * state to what audit expects.
- * Note that we use orig_gpr3, which means a seccomp tracer can
- * modify the first syscall parameter (in orig_gpr3) and also
- * allow the syscall to proceed.
- */
- regs->gpr[3] = regs->orig_gpr3;
-
- return 0;
-}
-#else
-static inline int do_seccomp(struct pt_regs *regs) { return 0; }
-#endif /* CONFIG_SECCOMP */
-
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
- u32 flags;
-
- user_exit();
-
- flags = READ_ONCE(current_thread_info()->flags) &
- (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
- if (flags) {
- int rc = tracehook_report_syscall_entry(regs);
-
- if (unlikely(flags & _TIF_SYSCALL_EMU)) {
- /*
- * A nonzero return code from
- * tracehook_report_syscall_entry() tells us to prevent
- * the syscall execution, but we are not going to
- * execute it anyway.
- *
- * Returning -1 will skip the syscall execution. We want
- * to avoid clobbering any registers, so we don't goto
- * the skip label below.
- */
- return -1;
- }
-
- if (rc) {
- /*
- * The tracer decided to abort the syscall. Note that
- * the tracer may also just change regs->gpr[0] to an
- * invalid syscall number, that is handled below on the
- * exit path.
- */
- goto skip;
- }
- }
-
- /* Run seccomp after ptrace; allow it to set gpr[3]. */
- if (do_seccomp(regs))
- return -1;
-
- /* Avoid trace and audit when syscall is invalid. */
- if (regs->gpr[0] >= NR_syscalls)
- goto skip;
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gpr[0]);
-
-#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
- regs->gpr[5], regs->gpr[6]);
- else
-#endif
- audit_syscall_entry(regs->gpr[0],
- regs->gpr[3] & 0xffffffff,
- regs->gpr[4] & 0xffffffff,
- regs->gpr[5] & 0xffffffff,
- regs->gpr[6] & 0xffffffff);
-
- /* Return the possibly modified but valid syscall number */
- return regs->gpr[0];
-
-skip:
- /*
- * If we are aborting explicitly, or if the syscall number is
- * now invalid, set the return value to -ENOSYS.
- */
- regs->gpr[3] = -ENOSYS;
- return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
- int step;
-
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->result);
-
- step = test_thread_flag(TIF_SINGLESTEP);
- if (step || test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, step);
-
- user_enter();
-}
-
-void __init pt_regs_check(void);
-
-/*
- * Dummy function, its purpose is to break the build if struct pt_regs and
- * struct user_pt_regs don't match.
- */
-void __init pt_regs_check(void)
-{
- BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
- offsetof(struct user_pt_regs, gpr));
- BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
- offsetof(struct user_pt_regs, nip));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct user_pt_regs, orig_gpr3));
- BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
- offsetof(struct user_pt_regs, ctr));
- BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
- offsetof(struct user_pt_regs, link));
- BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
- offsetof(struct user_pt_regs, xer));
- BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
- offsetof(struct user_pt_regs, ccr));
-#ifdef __powerpc64__
- BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
- offsetof(struct user_pt_regs, softe));
-#else
- BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
- offsetof(struct user_pt_regs, mq));
-#endif
- BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
- offsetof(struct user_pt_regs, trap));
- BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
- offsetof(struct user_pt_regs, dar));
- BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
- offsetof(struct user_pt_regs, dsisr));
- BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
- offsetof(struct user_pt_regs, result));
-
- BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
-
- // Now check that the pt_regs offsets match the uapi #defines
- #define CHECK_REG(_pt, _reg) \
- BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
- sizeof(unsigned long)));
-
- CHECK_REG(PT_R0, gpr[0]);
- CHECK_REG(PT_R1, gpr[1]);
- CHECK_REG(PT_R2, gpr[2]);
- CHECK_REG(PT_R3, gpr[3]);
- CHECK_REG(PT_R4, gpr[4]);
- CHECK_REG(PT_R5, gpr[5]);
- CHECK_REG(PT_R6, gpr[6]);
- CHECK_REG(PT_R7, gpr[7]);
- CHECK_REG(PT_R8, gpr[8]);
- CHECK_REG(PT_R9, gpr[9]);
- CHECK_REG(PT_R10, gpr[10]);
- CHECK_REG(PT_R11, gpr[11]);
- CHECK_REG(PT_R12, gpr[12]);
- CHECK_REG(PT_R13, gpr[13]);
- CHECK_REG(PT_R14, gpr[14]);
- CHECK_REG(PT_R15, gpr[15]);
- CHECK_REG(PT_R16, gpr[16]);
- CHECK_REG(PT_R17, gpr[17]);
- CHECK_REG(PT_R18, gpr[18]);
- CHECK_REG(PT_R19, gpr[19]);
- CHECK_REG(PT_R20, gpr[20]);
- CHECK_REG(PT_R21, gpr[21]);
- CHECK_REG(PT_R22, gpr[22]);
- CHECK_REG(PT_R23, gpr[23]);
- CHECK_REG(PT_R24, gpr[24]);
- CHECK_REG(PT_R25, gpr[25]);
- CHECK_REG(PT_R26, gpr[26]);
- CHECK_REG(PT_R27, gpr[27]);
- CHECK_REG(PT_R28, gpr[28]);
- CHECK_REG(PT_R29, gpr[29]);
- CHECK_REG(PT_R30, gpr[30]);
- CHECK_REG(PT_R31, gpr[31]);
- CHECK_REG(PT_NIP, nip);
- CHECK_REG(PT_MSR, msr);
- CHECK_REG(PT_ORIG_R3, orig_gpr3);
- CHECK_REG(PT_CTR, ctr);
- CHECK_REG(PT_LNK, link);
- CHECK_REG(PT_XER, xer);
- CHECK_REG(PT_CCR, ccr);
-#ifdef CONFIG_PPC64
- CHECK_REG(PT_SOFTE, softe);
-#else
- CHECK_REG(PT_MQ, mq);
-#endif
- CHECK_REG(PT_TRAP, trap);
- CHECK_REG(PT_DAR, dar);
- CHECK_REG(PT_DSISR, dsisr);
- CHECK_REG(PT_RESULT, result);
- #undef CHECK_REG
-
- BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
-
- /*
- * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
- * real registers.
- */
- BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
-}
diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
new file mode 100644
index 000000000000..e9d97c2d063e
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y += ptrace.o ptrace-view.o
+obj-$(CONFIG_PPC64) += ptrace32.o
+obj-$(CONFIG_VSX) += ptrace-vsx.o
+ifneq ($(CONFIG_VSX),y)
+obj-y += ptrace-novsx.o
+endif
+obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o
+obj-$(CONFIG_SPE) += ptrace-spe.o
+obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o
+obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o
+ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y)
+obj-y += ptrace-noadv.o
+endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
new file mode 100644
index 000000000000..3990c01ef8cf
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_BT;
+ task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs->msr |= MSR_DE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_IC;
+ task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
+ regs->msr |= MSR_DE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ /*
+ * The logic to disable single stepping should be as
+ * simple as turning off the Instruction Complete flag.
+ * And, after doing so, if all debug flags are off, turn
+ * off DBCR0(IDM) and MSR(DE) .... Torez
+ */
+ task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT);
+ /*
+ * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
+ */
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ /*
+ * All debug events were off.....
+ */
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs->msr &= ~MSR_DE;
+ }
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+ dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+ dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
+ dbginfo->data_bp_alignment = 4;
+ dbginfo->sizeof_condition = 4;
+ dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
+ PPC_DEBUG_FEATURE_INSN_BP_MASK;
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+ PPC_DEBUG_FEATURE_DATA_BP_MASK;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ return put_user(child->thread.debug.dac1, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.debug.dac1 = data & ~0x3UL;
+
+ if (task->thread.debug.dac1 == 0) {
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ task->thread.regs->msr &= ~MSR_DE;
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ }
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */
+ task->thread.debug.dbcr0 |= DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0 accordingly */
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (data & 0x1UL)
+ dbcr_dac(task) |= DBCR_DAC1R;
+ if (data & 0x2UL)
+ dbcr_dac(task) |= DBCR_DAC1W;
+ task->thread.regs->msr |= MSR_DE;
+ return 0;
+}
+
+static long set_instruction_bp(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int slot;
+ int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+ int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+ int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+ int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ slot2_in_use = 1;
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ slot4_in_use = 1;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+ /* Make sure range is valid. */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+
+ /* We need a pair of IAC regsisters */
+ if (!slot1_in_use && !slot2_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.iac2 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC12X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if ((!slot3_in_use) && (!slot4_in_use)) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.iac4 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC34X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ } else {
+ /* We only need one. If possible leave a pair free in
+ * case a range is needed later
+ */
+ if (!slot1_in_use) {
+ /*
+ * Don't use iac1 if iac1-iac2 are free and either
+ * iac3 or iac4 (but not both) are free
+ */
+ if (slot2_in_use || slot3_in_use == slot4_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ goto out;
+ }
+ }
+ if (!slot2_in_use) {
+ slot = 2;
+ child->thread.debug.iac2 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if (!slot3_in_use) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ } else if (!slot4_in_use) {
+ slot = 4;
+ child->thread.debug.iac4 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC4;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ }
+out:
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+ switch (slot) {
+ case 1:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+ /* address range - clear slots 1 & 2 */
+ child->thread.debug.iac2 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+ }
+ child->thread.debug.iac1 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+ break;
+ case 2:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ /* used in a range */
+ return -EINVAL;
+ child->thread.debug.iac2 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 3:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+ /* address range - clear slots 3 & 4 */
+ child->thread.debug.iac4 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+ }
+ child->thread.debug.iac3 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+ break;
+ case 4:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ /* Used in a range */
+ return -EINVAL;
+ child->thread.debug.iac4 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int byte_enable =
+ (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+ & 0xf;
+ int condition_mode =
+ bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+ int slot;
+
+ if (byte_enable && condition_mode == 0)
+ return -EINVAL;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
+ slot = 1;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC1R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC1W;
+ child->thread.debug.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc1 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC1BE_SHIFT) |
+ (condition_mode << DBCR2_DVC1M_SHIFT));
+ }
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ /* Both dac1 and dac2 are part of a range */
+ return -ENOSPC;
+#endif
+ } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
+ slot = 2;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC2R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC2W;
+ child->thread.debug.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc2 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC2BE_SHIFT) |
+ (condition_mode << DBCR2_DVC2M_SHIFT));
+ }
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot + 4;
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+ if (slot == 1) {
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+ return -ENOENT;
+
+ child->thread.debug.dac1 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ child->thread.debug.dac2 = 0;
+ child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+ }
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc1 = 0;
+#endif
+ } else if (slot == 2) {
+ if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+ return -ENOENT;
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
+ /* Part of a range */
+ return -EINVAL;
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc2 = 0;
+#endif
+ child->thread.debug.dac2 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+ /* We don't allow range watchpoints to be used with DVC */
+ if (bp_info->condition_mode)
+ return -EINVAL;
+
+ /*
+ * Best effort to verify the address range. The user/supervisor bits
+ * prevent trapping in kernel space, but let's fail on an obvious bad
+ * range. The simple test on the mask is not fool-proof, and any
+ * exclusive range will spill over into kernel space.
+ */
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+ if (mode == PPC_BREAKPOINT_MODE_MASK) {
+ /*
+ * dac2 is a bitmask. Don't allow a mask that makes a
+ * kernel space address from a valid dac1 value
+ */
+ if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+ return -EIO;
+ } else {
+ /*
+ * For range breakpoints, addr2 must also be a valid address
+ */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+ }
+
+ if (child->thread.debug.dbcr0 &
+ (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+ return -ENOSPC;
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+ child->thread.debug.dac1 = bp_info->addr;
+ child->thread.debug.dac2 = bp_info->addr2;
+ if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12M;
+ else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
+ else /* PPC_BREAKPOINT_MODE_MASK */
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * Check for invalid flags and combinations
+ */
+ if (bp_info->trigger_type == 0 ||
+ (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+ PPC_BREAKPOINT_TRIGGER_RW)) ||
+ (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+ (bp_info->condition_mode &
+ ~(PPC_BREAKPOINT_CONDITION_MODE |
+ PPC_BREAKPOINT_CONDITION_BE_ALL)))
+ return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+ if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+#endif
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+ if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+ return set_instruction_bp(child, bp_info);
+ }
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ return set_dac_range(child, bp_info);
+#else
+ return -EINVAL;
+#endif
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+ int rc;
+
+ if (data <= 4)
+ rc = del_instruction_bp(child, (int)data);
+ else
+ rc = del_dac(child, (int)data - 4);
+
+ if (!rc) {
+ if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+ child->thread.debug.dbcr1)) {
+ child->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ child->thread.regs->msr &= ~MSR_DE;
+ }
+ }
+ return rc;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
new file mode 100644
index 000000000000..dd8b75dfbd06
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined (32- and 64-bit) gdb.
+ */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_altivec_to_thread(target);
+ return target->thread.used_vr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ int start, end;
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+
+ vrsave.word = target->thread.vrsave;
+
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ start, end);
+ }
+
+ return ret;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the first word of vrsave.
+ */
+ int start, end;
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+
+ vrsave.word = target->thread.vrsave;
+
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ start, end);
+ if (!ret)
+ target->thread.vrsave = vrsave.word;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
new file mode 100644
index 000000000000..3c8a81999292
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE 0
+#else
+#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG PT_MQ
+#else
+#define PT_MAX_PUT_REG PT_CCR
+#endif
+
+#define TVSO(f) (offsetof(struct thread_vr_state, f))
+#define TFSO(f) (offsetof(struct thread_fp_state, f))
+#define TSO(f) (offsetof(struct thread_struct, f))
+
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ REGSET_TM_CGPR, /* TM checkpointed GPR registers */
+ REGSET_TM_CFPR, /* TM checkpointed FPR registers */
+ REGSET_TM_CVMX, /* TM checkpointed VMX registers */
+ REGSET_TM_CVSX, /* TM checkpointed VSX registers */
+ REGSET_TM_SPR, /* TM specific SPR registers */
+ REGSET_TM_CTAR, /* TM checkpointed TAR register */
+ REGSET_TM_CPPR, /* TM checkpointed PPR register */
+ REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+ REGSET_PPR, /* PPR register */
+ REGSET_DSCR, /* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ REGSET_TAR, /* TAR register */
+ REGSET_EBB, /* EBB registers */
+ REGSET_PMR, /* Performance Monitor Registers */
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
+};
+
+/* ptrace-(no)vsx */
+
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-vsx */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset);
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-altivec */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset);
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-spe */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset);
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace */
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf,
+ unsigned long *regs);
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs);
+
+/* ptrace-tm */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset);
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-view */
+
+extern const struct user_regset_view user_ppc_native_view;
+
+/* ptrace-(no)adv */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp);
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data);
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info);
+long ppc_del_hwdebug(struct task_struct *child, long data);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
new file mode 100644
index 000000000000..f87e7c5c3bf3
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ regs->msr &= ~MSR_BE;
+ regs->msr |= MSR_SE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ regs->msr &= ~MSR_SE;
+ regs->msr |= MSR_BE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL)
+ regs->msr &= ~(MSR_SE | MSR_BE);
+
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = 0;
+ if (ppc_breakpoint_available())
+ dbginfo->num_data_bps = 1;
+ else
+ dbginfo->num_data_bps = 0;
+ dbginfo->num_condition_regs = 0;
+ dbginfo->data_bp_alignment = sizeof(long);
+ dbginfo->sizeof_condition = 0;
+ if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) {
+ dbginfo->features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+ if (dawr_enabled())
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
+ } else {
+ dbginfo->features = 0;
+ }
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ unsigned long dabr_fake;
+
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
+ (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
+ return put_user(dabr_fake, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ bool set_bp = true;
+ struct arch_hw_breakpoint hw_brk;
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
+ if (data && !(data & HW_BRK_TYPE_TRANSLATE))
+ return -EIO;
+ hw_brk.address = data & (~HW_BRK_TYPE_DABR);
+ hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (!set_bp) {
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ return 0;
+ }
+ if (bp) {
+ attr = bp->attr;
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
+
+ /* Enable breakpoint */
+ attr.disabled = false;
+
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret)
+ return ret;
+
+ thread->ptrace_bps[0] = bp;
+ thread->hw_brk = hw_brk;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type,
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, task);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ task->thread.hw_brk = hw_brk;
+ return 0;
+}
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int len = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ struct arch_hw_breakpoint brk;
+
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * We only support one data breakpoint
+ */
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+
+ if ((unsigned long)bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ brk.type = HW_BRK_TYPE_TRANSLATE;
+ brk.len = DABR_MAX_LEN;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ brk.type |= HW_BRK_TYPE_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ brk.type |= HW_BRK_TYPE_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ len = bp_info->addr2 - bp_info->addr;
+ else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ len = 1;
+ else
+ return -EINVAL;
+ bp = thread->ptrace_bps[0];
+ if (bp)
+ return -ENOSPC;
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = (unsigned long)bp_info->addr;
+ attr.bp_len = len;
+ arch_bp_generic_fields(brk.type, &attr.bp_type);
+
+ bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
+ thread->ptrace_bps[0] = bp;
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+ return 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+ return -EINVAL;
+
+ if (child->thread.hw_brk.address)
+ return -ENOSPC;
+
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
+ child->thread.hw_brk = brk;
+
+ return 1;
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (data != 1)
+ return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ } else {
+ ret = -ENOENT;
+ }
+ return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (child->thread.hw_brk.address == 0)
+ return -ENOENT;
+
+ child->thread.hw_brk.address = 0;
+ child->thread.hw_brk.type = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
new file mode 100644
index 000000000000..b2dc4e92d11a
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c
new file mode 100644
index 000000000000..68b86b4a4be4
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * For get_evrregs/set_evrregs functions 'data' has the following layout:
+ *
+ * struct {
+ * u32 evr[32];
+ * u64 acc;
+ * u32 spefscr;
+ * }
+ */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_spe_to_thread(target);
+ return target->thread.used_spe ? regset->n : 0;
+}
+
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
+
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
new file mode 100644
index 000000000000..d75aff31f637
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "ptrace-decl.h"
+
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * it's thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
+ */
+
+ if (!cpu_has_feature(CPU_FTR_TM) || tsk != current)
+ return;
+
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&tsk->thread);
+ }
+}
+
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+ return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
+}
+
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
+ task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.ckpt_regs.trap = trap & 0xfff0;
+ return 0;
+}
+
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed GPR category.
+ */
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_ckpt_msr(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct user_pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct user_pt_regs), -1);
+
+ return ret;
+}
+
+/*
+ * tm_cgpr_set - set the CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed FPR category.
+ */
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ for (i = 0; i < 32; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_CKFPR(i) = buf[i];
+ target->thread.ckfp_state.fpscr = buf[32];
+ return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in checkpointed VMX category.
+ */
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cvmx_get - get CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+ 0, 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvmx_set - set CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+ 0, 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ if (!ret)
+ target->thread.ckvrsave = vrsave.word;
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed VSX category.
+ */
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks the active number of available
+ * regisers in the transactional memory SPR category.
+ */
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
+}
+
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
new file mode 100644
index 000000000000..15e3b79b6395
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -0,0 +1,904 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/nospec.h>
+#include <linux/pkeys.h>
+
+#include "ptrace-decl.h"
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define STR(s) #s /* convert to string */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define GPR_OFFSET_NAME(num) \
+ {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
+ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ GPR_OFFSET_NAME(31),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+static unsigned long get_user_msr(struct task_struct *task)
+{
+ return task->thread.regs->msr | task->thread.fpexc_mode;
+}
+
+static int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
+ task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ *data = task->thread.dscr;
+ return 0;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ task->thread.dscr = dscr;
+ task->thread.dscr_inherit = 1;
+ return 0;
+}
+#else
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ return -EIO;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ return -EIO;
+}
+#endif
+
+/*
+ * We prevent mucking around with the reserved area of trap
+ * which are used internally by the kernel.
+ */
+static int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.regs->trap = trap & 0xfff0;
+ return 0;
+}
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
+{
+ unsigned int regs_max;
+
+ if (task->thread.regs == NULL || !data)
+ return -EIO;
+
+ if (regno == PT_MSR) {
+ *data = get_user_msr(task);
+ return 0;
+ }
+
+ if (regno == PT_DSCR)
+ return get_user_dscr(task, data);
+
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
+ *data = ((unsigned long *)task->thread.regs)[regno];
+ return 0;
+ }
+
+ return -EIO;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+ if (task->thread.regs == NULL)
+ return -EIO;
+
+ if (regno == PT_MSR)
+ return set_user_msr(task, data);
+ if (regno == PT_TRAP)
+ return set_user_trap(task, data);
+ if (regno == PT_DSCR)
+ return set_user_dscr(task, data);
+
+ if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+}
+
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int i, ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /* We have a partial register set. Fill 14-31 with bogus values */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_msr(target);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct user_pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct user_pt_regs), -1);
+
+ return ret;
+}
+
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+}
+
+static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+}
+
+static int dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+}
+static int dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+static int tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+}
+static int tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+}
+
+static int ebb_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return regset->n;
+
+ return 0;
+}
+
+static int ebb_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (!target->thread.used_ebb)
+ return -ENODATA;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+ 0, 3 * sizeof(unsigned long));
+}
+
+static int ebb_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbhr, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.bescr, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ return ret;
+}
+static int pmu_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pmu_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+ 0, 5 * sizeof(unsigned long));
+}
+
+static int pmu_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sdar, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sier, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr2, 3 * sizeof(unsigned long),
+ 4 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr0, 4 * sizeof(unsigned long),
+ 5 * sizeof(unsigned long));
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr,
+ 0, ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+ [REGSET_PMR] = {
+ .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pmu_active, .get = pmu_get, .set = pmu_set
+ },
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
+};
+
+const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#include <linux/compat.h>
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf,
+ unsigned long *regs)
+{
+ compat_ulong_t *k = kbuf;
+ compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_MSR; --count)
+ if (__put_user((compat_ulong_t)regs[pos++], u++))
+ return -EFAULT;
+
+ if (count > 0 && pos == PT_MSR) {
+ reg = get_user_msr(target);
+ if (kbuf)
+ *k++ = reg;
+ else if (__put_user(reg, u++))
+ return -EFAULT;
+ ++pos;
+ --count;
+ }
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ if (__put_user((compat_ulong_t)regs[pos++], u++))
+ return -EFAULT;
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ PT_REGS_COUNT * sizeof(reg), -1);
+}
+
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs)
+{
+ const compat_ulong_t *k = kbuf;
+ const compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+ else
+ for (; count > 0 && pos < PT_MSR; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+
+
+ if (count > 0 && pos == PT_MSR) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_msr(target, reg);
+ ++pos;
+ --count;
+ }
+
+ if (kbuf) {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+ } else {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ }
+
+ if (count > 0 && pos == PT_TRAP) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_trap(target, reg);
+ ++pos;
+ --count;
+ }
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+}
+
+static int gpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int i;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /*
+ * We have a partial register set.
+ * Fill 14-31 with bogus values.
+ */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
+}
+
+static int gpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
+}
+
+/*
+ * These are the regset flavors matching the CONFIG_PPC32 native set.
+ */
+static const struct user_regset compat_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+ .get = gpr32_get, .set = gpr32_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active,
+ .get = tm_cgpr32_get, .set = tm_cgpr32_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_compat_view = {
+ .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+ .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT))
+ return &user_ppc_compat_view;
+ return &user_ppc_native_view;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000000..d53466d49cc0
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ return 0;
+}
+
+/*
+ * Currently to set and and get all the vsx state, you need to call
+ * the fp and VMX calls as well. This only get/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
new file mode 100644
index 000000000000..f6e51be47c6e
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -0,0 +1,481 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/audit.h>
+#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
+
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* make sure the single step bit is not set. */
+ user_disable_single_step(child);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret = -EPERM;
+ void __user *datavp = (void __user *) data;
+ unsigned long __user *datalp = datavp;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long index, tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_get_reg(child, (int) index, &tmp);
+ if (ret)
+ break;
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ memcpy(&tmp, &child->thread.TS_FPR(fpidx),
+ sizeof(long));
+ else
+ tmp = child->thread.fp_state.fpscr;
+ }
+ ret = put_user(tmp, datalp);
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_put_reg(child, index, data);
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ memcpy(&child->thread.TS_FPR(fpidx), &data,
+ sizeof(long));
+ else
+ child->thread.fp_state.fpscr = data;
+ ret = 0;
+ }
+ break;
+ }
+
+ case PPC_PTRACE_GETHWDBGINFO: {
+ struct ppc_debug_info dbginfo;
+
+ ppc_gethwdinfo(&dbginfo);
+
+ if (copy_to_user(datavp, &dbginfo,
+ sizeof(struct ppc_debug_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ case PPC_PTRACE_SETHWDEBUG: {
+ struct ppc_hw_breakpoint bp_info;
+
+ if (copy_from_user(&bp_info, datavp,
+ sizeof(struct ppc_hw_breakpoint)))
+ return -EFAULT;
+ return ppc_set_hwdebug(child, &bp_info);
+ }
+
+ case PPC_PTRACE_DELHWDEBUG: {
+ ret = ppc_del_hwdebug(child, data);
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG:
+ ret = ptrace_get_debugreg(child, addr, datalp);
+ break;
+
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+
+#ifdef CONFIG_PPC64
+ case PTRACE_GETREGS64:
+#endif
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+#ifdef CONFIG_PPC64
+ case PTRACE_SETREGS64:
+#endif
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+#ifdef CONFIG_ALTIVEC
+ case PTRACE_GETVRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+
+ case PTRACE_SETVRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+#endif
+#ifdef CONFIG_SPE
+ case PTRACE_GETEVRREGS:
+ /* Get the child spe register state. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+
+ case PTRACE_SETEVRREGS:
+ /* Set the child spe register state. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+#endif
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_SECCOMP
+static int do_seccomp(struct pt_regs *regs)
+{
+ if (!test_thread_flag(TIF_SECCOMP))
+ return 0;
+
+ /*
+ * The ABI we present to seccomp tracers is that r3 contains
+ * the syscall return value and orig_gpr3 contains the first
+ * syscall parameter. This is different to the ptrace ABI where
+ * both r3 and orig_gpr3 contain the first syscall parameter.
+ */
+ regs->gpr[3] = -ENOSYS;
+
+ /*
+ * We use the __ version here because we have already checked
+ * TIF_SECCOMP. If this fails, there is nothing left to do, we
+ * have already loaded -ENOSYS into r3, or seccomp has put
+ * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (__secure_computing(NULL))
+ return -1;
+
+ /*
+ * The syscall was allowed by seccomp, restore the register
+ * state to what audit expects.
+ * Note that we use orig_gpr3, which means a seccomp tracer can
+ * modify the first syscall parameter (in orig_gpr3) and also
+ * allow the syscall to proceed.
+ */
+ regs->gpr[3] = regs->orig_gpr3;
+
+ return 0;
+}
+#else
+static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_SECCOMP */
+
+/**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+ *
+ * Performs various types of tracing on syscall entry. This includes seccomp,
+ * ptrace, syscall tracepoints and audit.
+ *
+ * The pt_regs are potentially visible to userspace via ptrace, so their
+ * contents is ABI.
+ *
+ * One or more of the tracers may modify the contents of pt_regs, in particular
+ * to modify arguments or even the syscall number itself.
+ *
+ * It's also possible that a tracer can choose to reject the system call. In
+ * that case this function will return an illegal syscall number, and will put
+ * an appropriate return value in regs->r3.
+ *
+ * Return: the (possibly changed) syscall number.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ u32 flags;
+
+ user_exit();
+
+ flags = READ_ONCE(current_thread_info()->flags) &
+ (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+ if (flags) {
+ int rc = tracehook_report_syscall_entry(regs);
+
+ if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+ /*
+ * A nonzero return code from
+ * tracehook_report_syscall_entry() tells us to prevent
+ * the syscall execution, but we are not going to
+ * execute it anyway.
+ *
+ * Returning -1 will skip the syscall execution. We want
+ * to avoid clobbering any registers, so we don't goto
+ * the skip label below.
+ */
+ return -1;
+ }
+
+ if (rc) {
+ /*
+ * The tracer decided to abort the syscall. Note that
+ * the tracer may also just change regs->gpr[0] to an
+ * invalid syscall number, that is handled below on the
+ * exit path.
+ */
+ goto skip;
+ }
+ }
+
+ /* Run seccomp after ptrace; allow it to set gpr[3]. */
+ if (do_seccomp(regs))
+ return -1;
+
+ /* Avoid trace and audit when syscall is invalid. */
+ if (regs->gpr[0] >= NR_syscalls)
+ goto skip;
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+ if (!is_32bit_task())
+ audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
+ audit_syscall_entry(regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
+
+ /* Return the possibly modified but valid syscall number */
+ return regs->gpr[0];
+
+skip:
+ /*
+ * If we are aborting explicitly, or if the syscall number is
+ * now invalid, set the return value to -ENOSYS.
+ */
+ regs->gpr[3] = -ENOSYS;
+ return -1;
+}
+
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->result);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
+
+ user_enter();
+}
+
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+}
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index f37eb53de1a1..7976ddf29c0e 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -17,21 +17,10 @@
* this archive for more details.
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
#include <linux/compat.h>
-#include <linux/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/switch_to.h>
/*
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 7f8c890360fe..f9c0d888ce8a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -787,8 +787,7 @@ EXPORT_SYMBOL(powerpc_debugfs_root);
static int powerpc_debugfs_init(void)
{
powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
-
- return powerpc_debugfs_root == NULL;
+ return 0;
}
arch_initcall(powerpc_debugfs_init);
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 2dd0d9cb5a20..2ec835574cc9 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -8,6 +8,12 @@
#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
#define __ARCH_POWERPC_KERNEL_SETUP_H
+#ifdef CONFIG_CC_IS_CLANG
+#define __nostackprotector
+#else
+#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
+#endif
+
void initialize_cache_info(void);
void irqstack_early_init(void);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5b49b26eb154..305ca89d856f 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid_phys);
int smp_hw_index[NR_CPUS];
EXPORT_SYMBOL(smp_hw_index);
-unsigned long ISA_DMA_THRESHOLD;
unsigned int DMA_MODE_READ;
unsigned int DMA_MODE_WRITE;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e05e6dd67ae6..438a9befce41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -279,24 +279,42 @@ void __init record_spr_defaults(void)
* device-tree is not accessible via normal means at this point.
*/
-void __init early_setup(unsigned long dt_ptr)
+void __init __nostackprotector early_setup(unsigned long dt_ptr)
{
static __initdata struct paca_struct boot_paca;
/* -------- printk is _NOT_ safe to use here ! ------- */
- /* Try new device tree based feature discovery ... */
- if (!dt_cpu_ftrs_init(__va(dt_ptr)))
- /* Otherwise use the old style CPU table */
- identify_cpu(0, mfspr(SPRN_PVR));
-
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
initialise_paca(&boot_paca, 0);
setup_paca(&boot_paca);
fixup_boot_paca();
/* -------- printk is now safe to use ------- */
+ /* Try new device tree based feature discovery ... */
+ if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+ /* Otherwise use the old style CPU table */
+ identify_cpu(0, mfspr(SPRN_PVR));
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 800433685888..d396efca4068 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,6 @@
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
size_t frame_size, int is_32);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 84ed2e77ef9c..adfde59cf4ba 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -473,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
err |= __get_user(tsk->thread.ckpt_regs.ccr,
&sc->gp_regs[PT_CCR]);
+ /* Don't allow userspace to set the trap value */
+ regs->trap = 0;
+
/* These regs are not checkpointed; they can go in 'regs'. */
- err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ea6adbf6a221..6d2a3a3666f0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1185,10 +1185,30 @@ static inline void add_cpu_to_smallcore_masks(int cpu)
}
}
+int get_physical_package_id(int cpu)
+{
+ int pkg_id = cpu_to_chip_id(cpu);
+
+ /*
+ * If the platform is PowerNV or Guest on KVM, ibm,chip-id is
+ * defined. Hence we would return the chip-id as the result of
+ * get_physical_package_id.
+ */
+ if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) &&
+ IS_ENABLED(CONFIG_PPC_SPLPAR)) {
+ struct device_node *np = of_get_cpu_node(cpu, NULL);
+ pkg_id = of_node_to_nid(np);
+ of_node_put(np);
+ }
+
+ return pkg_id;
+}
+EXPORT_SYMBOL_GPL(get_physical_package_id);
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
- int chipid = cpu_to_chip_id(cpu);
+ int pkg_id = get_physical_package_id(cpu);
int i;
/*
@@ -1217,11 +1237,11 @@ static void add_cpu_to_masks(int cpu)
for_each_cpu(i, cpu_l2_cache_mask(cpu))
set_cpus_related(cpu, i, cpu_core_mask);
- if (chipid == -1)
+ if (pkg_id == -1)
return;
for_each_cpu(i, cpu_online_mask)
- if (cpu_to_chip_id(i) == chipid)
+ if (get_physical_package_id(i) == pkg_id)
set_cpus_related(cpu, i, cpu_core_mask);
}
@@ -1359,11 +1379,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
- /*
- * On a shared LPAR, associativity needs to be requested.
- * Hence, get numa topology before dumping cpu topology
- */
- shared_proc_topology_init();
dump_numa_cpu_topology();
#ifdef CONFIG_SCHED_SMT
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2a46cfed5fd..c477b8585a29 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -57,7 +57,7 @@ void save_stack_trace(struct stack_trace *trace)
{
unsigned long sp;
- sp = current_stack_pointer();
+ sp = current_stack_frame();
save_context_stack(trace, sp, current, 1);
}
@@ -71,7 +71,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
return;
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
@@ -131,7 +131,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
}
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
new file mode 100644
index 000000000000..cf06eb443a80
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/err.h>
+#include <asm/asm-prototypes.h>
+#include <asm/book3s/64/kup-radix.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/unistd.h>
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(long r3, long r4, long r5,
+ long r6, long r7, long r8,
+ unsigned long r0, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+ syscall_fn f;
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+ trace_hardirqs_off(); /* finish reconciling */
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
+
+ account_cpu_user_entry();
+
+#ifdef CONFIG_PPC_SPLPAR
+ if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = local_paca->lppaca_ptr;
+
+ if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+ accumulate_stolen_time();
+ }
+#endif
+
+ kuap_check_amr();
+
+ /*
+ * This is not required for the syscall exit path, but makes the
+ * stack frame look nicer. If this was initialised in the first stack
+ * frame, or if the unwinder was taught the first stack frame always
+ * returns to user with IRQS_ENABLED, this store could be avoided!
+ */
+ regs->softe = IRQS_ENABLED;
+
+ local_irq_enable();
+
+ ti_flags = current_thread_info()->flags;
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test against NR_syscalls will fail and the return
+ * value to be used is in regs->gpr[3].
+ */
+ r0 = do_syscall_trace_enter(regs);
+ if (unlikely(r0 >= NR_syscalls))
+ return regs->gpr[3];
+ r3 = regs->gpr[3];
+ r4 = regs->gpr[4];
+ r5 = regs->gpr[5];
+ r6 = regs->gpr[6];
+ r7 = regs->gpr[7];
+ r8 = regs->gpr[8];
+
+ } else if (unlikely(r0 >= NR_syscalls)) {
+ return -ENOSYS;
+ }
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ if (unlikely(ti_flags & _TIF_32BIT)) {
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 &= 0x00000000ffffffffULL;
+ r4 &= 0x00000000ffffffffULL;
+ r5 &= 0x00000000ffffffffULL;
+ r6 &= 0x00000000ffffffffULL;
+ r7 &= 0x00000000ffffffffULL;
+ r8 &= 0x00000000ffffffffULL;
+
+ } else {
+ f = (void *)sys_call_table[r0];
+ }
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = *ti_flagsp;
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+again:
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ /* This pattern matches prep_irq_for_idle */
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+{
+#ifdef CONFIG_PPC_BOOK3E
+ struct thread_struct *ts = &current->thread;
+#endif
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long flags;
+ unsigned long ret = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
+
+ local_irq_save(flags);
+
+again:
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable(); /* returning to user: may enable */
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ local_irq_disable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_BOOK3E
+ if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ mtspr(SPRN_DBCR0, ts->debug.dbcr0);
+ mtspr(SPRN_DBSR, -1);
+ }
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
+void unrecoverable_exception(struct pt_regs *regs);
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long flags;
+ unsigned long ret = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
+ unrecoverable_exception(regs);
+ BUG_ON(regs->msr & MSR_PR);
+ BUG_ON(!FULL_REGS(regs));
+
+ if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
+ ret = 1;
+ }
+
+ local_irq_save(flags);
+
+ if (regs->softe == IRQS_ENABLED) {
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+ if (IS_ENABLED(CONFIG_PREEMPT)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ /*
+ * Can't local_irq_restore to replay if we were in
+ * interrupt context. Must replay directly.
+ */
+ if (irqs_disabled_flags(flags)) {
+ replay_soft_interrupts();
+ } else {
+ local_irq_restore(flags);
+ local_irq_save(flags);
+ }
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+ } else {
+ /* Returning to a kernel context with local irqs disabled. */
+ __hard_EE_RI_disable();
+ if (regs->msr & MSR_EE)
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ }
+
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ kuap_restore_amr(regs);
+
+ return ret;
+}
+#endif
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 35b61bfc1b1a..220ae11555f2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,7 +9,9 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 nospu fork ppc_fork
+2 32 fork ppc_fork sys_fork
+2 64 fork sys_fork
+2 spu fork sys_ni_syscall
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -158,7 +160,9 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 nospu clone ppc_clone
+120 32 clone ppc_clone sys_clone
+120 64 clone sys_clone
+120 spu clone sys_ni_syscall
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -240,7 +244,9 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 nospu vfork ppc_vfork
+189 32 vfork ppc_vfork sys_vfork
+189 64 vfork sys_vfork
+189 spu vfork sys_ni_syscall
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 common readahead sys_readahead compat_sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
@@ -316,8 +322,8 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext ppc32_swapcontext
-249 64 swapcontext ppc64_swapcontext
+249 32 swapcontext ppc_swapcontext compat_sys_swapcontext
+249 64 swapcontext sys_swapcontext
249 spu swapcontext sys_ni_syscall
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
@@ -456,7 +462,7 @@
361 common bpf sys_bpf
362 nospu execveat sys_execveat compat_sys_execveat
363 32 switch_endian sys_ni_syscall
-363 64 switch_endian ppc_switch_endian
+363 64 switch_endian sys_switch_endian
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
@@ -516,6 +522,8 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 nospu clone3 ppc_clone3
+435 32 clone3 ppc_clone3 sys_clone3
+435 64 clone3 sys_clone3
+435 spu clone3 sys_ni_syscall
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676da11cb..479c70680b76 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -87,6 +87,155 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
#endif /* CONFIG_PPC64 */
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
+static void read_##NAME(void *val) \
+{ \
+ *(unsigned long *)val = mfspr(ADDRESS); \
+} \
+static void write_##NAME(void *val) \
+{ \
+ EXTRA; \
+ mtspr(ADDRESS, *(unsigned long *)val); \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+ store_##NAME(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+ return count; \
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#ifdef CONFIG_PPC64
+
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
+static unsigned long dscr_default;
+
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void read_dscr(void *val)
+{
+ *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void write_dscr(void *val)
+{
+ get_paca()->dscr_default = *(unsigned long *)val;
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = *(unsigned long *)val;
+ mtspr(SPRN_DSCR, *(unsigned long *)val);
+ }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+ attr->attr.mode |= 0200;
+}
+
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
+static ssize_t show_dscr_default(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
+static ssize_t __used store_dscr_default(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ on_each_cpu(write_dscr, &val, 1);
+
+ return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ int err = 0;
+ int cpu;
+
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
+
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
+ }
+}
+#endif /* CONFIG_PPC64 */
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_BIT 63
@@ -407,84 +556,35 @@ void ppc_enable_pmcs(void)
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
-static void read_##NAME(void *val) \
-{ \
- *(unsigned long *)val = mfspr(ADDRESS); \
-} \
-static void write_##NAME(void *val) \
-{ \
- EXTRA; \
- mtspr(ADDRESS, *(unsigned long *)val); \
-}
-#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
-static ssize_t show_##NAME(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
- return sprintf(buf, "%lx\n", val); \
-} \
-static ssize_t __used \
- store_##NAME(struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- int ret = sscanf(buf, "%lx", &val); \
- if (ret != 1) \
- return -EINVAL; \
- smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
- return count; \
-}
-
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-#define SYSFS_SPRSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-
-#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
*/
-#if defined(CONFIG_PPC64)
+#ifdef CONFIG_PMU_SYSFS
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_PA6T 1
-#elif defined(CONFIG_PPC_BOOK3S_32)
-#define HAS_PPC_PMC_CLASSIC 1
-#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_G4 1
#endif
+#ifdef CONFIG_PPC64
+#define HAS_PPC_PMC_PA6T 1
+#define HAS_PPC_PMC56 1
+#endif
-#ifdef HAS_PPC_PMC_CLASSIC
-SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
-SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
-SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
-SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
-SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
-SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
-SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
-SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
-
-#ifdef HAS_PPC_PMC_G4
-SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#ifdef CONFIG_PPC_BOOK3S_32
+#define HAS_PPC_PMC_G4 1
#endif
+#endif /* CONFIG_PMU_SYSFS */
+#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC)
+#define HAS_PPC_PA6T
+#endif
+/*
+ * SPRs which are not related to PMU.
+ */
#ifdef CONFIG_PPC64
-SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
-SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-
-SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
@@ -495,115 +595,38 @@ SYSFS_SPRSETUP(tscr, SPRN_TSCR);
enable write when needed with a separate function.
Lets be conservative and default to pseries.
*/
-static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
+#endif /* CONFIG_PPC64 */
-/*
- * This is the system wide DSCR register default value. Any
- * change to this default value through the sysfs interface
- * will update all per cpu DSCR default values across the
- * system stored in their respective PACA structures.
- */
-static unsigned long dscr_default;
-
-/**
- * read_dscr() - Fetch the cpu specific DSCR default
- * @val: Returned cpu specific DSCR default value
- *
- * This function returns the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void read_dscr(void *val)
-{
- *(unsigned long *)val = get_paca()->dscr_default;
-}
-
-
-/**
- * write_dscr() - Update the cpu specific DSCR default
- * @val: New cpu specific DSCR default value to update
- *
- * This function updates the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void write_dscr(void *val)
-{
- get_paca()->dscr_default = *(unsigned long *)val;
- if (!current->thread.dscr_inherit) {
- current->thread.dscr = *(unsigned long *)val;
- mtspr(SPRN_DSCR, *(unsigned long *)val);
- }
-}
-
-SYSFS_SPRSETUP_SHOW_STORE(dscr);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
-
-static void add_write_permission_dev_attr(struct device_attribute *attr)
-{
- attr->attr.mode |= 0200;
-}
-
-/**
- * show_dscr_default() - Fetch the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- *
- * This function returns the system wide DSCR default value.
- */
-static ssize_t show_dscr_default(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%lx\n", dscr_default);
-}
-
-/**
- * store_dscr_default() - Update the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- * @count: Size of the update
- *
- * This function updates the system wide DSCR default value.
- */
-static ssize_t __used store_dscr_default(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- unsigned long val;
- int ret = 0;
-
- ret = sscanf(buf, "%lx", &val);
- if (ret != 1)
- return -EINVAL;
- dscr_default = val;
+#ifdef HAS_PPC_PMC_CLASSIC
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+#endif
- on_each_cpu(write_dscr, &val, 1);
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
- return count;
-}
+#ifdef HAS_PPC_PMC56
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-static DEVICE_ATTR(dscr_default, 0600,
- show_dscr_default, store_dscr_default);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
-static void sysfs_create_dscr_default(void)
-{
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- int err = 0;
- int cpu;
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+#endif /* HAS_PPC_PMC56 */
- dscr_default = spr_default_dscr;
- for_each_possible_cpu(cpu)
- paca_ptrs[cpu]->dscr_default = dscr_default;
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
- }
-}
-#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
@@ -612,7 +635,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_MISC
+#endif
+
+#ifdef HAS_PPC_PA6T
SYSFS_SPRSETUP(hid0, SPRN_HID0);
SYSFS_SPRSETUP(hid1, SPRN_HID1);
SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -641,15 +666,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_MISC */
-#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PA6T */
#ifdef HAS_PPC_PMC_IBM
static struct device_attribute ibm_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
};
-#endif /* HAS_PPC_PMC_G4 */
+#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
static struct device_attribute g4_common_attrs[] = {
@@ -659,6 +683,7 @@ static struct device_attribute g4_common_attrs[] = {
};
#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_CLASSIC
static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
@@ -666,14 +691,16 @@ static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
-#ifdef CONFIG_PPC64
+#ifdef HAS_PPC_PMC56
__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
#endif
};
+#endif
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
static struct device_attribute pa6t_attrs[] = {
+#ifdef HAS_PPC_PMC_PA6T
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
@@ -682,7 +709,8 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_MISC
+#endif
+#ifdef HAS_PPC_PA6T
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -711,10 +739,9 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_MISC */
+#endif /* HAS_PPC_PA6T */
};
-#endif /* HAS_PPC_PMC_PA6T */
-#endif /* HAS_PPC_PMC_CLASSIC */
+#endif
#ifdef CONFIG_PPC_SVM
static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
@@ -765,14 +792,14 @@ static int register_cpu_online(unsigned int cpu)
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -787,8 +814,10 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
+#endif /* CONFIG_PMU_SYSFS */
if (cpu_has_feature(CPU_FTR_PURR)) {
if (!firmware_has_feature(FW_FEATURE_LPAR))
@@ -854,14 +883,14 @@ static int unregister_cpu_online(unsigned int cpu)
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -876,8 +905,10 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);
+#endif /* CONFIG_PMU_SYSFS */
if (cpu_has_feature(CPU_FTR_PURR))
device_remove_file(s, &dev_attr_purr);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 5b905a2f4e4d..d34276f3c495 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -16,25 +16,22 @@
#ifdef CONFIG_PPC64
.p2align 3
+#define __SYSCALL(nr, entry) .8byte entry
+#else
+#define __SYSCALL(nr, entry) .long entry
#endif
.globl sys_call_table
sys_call_table:
#ifdef CONFIG_PPC64
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_64.h>
-#undef __SYSCALL
#else
-#define __SYSCALL(nr, entry) .long entry
#include <asm/syscall_table_32.h>
-#undef __SYSCALL
#endif
#ifdef CONFIG_COMPAT
.globl compat_sys_call_table
compat_sys_call_table:
#define compat_sys_sigsuspend sys_sigsuspend
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_c32.h>
-#undef __SYSCALL
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1168e8b37e30..bda9cb4a0a5f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -663,15 +663,6 @@ void timer_broadcast_interrupt(void)
}
#endif
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest. We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
-
#ifdef CONFIG_SUSPEND
static void generic_suspend_disable_irqs(void)
{
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 82a3438300fd..3fca22276bb1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2278,35 +2278,20 @@ void ppc_warn_emulated_print(const char *type)
static int __init ppc_warn_emulated_init(void)
{
- struct dentry *dir, *d;
+ struct dentry *dir;
unsigned int i;
struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
- if (!powerpc_debugfs_root)
- return -ENODEV;
-
dir = debugfs_create_dir("emulated_instructions",
powerpc_debugfs_root);
- if (!dir)
- return -ENOMEM;
- d = debugfs_create_u32("do_warn", 0644, dir,
- &ppc_warn_emulated);
- if (!d)
- goto fail;
+ debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
- for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, 0644, dir,
- (u32 *)&entries[i].val.counter);
- if (!d)
- goto fail;
- }
+ for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
+ debugfs_create_u32(entries[i].name, 0644, dir,
+ (u32 *)&entries[i].val.counter);
return 0;
-
-fail:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
device_initcall(ppc_warn_emulated_init);
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b9a108411c0d..d3b77c15f9ce 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -391,12 +391,7 @@ static unsigned long __init find_function64(struct lib64_elfinfo *lib,
symname);
return 0;
}
-#ifdef VDS64_HAS_DESCRIPTORS
- return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
- VDSO64_LBASE;
-#else
return sym->st_value - VDSO64_LBASE;
-#endif
}
static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 25c14a0981bf..d20c5e79e03c 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -134,7 +134,7 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
- b fast_exception_return
+ b fast_interrupt_return
#endif /* CONFIG_VSX */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b4c89a1acebb..31a0f201fb6f 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -256,6 +256,7 @@ SECTIONS
*(.dynamic)
}
.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+ .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) }
.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
{