summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/asm-offsets.c7
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S50
-rw-r--r--arch/powerpc/kernel/cputable.c15
-rw-r--r--arch/powerpc/kernel/crash.c16
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c30
-rw-r--r--arch/powerpc/kernel/eeh.c59
-rw-r--r--arch/powerpc/kernel/eeh_driver.c10
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c64
-rw-r--r--arch/powerpc/kernel/entry_32.S10
-rw-r--r--arch/powerpc/kernel/entry_64.S67
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S20
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S118
-rw-r--r--arch/powerpc/kernel/head_64.S11
-rw-r--r--arch/powerpc/kernel/head_8xx.S275
-rw-r--r--arch/powerpc/kernel/idle_book3e.S5
-rw-r--r--arch/powerpc/kernel/idle_power4.S5
-rw-r--r--arch/powerpc/kernel/irq.c29
-rw-r--r--arch/powerpc/kernel/mce.c142
-rw-r--r--arch/powerpc/kernel/mce_power.c115
-rw-r--r--arch/powerpc/kernel/module.lds8
-rw-r--r--arch/powerpc/kernel/module_64.c35
-rw-r--r--arch/powerpc/kernel/optprobes_head.S2
-rw-r--r--arch/powerpc/kernel/paca.c13
-rw-r--r--arch/powerpc/kernel/pci-common.c27
-rw-r--r--arch/powerpc/kernel/pci_dn.c6
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c2
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c2
-rw-r--r--arch/powerpc/kernel/process.c28
-rw-r--r--arch/powerpc/kernel/prom.c115
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/ptrace.c78
-rw-r--r--arch/powerpc/kernel/rtas-proc.c14
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c21
-rw-r--r--arch/powerpc/kernel/setup.h4
-rw-r--r--arch/powerpc/kernel/setup_64.c48
-rw-r--r--arch/powerpc/kernel/signal_32.c8
-rw-r--r--arch/powerpc/kernel/signal_64.c11
-rw-r--r--arch/powerpc/kernel/smp.c18
-rw-r--r--arch/powerpc/kernel/sysfs.c8
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/traps.c51
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S67
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kernel/watchdog.c100
46 files changed, 980 insertions, 747 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f390d57cf2e1..88b84ac76b53 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -178,7 +178,7 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
- OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled);
+ OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
#ifdef CONFIG_PPC_BOOK3S
OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
@@ -239,8 +239,7 @@ int main(void)
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
OFFSET(PACA_EXRFI, paca_struct, exrfi);
- OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
- OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+ OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
@@ -401,6 +400,8 @@ int main(void)
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 679bbe714e85..3f30c994e931 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7)
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
- bl __init_tlb_power7
mtlr r11
blr
@@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA206
bl __init_HFSCR
- bl __init_tlb_power8
bl __init_PMU_HV
bl __init_PMU_HV_ISA207
mtlr r11
@@ -111,7 +107,6 @@ _GLOBAL(__setup_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -136,7 +131,6 @@ _GLOBAL(__restore_cpu_power9)
li r4,0 /* LPES = 0 */
bl __init_LPCR_ISA300
bl __init_HFSCR
- bl __init_tlb_power9
bl __init_PMU_HV
mtlr r11
blr
@@ -194,50 +188,6 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-/*
- * Clear the TLB using the specified IS form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- */
-__init_tlb_power7:
- li r6,POWER7_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-__init_tlb_power8:
- li r6,POWER8_TLB_SETS
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-/*
- * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
- * and one for partition scope to clear process and partition table entries.
- */
-__init_tlb_power9:
- li r6,POWER9_TLB_SETS_HASH - 1
- mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
- li r8,0
- ptesync
- PPC_TLBIEL(7, 8, 2, 1, 0)
- PPC_TLBIEL(7, 8, 2, 0, 0)
-2: addi r7,r7,0x1000
- PPC_TLBIEL(7, 8, 0, 0, 0)
- bdnz 2b
- ptesync
-1: blr
-
__init_PMU_HV:
li r5,0
mtspr SPRN_MMCRC,r5
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1350f49d81a8..c40a9fc1e5d1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
-extern void __flush_tlb_power7(unsigned int action);
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.platform = "power9",
},
{ /* Power7 */
@@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
@@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
.machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
@@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
@@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
@@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
- .flush_tlb = __flush_tlb_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index cbabb5adccd9..00b215125d3e 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -44,6 +44,14 @@
#define REAL_MODE_TIMEOUT 10000
static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
@@ -63,15 +71,12 @@ static int handle_fault(struct pt_regs *regs)
#ifdef CONFIG_SMP
static atomic_t cpus_in_crash;
-static void crash_ipi_callback(struct pt_regs *regs)
+void crash_ipi_callback(struct pt_regs *regs)
{
static cpumask_t cpus_state_saved = CPU_MASK_NONE;
int cpu = smp_processor_id();
- if (!cpu_online(cpu))
- return;
-
hard_irq_disable();
if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
crash_save_cpu(regs, cpu);
@@ -109,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
printk(KERN_EMERG "Sending IPI to other CPUs\n");
+ if (crash_wake_offline)
+ ncpus = num_present_cpus() - 1;
+
crash_send_ipi(crash_ipi_callback);
smp_wmb();
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8bdc2f96c5d6..945e2c29ad2d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -77,8 +77,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern void __flush_tlb_power8(unsigned int action);
-extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -92,27 +90,6 @@ static struct {
static void (*init_pmu_registers)(void);
-static void cpufeatures_flush_tlb(void)
-{
- /*
- * This is a temporary measure to keep equivalent TLB flush as the
- * cputable based setup code.
- */
- switch (PVR_VER(mfspr(SPRN_PVR))) {
- case PVR_POWER8:
- case PVR_POWER8E:
- case PVR_POWER8NVL:
- __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
- break;
- case PVR_POWER9:
- __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
- break;
- default:
- pr_err("unknown CPU version for boot TLB flush\n");
- break;
- }
-}
-
static void __restore_cpu_cpufeatures(void)
{
/*
@@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void)
if (init_pmu_registers)
init_pmu_registers();
-
- cpufeatures_flush_tlb();
}
static char dt_cpu_name[64];
@@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = NULL,
.cpu_restore = __restore_cpu_cpufeatures,
- .flush_tlb = NULL,
.machine_check_early = NULL,
.platform = NULL,
};
@@ -412,7 +386,6 @@ static void init_pmu_power8(void)
static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power8";
- cur_cpu_spec->flush_tlb = __flush_tlb_power8;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
return 1;
@@ -451,7 +424,6 @@ static void init_pmu_power9(void)
static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
{
cur_cpu_spec->platform = "power9";
- cur_cpu_spec->flush_tlb = __flush_tlb_power9;
cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
return 1;
@@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void)
system_registers.hfscr = mfspr(SPRN_HFSCR);
system_registers.fscr = mfspr(SPRN_FSCR);
- cpufeatures_flush_tlb();
-
pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index cbca0a667682..cc649809885e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
return NULL;
}
+int eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable report various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
/**
* pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 4f71e4c9beb7..beea2182d754 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata)
edev->in_error = true;
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
return NULL;
}
@@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata)
driver->err_handler->resume(dev);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+ eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+#endif
return NULL;
}
@@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata)
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
eeh_pcid_put(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
return NULL;
}
@@ -440,7 +446,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
return NULL;
}
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
#endif
return NULL;
@@ -496,7 +502,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
(*removed)++;
if (edev->physfn) {
-#ifdef CONFIG_PPC_POWERNV
+#ifdef CONFIG_PCI_IOV
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 797549289798..deed906dd8f1 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev, \
\
return sprintf(buf, _format "\n", edev->_memb); \
} \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
@@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
+#ifdef CONFIG_PCI_IOV
+static ssize_t eeh_notify_resume_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ pdn = pci_get_pdn(pdev);
+ return sprintf(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ if (!edev || !edev->pe || !eeh_ops->notify_resume)
+ return -ENODEV;
+
+ if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+ return -EIO;
+
+ return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+ struct device_node *np;
+ int rc = 0;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+ return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+ struct device_node *np;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV */
+
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ rc += eeh_notify_resume_add(pdev);
if (rc)
pr_warn("EEH: Unable to create sysfs entries\n");
@@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ eeh_notify_resume_remove(pdev);
+
if (edev)
edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e780e1fbf6c2..eb8d01bae8c6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r9
@@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r7
@@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11)
mtlr r10
REST_GPR(10, r11)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR1,r9
@@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r12
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2748584b767d..2cb5109a7ea3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -133,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
* of irq tracing is used, we additionally check that condition
* is correct
*/
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
- lbz r10,PACASOFTIRQEN(r13)
- xori r10,r10,1
-1: tdnei r10,0
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ lbz r10,PACAIRQSOFTMASK(r13)
+1: tdnei r10,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
@@ -152,7 +151,7 @@ system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
- li r10,1
+ li r10,IRQS_ENABLED
std r10,SOFTE(r1)
CURRENT_THREAD_INFO(r11, r1)
@@ -755,10 +754,10 @@ resume_kernel:
beq+ restore
/* Check that preempt_count() == 0 and interrupts are enabled */
lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
+ cmpwi cr0,r8,0
+ bne restore
ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
+ andi. r0,r0,IRQS_DISABLED
bne restore
/*
@@ -796,12 +795,12 @@ restore:
* are about to re-enable interrupts
*/
ld r5,SOFTE(r1)
- lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr0,r5,0
- beq .Lrestore_irq_off
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
- cmpwi cr0,r6,1
+ andi. r6,r6,IRQS_DISABLED
beq cr0,.Ldo_restore
/*
@@ -820,8 +819,8 @@ restore:
*/
.Lrestore_no_replay:
TRACE_ENABLE_INTS
- li r0,1
- stb r0,PACASOFTIRQEN(r13);
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/*
* Final return path. BookE is handled in a different file
@@ -939,9 +938,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 1f
rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
stb r7,PACAIRQHAPPENED(r13)
-1: li r0,0
- stb r0,PACASOFTIRQEN(r13);
- TRACE_DISABLE_INTS
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
b .Ldo_restore
/*
@@ -979,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
addi r3,r1,STACK_FRAME_OVERHEAD;
bl do_IRQ
b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
1: cmpwi cr0,r3,0xe60
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
@@ -1055,15 +1063,15 @@ _GLOBAL(enter_rtas)
li r0,0
mtcr r0
-#ifdef CONFIG_BUG
+#ifdef CONFIG_BUG
/* There is no way it is acceptable to get here with interrupts enabled,
* check it with the asm equivalent of WARN_ON
*/
- lbz r0,PACASOFTIRQEN(r13)
-1: tdnei r0,0
+ lbz r0,PACAIRQSOFTMASK(r13)
+1: tdeqi r0,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
-
+
/* Hard-disable interrupts */
mfmsr r6
rldicl r7,r6,48,1
@@ -1107,6 +1115,17 @@ __enter_rtas:
rtas_return_loc:
FIXUP_ENDIAN
+ /*
+ * Clear RI and set SF before anything.
+ */
+ mfmsr r6
+ li r0,MSR_RI
+ andc r6,r6,r0
+ sldi r0,r0,(MSR_SF_LG - MSR_RI_LG)
+ or r6,r6,r0
+ sync
+ mtmsrd r6
+
/* relocation is off at this point */
GET_PACA(r4)
clrldi r4,r4,2 /* convert to realmode address */
@@ -1115,12 +1134,6 @@ rtas_return_loc:
0: mflr r3
ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
- mfmsr r6
- li r0,MSR_RI
- andc r6,r6,r0
- sync
- mtmsrd r6
-
ld r1,PACAR1(r4) /* Restore our SP */
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index acd8ca76233e..ee832d344a5a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACASOFTIRQEN(r13)
+ lbz r10,PACAIRQSOFTMASK(r13)
SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
@@ -206,17 +206,17 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACASOFTIRQEN(r13)
+ lbz r6,PACAIRQSOFTMASK(r13)
ld r5,SOFTE(r1)
/* Interrupts had better not already be enabled... */
- twnei r6,0
+ tweqi r6,IRQS_ENABLED
- cmpwi cr0,r5,0
- beq 1f
+ andi. r6,r5,IRQS_DISABLED
+ bne 1f
TRACE_ENABLE_INTS
- stb r5,PACASOFTIRQEN(r13)
+ stb r5,PACAIRQSOFTMASK(r13)
1:
/*
* Restore PACAIRQHAPPENED rather than setting it based on
@@ -351,9 +351,9 @@ ret_from_mc_except:
#define PROLOG_ADDITION_NONE_MC(n)
#define PROLOG_ADDITION_MASKABLE_GEN(n) \
- lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
- cmpwi cr0,r10,0; /* yes -> go out of line */ \
- beq masked_interrupt_book3e_##n
+ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \
+ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
+ bne masked_interrupt_book3e_##n
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
@@ -397,7 +397,7 @@ exc_##n##_common: \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
- lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
ld r12,exception_marker@toc(r2); \
li r0,0; \
std r3,GPR10(r1); /* save r10 to stackframe */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2dc10bf646b8..243d072a225a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -718,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
@@ -729,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
.globl hardware_interrupt_relon_hv;
hardware_interrupt_relon_hv:
BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
@@ -827,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -839,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980)
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xa00)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -1052,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
@@ -1110,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common)
EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1120,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1132,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
+EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
+EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xf00)
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
@@ -1345,7 +1351,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
@@ -1455,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
rfid
@@ -1497,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
- std r12,PACA_EXRFI+EX_R12(r13)
- std r8,PACA_EXRFI+EX_R13(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SETS(r13)
- ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
- addi r12,r12,8
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
-1: li r8,0
- .rept 8 /* 8-way set associative */
- ldx r11,r10,r8
- add r8,r8,r12
- xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
- add r8,r8,r11 // Add 0, this creates a dependency on the ldx
- .endr
- addi r10,r10,128 /* 128 byte cache line */
+
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
bdnz 1b
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
- ld r12,PACA_EXRFI+EX_R12(r13)
- ld r8,PACA_EXRFI+EX_R13(r13)
GET_SCRATCH0(r13);
hrfid
@@ -1632,7 +1634,7 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+ lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1828,6 +1830,8 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE
beq hardware_interrupt_common
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
+ cmpwi r3,0xf00
+ beq performance_monitor_common
BEGIN_FTR_SECTION
cmpwi r3,0xa00
beq h_doorbell_common_msgclr
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index aa71a90f5222..a61151a6ea5e 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start)
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -822,7 +822,8 @@ __secondary_start:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- stb r7,PACASOFTIRQEN(r13)
+ li r7,IRQS_DISABLED
+ stb r7,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -988,8 +989,8 @@ start_here_common:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4fee00d414e8..d8670a37d70c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,23 +33,6 @@
#include <asm/fixmap.h>
#include <asm/export.h>
-/* Macro to make the code more readable. */
-#ifdef CONFIG_8xx_CPU6
-#define SPRN_MI_TWC_ADDR 0x2b80
-#define SPRN_MI_RPN_ADDR 0x2d80
-#define SPRN_MD_TWC_ADDR 0x3b80
-#define SPRN_MD_RPN_ADDR 0x3d80
-
-#define MTSPR_CPU6(spr, reg, treg) \
- li treg, spr##_ADDR; \
- stw treg, 12(r0); \
- lwz treg, 12(r0); \
- mtspr spr, reg
-#else
-#define MTSPR_CPU6(spr, reg, treg) \
- mtspr spr, reg
-#endif
-
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
#define SIMPLE_KERNEL_ADDRESS 1
@@ -69,11 +52,7 @@
* Value for the bits that have fixed value in RPN entries.
* Also used for tagging DAR for DTLBerror.
*/
-#ifdef CONFIG_PPC_16K_PAGES
-#define RPN_PATTERN (0x00f0 | MD_SPS16K)
-#else
#define RPN_PATTERN 0x00f0
-#endif
#define PAGE_SHIFT_512K 19
#define PAGE_SHIFT_8M 23
@@ -134,15 +113,12 @@ turn_on_mmu:
* task's thread_struct.
*/
#define EXCEPTION_PROLOG \
- EXCEPTION_PROLOG_0; \
+ mtspr SPRN_SPRG_SCRATCH0, r10; \
+ mtspr SPRN_SPRG_SCRATCH1, r11; \
mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
-#define EXCEPTION_PROLOG_0 \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11
-
#define EXCEPTION_PROLOG_1 \
mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
andi. r11,r11,MSR_PR; \
@@ -177,13 +153,6 @@ turn_on_mmu:
SAVE_2GPRS(7, r11)
/*
- * Exception exit code.
- */
-#define EXCEPTION_EPILOG_0 \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- mfspr r11,SPRN_SPRG_SCRATCH1
-
-/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
*
@@ -326,15 +295,10 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtspr SPRN_SPRG_SCRATCH2, r3
-#endif
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mtspr SPRN_SPRG_SCRATCH2, r12
#endif
/* If we are faulting a kernel address, we have to use the
@@ -345,7 +309,7 @@ InstructionTLBMiss:
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfcr r3
+ mfcr r12
#endif
#ifdef ITLB_MISS_KERNEL
#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
@@ -388,40 +352,46 @@ _ENTRY(ITLBMiss_cmp)
lwz r10, 0(r10) /* Get the pte */
4:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mtcr r3
+ mtcr r12
#endif
- /* Insert the APG into the TWC from the Linux PTE. */
- rlwimi r11, r10, 0, 25, 26
- /* Load the MI_TWC with the attributes for this "segment." */
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MI_SPS16K
-#endif
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
- li r11, RPN_PATTERN
+ /* Load the MI_TWC with the attributes for this "segment." */
+ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+
+ li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 20-23 and 28 must be clear.
- * Software indicator bits 24, 25, 26, and 27 must be
+ * Software indicator bits 20 and 23 must be clear.
+ * Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */
-#else
- rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */
-#endif
- MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
+ rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
+ rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
- mfspr r3, SPRN_SPRG_SCRATCH2
+_ENTRY(itlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
+#endif
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(itlb_miss_perf)
+ lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
+ mfspr r12, SPRN_SPRG_SCRATCH2
#endif
- EXCEPTION_EPILOG_0
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -436,7 +406,6 @@ _ENTRY(ITLBMiss_cmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -445,21 +414,15 @@ _ENTRY(ITLBMiss_cmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
. = 0x1200
DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH2, r3
- EXCEPTION_PROLOG_0
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
- lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
- addi r11, r11, 1
- stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
-#endif
- mfcr r3
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
+ mtspr SPRN_SPRG_SCRATCH2, r12
+ mfcr r12
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -499,59 +462,49 @@ _ENTRY(DTLBMiss_jmp)
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
- mtcr r3
+ mtcr r12
- /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
- * It is bit 26-27 of both the Linux PTE and the TWC (at least
+ /* Insert the Guarded flag into the TWC from the Linux PTE.
+ * It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, 26, 27
- /* Insert the WriteThru flag into the TWC from the Linux PTE.
- * It is bit 25 in the Linux PTE and bit 30 in the TWC
- */
- rlwimi r11, r10, 32-5, 30, 30
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-
- /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29)
- * In 16k pages mode, SPS is always 1 */
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
- rlwimi r10, r11, 1, MD_SPS16K
-#endif
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
+ rlwimi r11, r10, 0, _PAGE_GUARDED
#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
+ /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
+ * on that bit will represent a Non Access group
+ */
+ rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
#endif
+ mtspr SPRN_MD_TWC, r11
+
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
li r11, RPN_PATTERN
-#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
-#else
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
-#endif
- rlwimi r10, r11, 0, 20, 20 /* clear 20 */
- MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
/* Restore registers */
- mfspr r3, SPRN_SPRG_SCRATCH2
mtspr SPRN_DAR, r11 /* Tag DAR */
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_1)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
+ rfi
+#ifdef CONFIG_PERF_EVENTS
+_ENTRY(dtlb_miss_perf)
+ lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+ lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+ addi r11, r11, 1
+ stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
+#endif
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_HUGETLB_PAGE
@@ -566,7 +519,6 @@ _ENTRY(DTLBMiss_jmp)
rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
#endif
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
20: /* 512k pages */
@@ -575,7 +527,6 @@ _ENTRY(DTLBMiss_jmp)
/* Add level 2 base */
rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
lwz r10, 0(r10) /* Get the pte */
- rlwinm r11, r11, 0, 0xf
b 4b
#endif
@@ -601,7 +552,8 @@ itlbie:
*/
. = 0x1400
DataTLBError:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_DAR
@@ -636,7 +588,8 @@ dtlbie:
*/
. = 0x1c00
DataBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
@@ -652,13 +605,15 @@ DataBreakpoint:
EXC_XFER_EE(0x1c00, do_break)
11:
mtcr r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
. = 0x1d00
InstructionBreakpoint:
- EXCEPTION_PROLOG_0
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ mtspr SPRN_SPRG_SCRATCH1, r11
lis r10, (instruction_counter - PAGE_OFFSET)@ha
lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, -1
@@ -666,7 +621,8 @@ InstructionBreakpoint:
lis r10, 0xffff
ori r10, r10, 0x01
mtspr SPRN_COUNTA, r10
- EXCEPTION_EPILOG_0
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
rfi
#else
EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
@@ -681,51 +637,57 @@ InstructionBreakpoint:
* not enough space in the DataStoreTLBMiss area.
*/
DTLBMissIMMR:
- mtcr r3
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+ mtcr r12
+ /* Set 512k byte guarded page and mark it valid and accessed */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT | _PAGE_NO_CACHE
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
DTLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
- MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid and accessed */
+ li r11, MD_PS8MEG | MD_SVALID | M_APG2
+ mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(dtlb_miss_exit_3)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
- mtcr r3
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ mtcr r12
+ /* Set 8M byte page and mark it valid,accessed */
+ li r11, MI_PS8MEG | MI_SVALID | M_APG2
+ mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_PRESENT
- MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
- mfspr r3, SPRN_SPRG_SCRATCH2
- EXCEPTION_EPILOG_0
+_ENTRY(itlb_miss_exit_2)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#endif
@@ -933,13 +895,6 @@ start_here:
*/
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
-#ifdef CONFIG_8xx_CPU6
- lis r4, cpu6_errata_word@h
- ori r4, r4, cpu6_errata_word@l
- li r3, 0x3f80
- stw r3, 12(r4)
- lwz r3, 12(r4)
-#endif
mtspr SPRN_M_TW, r6
lis r4,2f@h
ori r4,r4,2f@l
@@ -1004,8 +959,8 @@ initial_mmu:
lis r8, KERNELBASE@h /* Create vaddr for TLB */
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */
- ori r8, r8, MI_SVALID /* Make it valid */
+ li r8, MI_PS8MEG /* Set 8M byte page */
+ ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */
mtspr SPRN_MI_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
@@ -1032,7 +987,7 @@ initial_mmu:
ori r8, r8, MD_EVALID /* Mark it valid */
mtspr SPRN_MD_EPN, r8
li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
+ ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */
mtspr SPRN_MD_TWC, r8
mr r8, r9 /* Create paddr for TLB */
ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
@@ -1061,7 +1016,7 @@ initial_mmu:
#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc
#else
rlwinm r8, r8, 0, ~0x8
@@ -1094,13 +1049,7 @@ swapper_pg_dir:
abatron_pteptrs:
.space 8
-#ifdef CONFIG_8xx_CPU6
- .globl cpu6_errata_word
-cpu6_errata_word:
- .space 16
-#endif
-
-#ifdef CONFIG_PPC_8xx_PERF_EVENT
+#ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter
itlb_miss_counter:
.space 4
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 48c21acef915..2b269315d377 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
/* 64-bit version only for now */
#ifdef CONFIG_PPC64
@@ -46,8 +47,8 @@ _GLOBAL(\name)
bl trace_hardirqs_on
addi r1,r1,128
#endif
- li r0,1
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13)
/* Interrupts will make use return to LR, so get something we want
* in there
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index f57a19348bdd..08faa93755f9 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/irqflags.h>
+#include <asm/hw_irq.h>
#undef DEBUG
@@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
mfmsr r7
#endif /* CONFIG_TRACE_IRQFLAGS */
- li r0,1
- stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
BEGIN_FTR_SECTION
DSSALL
sync
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index b7a84522e652..f88038847790 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -67,6 +67,7 @@
#include <asm/smp.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
+#include <asm/hw_irq.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void)
return happened;
}
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
- __asm__ __volatile__("stb %0,%1(13)"
- : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
static inline notrace int decrementer_check_overflow(void)
{
u64 now = get_tb_or_rtc();
@@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void)
return 0x900;
}
+ if (happened & PACA_IRQ_PMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ return 0xf00;
+ }
+
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
@@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void)
return 0;
}
-notrace void arch_local_irq_restore(unsigned long en)
+notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
unsigned int replay;
/* Write the new soft-enabled value */
- set_soft_enabled(en);
- if (!en)
+ irq_soft_mask_set(mask);
+ if (mask)
return;
+
/*
* From this point onward, we can take interrupts, preempt,
* etc... unless we got hard-disabled. We check if an event
@@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
else {
/*
* We should already be hard disabled here. We had bugs
@@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en)
if (WARN_ON(mfmsr() & MSR_EE))
__hard_irq_disable();
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif
- set_soft_enabled(0);
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
/*
@@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en)
/* We can soft-enable now */
trace_hardirqs_on();
- set_soft_enabled(1);
+ irq_soft_mask_set(IRQS_ENABLED);
/*
* And replay if we have to. This will return with interrupts
@@ -363,7 +364,7 @@ bool prep_irq_for_idle(void)
* of entering the low power state.
*/
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
/* Tell the caller to enter the low power state */
return true;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 71e8a1b8c86e..efdd16a79075 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled;
}
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
- if (pvr_version_is(PVR_POWER9)) {
- unsigned long hmer = mfspr(SPRN_HMER);
-
- /* Do we have the debug bit set */
- if (hmer & PPC_BIT(17)) {
- hmer &= ~PPC_BIT(17);
- mtspr(SPRN_HMER, hmer);
-
- /*
- * Now to avoid problems with soft-disable we
- * only do the emulation if we are coming from
- * user space
- */
- if (user_mode(regs))
- local_paca->hmi_p9_special_emu = 1;
-
- /*
- * Don't bother going to OPAL if that's the
- * only relevant bit.
- */
- if (!(hmer & mfspr(SPRN_HMEER)))
- return local_paca->hmi_p9_special_emu;
+ int pvr;
+ struct device_node *cpun;
+ struct property *prop = NULL;
+ const char *str;
+
+ /* First look in the device tree */
+ preempt_disable();
+ cpun = of_get_cpu_node(smp_processor_id(), NULL);
+ if (cpun) {
+ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+ prop, str) {
+ if (strcmp(str, "bit17-vector-ci-load") == 0)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
+ of_node_put(cpun);
+ }
+ preempt_enable();
+
+ /* If we found the property, don't look at PVR */
+ if (prop)
+ goto out;
+
+ pvr = mfspr(SPRN_PVR);
+ /* Check for POWER9 Nimbus (scale-out) */
+ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+ /* DD2.2 and later */
+ if ((pvr & 0xfff) >= 0x202)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ /* DD2.0 and DD2.1 - used for vector CI load emulation */
+ else if ((pvr & 0xfff) >= 0x200)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ }
+
+ out:
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ pr_debug("HMI debug trigger used for vector CI load\n");
+ break;
+ case DTRIG_SUSPEND_ESCAPE:
+ pr_debug("HMI debug trigger used for TM suspend escape\n");
+ break;
+ default:
+ break;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ * 0 means no further handling is required
+ * 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+ unsigned long hmer = mfspr(SPRN_HMER);
+ long ret = 0;
+
+ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+ if (!((hmer & HMER_DEBUG_TRIG)
+ && hmer_debug_trig_function != DTRIG_UNKNOWN))
+ return -1;
+
+ hmer &= ~HMER_DEBUG_TRIG;
+ /* HMER is a write-AND register */
+ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * host user space
+ */
+ if (regs && user_mode(regs))
+ ret = local_paca->hmi_p9_special_emu = 1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * See if any other HMI causes remain to be handled
+ */
+ if (hmer & mfspr(SPRN_HMEER))
+ return -1;
+
+ return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ int ret;
+
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
wait_for_subcore_guest_exit();
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 644f7040b91c..fe6fc63251fe 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
return pte_pfn(*ptep);
}
-static void flush_tlb_206(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < num_sets; i++) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
- asm volatile("ptesync" : : : "memory");
-}
-
-static void flush_tlb_300(unsigned int num_sets, unsigned int action)
-{
- unsigned long rb;
- unsigned int i;
- unsigned int r;
-
- switch (action) {
- case TLB_INVAL_SCOPE_GLOBAL:
- rb = TLBIEL_INVAL_SET;
- break;
- case TLB_INVAL_SCOPE_LPID:
- rb = TLBIEL_INVAL_SET_LPID;
- break;
- default:
- BUG();
- break;
- }
-
- asm volatile("ptesync" : : : "memory");
-
- if (early_radix_enabled())
- r = 1;
- else
- r = 0;
-
- /*
- * First flush table/PWC caches with set 0, then flush the
- * rest of the sets, partition scope. Radix must then do it
- * all again with process scope. Hash just has to flush
- * process table.
- */
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
- }
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
- if (early_radix_enabled()) {
- for (i = 1; i < num_sets; i++) {
- unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
-
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
- "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
- }
- }
-
- asm volatile("ptesync" : : : "memory");
-}
-
-/*
- * Generic routines to flush TLB on POWER processors. These routines
- * are used as flush_tlb hook in the cpu_spec.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
- * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
-void __flush_tlb_power7(unsigned int action)
-{
- flush_tlb_206(POWER7_TLB_SETS, action);
-}
-
-void __flush_tlb_power8(unsigned int action)
-{
- flush_tlb_206(POWER8_TLB_SETS, action);
-}
-
-void __flush_tlb_power9(unsigned int action)
-{
- unsigned int num_sets;
-
- if (early_radix_enabled())
- num_sets = POWER9_TLB_SETS_RADIX;
- else
- num_sets = POWER9_TLB_SETS_HASH;
-
- flush_tlb_300(num_sets, action);
-}
-
-
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
@@ -226,10 +117,8 @@ static int mce_flush(int what)
return 1;
}
if (what == MCE_FLUSH_TLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- return 1;
- }
+ tlbiel_all();
+ return 1;
}
return 0;
diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds
new file mode 100644
index 000000000000..cea5dc124be4
--- /dev/null
+++ b/arch/powerpc/kernel/module.lds
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section. */
+SECTIONS
+{
+ .toc 0 : ALIGN(256)
+ {
+ *(.got .toc)
+ }
+}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 218971ac7e04..a2636c250b7b 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -348,8 +348,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
char *p;
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
- else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
@@ -387,12 +390,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return 0;
}
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
- gives the value maximum span in an instruction which uses a signed
- offset) */
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ */
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
- return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+ return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
}
/* Both low and high 16 bits are added as SIGNED additions, so if low
@@ -501,12 +507,22 @@ static bool is_early_mcount_callsite(u32 *instruction)
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
- if (is_early_mcount_callsite(instruction - 1))
+ u32 *prev_insn = instruction - 1;
+
+ if (is_early_mcount_callsite(prev_insn))
+ return 1;
+
+ /*
+ * Make sure the branch isn't a sibling call. Sibling calls aren't
+ * "link" branches and they don't return, so they don't need the r2
+ * restore afterwards.
+ */
+ if (!instr_is_relative_link_branch(*prev_insn))
return 1;
if (*instruction != PPC_INST_NOP) {
- pr_err("%s: Expect noop after relocate, got %08x\n",
- me->name, *instruction);
+ pr_err("%s: Expected nop after call, got %08x at %pS\n",
+ me->name, *instruction, instruction);
return 0;
}
/* ld r2,R2_STACK_OFFSET(r1) */
@@ -628,7 +644,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_PPC_REL24:
/* FIXME: Handle weak symbols here --RR */
- if (sym->st_shndx == SHN_UNDEF) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_LIVEPATCH) {
/* External: go via stub */
value = stub_for_addr(sechdrs, value, me);
if (!value)
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 52fc864cdec4..98a3aeeb3c8c 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -58,7 +58,7 @@ optprobe_template_entry:
std r5,_XER(r1)
mfcr r5
std r5,_CCR(r1)
- lbz r5,PACASOFTIRQEN(r13)
+ lbz r5,PACAIRQSOFTMASK(r13)
std r5,SOFTE(r1)
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6597038931d..95ffedf14885 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -18,6 +18,8 @@
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include "setup.h"
+
#ifdef CONFIG_PPC_BOOK3S
/*
@@ -208,15 +210,14 @@ void __init allocate_pacas(void)
u64 limit;
int cpu;
- limit = ppc64_rma_size;
-
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * We can't take SLB misses on the paca, and we want to access them
- * in real mode, so allocate them within the RMA and also within
- * the first segment.
+ * We access pacas in real mode, and cannot take SLB faults
+ * on them when in virtual mode, so allocate them accordingly.
*/
- limit = min(0x10000000ULL, limit);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+ limit = ppc64_rma_size;
#endif
paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 590f4d0a6cb1..208e623b2557 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
return pci_iov_resource_size(pdev, resno);
}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ if (ppc_md.pcibios_sriov_enable)
+ return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+ return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_sriov_disable)
+ return ppc_md.pcibios_sriov_disable(pdev);
+
+ return 0;
+}
+
#endif /* CONFIG_PCI_IOV */
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(pdev);
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -1276,8 +1299,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
i + PCI_BRIDGE_RESOURCES) == 0)
continue;
}
- pr_warning("PCI: Cannot allocate resource region "
- "%d of PCI bridge %d, will remap\n", i, bus->number);
+ pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+ i, bus->number);
clear_resource:
/* The resource might be figured out when doing
* reassignment based on the resources required
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 0e395afbf0f4..ab147a1909c8 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->parent = parent;
pdn->busno = busno;
pdn->devfn = devfn;
-#ifdef CONFIG_PPC_POWERNV
pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
-#endif
INIT_LIST_HEAD(&pdn->child_list);
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
@@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
*/
if (pdev->is_virtfn) {
pdn = pci_get_pdn(pdev);
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
return;
}
@@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
return NULL;
dn->data = pdn;
pdn->phb = hose;
-#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
-#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
u32 addr = of_read_number(regs, 1);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 0d790f8432d2..20ceec4a5f5e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
*/
-static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
unsigned int flags = 0;
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 56548bf6231f..9bfbd800d32f 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
&page_map_fops, vdso_data);
if (!pde)
return 1;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4208cbe2fb7f..1738c4127b32 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -42,6 +42,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
+#include <linux/pkeys.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -57,6 +58,7 @@
#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
+#include <asm/hw_irq.h>
#endif
#include <asm/code-patching.h>
#include <asm/exec.h>
@@ -1097,6 +1099,8 @@ static inline void save_sprs(struct thread_struct *t)
t->tar = mfspr(SPRN_TAR);
}
#endif
+
+ thread_pkey_regs_save(t);
}
static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1136,6 +1140,8 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
#endif
+
+ thread_pkey_regs_restore(new_thread, old_thread);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1404,7 +1410,7 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
@@ -1504,14 +1510,15 @@ static int assign_thread_tidr(void)
{
int index;
int err;
+ unsigned long flags;
again:
if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
return -ENOMEM;
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
err = ida_get_new_above(&vas_thread_ida, 1, &index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
if (err == -EAGAIN)
goto again;
@@ -1519,9 +1526,9 @@ again:
return err;
if (index > MAX_THREAD_CONTEXT) {
- spin_lock(&vas_thread_id_lock);
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, index);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
return -ENOMEM;
}
@@ -1530,9 +1537,11 @@ again:
static void free_thread_tidr(int id)
{
- spin_lock(&vas_thread_id_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vas_thread_id_lock, flags);
ida_remove(&vas_thread_ida, id);
- spin_unlock(&vas_thread_id_lock);
+ spin_unlock_irqrestore(&vas_thread_id_lock, flags);
}
/*
@@ -1584,6 +1593,7 @@ int set_thread_tidr(struct task_struct *t)
return 0;
}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
#endif /* CONFIG_PPC64 */
@@ -1669,7 +1679,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs->gpr[14] = ppc_function_entry((void *)usp);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = 1;
+ childregs->softe = IRQS_ENABLED;
#endif
childregs->gpr[15] = kthread_arg;
p->thread.regs = NULL; /* no user register state */
@@ -1860,6 +1870,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ thread_pkey_regs_init(&current->thread);
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b15bae265c90..4dffef947b8a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -59,6 +59,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
#include <mm/mmu_decl.h>
@@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
#ifdef CONFIG_PPC_PSERIES
/*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
* This contains a list of memory blocks along with NUMA affinity
* information.
*/
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm)
{
- const __be32 *dm, *ls, *usm;
- int l;
- unsigned long n, flags;
- u64 base, size, memblock_size;
- unsigned int is_kexec_kdump = 0, rngs;
-
- ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
- if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
- return 0;
- memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+ u64 base, size;
+ int is_kexec_kdump = 0, rngs;
- dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
- if (dm == NULL || l < sizeof(__be32))
- return 0;
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ rngs = 1;
- n = of_read_number(dm++, 1); /* number of entries */
- if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
- return 0;
+ /*
+ * Skip this block if the reserved bit is set in flags
+ * or if the block is not assigned to this partition.
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
+ return;
- /* check if this is a kexec/kdump kernel. */
- usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
- &l);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- base = dt_mem_next_cell(dt_root_addr_cells, &dm);
- flags = of_read_number(&dm[3], 1);
- /* skip DRC index, pad, assoc. list index, flags */
- dm += 4;
- /* skip this block if the reserved bit is set in flags
- or if the block is not assigned to this partition */
- if ((flags & DRCONF_MEM_RESERVED) ||
- !(flags & DRCONF_MEM_ASSIGNED))
- continue;
- size = memblock_size;
- rngs = 1;
+ if (is_kexec_kdump) {
+ /*
+ * For each memblock in ibm,dynamic-memory, a
+ * corresponding entry in linux,drconf-usable-memory
+ * property contains a counter 'p' followed by 'p'
+ * (base, size) duple. Now read the counter from
+ * linux,drconf-usable-memory property
+ */
+ rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+ if (!rngs) /* there are no (base, size) duple */
+ return;
+ }
+
+ do {
if (is_kexec_kdump) {
- /*
- * For each memblock in ibm,dynamic-memory, a corresponding
- * entry in linux,drconf-usable-memory property contains
- * a counter 'p' followed by 'p' (base, size) duple.
- * Now read the counter from
- * linux,drconf-usable-memory property
- */
- rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
- if (!rngs) /* there are no (base, size) duple */
+ base = dt_mem_next_cell(dt_root_addr_cells, usm);
+ size = dt_mem_next_cell(dt_root_size_cells, usm);
+ }
+
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
continue;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
}
- do {
- if (is_kexec_kdump) {
- base = dt_mem_next_cell(dt_root_addr_cells,
- &usm);
- size = dt_mem_next_cell(dt_root_size_cells,
- &usm);
- }
- if (iommu_is_off) {
- if (base >= 0x80000000ul)
- continue;
- if ((base + size) > 0x80000000ul)
- size = 0x80000000ul - base;
- }
- memblock_add(base, size);
- } while (--rngs);
- }
- memblock_dump_all();
- return 0;
+
+ DBG("Adding: %llx -> %llx\n", base, size);
+ memblock_add(base, size);
+ } while (--rngs);
}
-#else
-#define early_init_dt_scan_drconf_memory(node) 0
#endif /* CONFIG_PPC_PSERIES */
static int __init early_init_dt_scan_memory_ppc(unsigned long node,
const char *uname,
int depth, void *data)
{
+#ifdef CONFIG_PPC_PSERIES
if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
- return early_init_dt_scan_drconf_memory(node);
+ strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+ walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+ return 0;
+ }
+#endif
return early_init_dt_scan_memory(node, uname, depth, data);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..adf044daafd7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -869,10 +869,12 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
+ .byte22 = OV5_FEAT(OV5_DRC_INFO),
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f52ad5bb7109..ca72d7391d40 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -35,6 +35,7 @@
#include <linux/context_tracking.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
@@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
if (regno == PT_DSCR)
return get_user_dscr(task, data);
+#ifdef CONFIG_PPC64
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+#endif
+
if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
@@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target,
return ret;
}
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.amr, 0,
+ ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* These are our native regset flavors.
*/
@@ -1809,6 +1877,9 @@ enum powerpc_regset {
REGSET_EBB, /* EBB registers */
REGSET_PMR, /* Performance Monitor Registers */
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
};
static const struct user_regset native_regsets[] = {
@@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = {
.active = pmu_active, .get = pmu_get, .set = pmu_set
},
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
};
static const struct user_regset_view user_ppc_native_view = {
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index c8c5f3a550c2..fb070d8cad07 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -261,19 +261,19 @@ static int __init proc_rtas_init(void)
if (rtas_node == NULL)
return -ENODEV;
- proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/progress", 0644, NULL,
&ppc_rtas_progress_operations);
- proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+ proc_create("powerpc/rtas/clock", 0644, NULL,
&ppc_rtas_clock_operations);
- proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/poweron", 0644, NULL,
&ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
+ proc_create("powerpc/rtas/sensors", 0444, NULL,
&ppc_rtas_sensors_operations);
- proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/frequency", 0644, NULL,
&ppc_rtas_tone_freq_operations);
- proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+ proc_create("powerpc/rtas/volume", 0644, NULL,
&ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
+ proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
&ppc_rtas_rmo_buf_ops);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index f6f6a8a5103a..10fabae2574d 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -727,7 +727,7 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->fops))
goto enomem;
/*
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 1da8b7d8c6ca..fc600a8b1e77 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -581,7 +581,7 @@ static int __init rtas_init(void)
if (!rtas_log_buf)
return -ENODEV;
- entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
+ entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
&proc_rtas_log_operations);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 3f33869c6486..d73ec518ef80 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -346,10 +346,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy / (500000/HZ),
(loops_per_jiffy / (5000/HZ)) % 100);
#endif
-
-#ifdef CONFIG_SMP
seq_printf(m, "\n");
-#endif
+
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
@@ -379,10 +377,10 @@ static void c_stop(struct seq_file *m, void *v)
}
const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
};
void __init check_for_initrd(void)
@@ -459,13 +457,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
*/
void __init smp_setup_cpu_maps(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
int cpu = 0;
int nthreads = 1;
DBG("smp_setup_cpu_maps()\n");
- while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+ for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
int j, len;
@@ -505,6 +503,11 @@ void __init smp_setup_cpu_maps(void)
set_cpu_possible(cpu, true);
cpu++;
}
+
+ if (cpu >= nr_cpu_ids) {
+ of_node_put(dn);
+ break;
+ }
}
/* If no SMT supported, nthreads is forced to 1 */
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 21c18071d9d5..3fc11e30308f 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -51,6 +51,10 @@ void record_spr_defaults(void);
static inline void record_spr_defaults(void) { };
#endif
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+#endif
+
/*
* Having this in kvm_ppc.h makes include dependencies too
* tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e67413f4a8f0..c388cc3357fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,8 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#define DEBUG
-
#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
@@ -69,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
#include "setup.h"
@@ -190,6 +189,8 @@ static void __init fixup_boot_paca(void)
get_paca()->cpu_start = 1;
/* Allow percpu accesses to work until we setup percpu data */
get_paca()->data_offset = 0;
+ /* Mark interrupts disabled in PACA */
+ irq_soft_mask_set(IRQS_DISABLED);
}
static void __init configure_exceptions(void)
@@ -352,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr)
void early_setup_secondary(void)
{
/* Mark interrupts disabled in PACA */
- get_paca()->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
@@ -568,25 +569,31 @@ void __init initialize_cache_info(void)
DBG(" <- initialize_cache_info()\n");
}
-/* This returns the limit below which memory accesses to the linear
- * mapping are guarnateed not to cause a TLB or SLB miss. This is
- * used to allocate interrupt or emergency stacks for which our
- * exception entry path doesn't deal with being interrupted.
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * that are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
*/
-static __init u64 safe_stack_limit(void)
+__init u64 ppc64_bolted_size(void)
{
#ifdef CONFIG_PPC_BOOK3E
/* Freescale BookE bolts the entire linear mapping */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
+ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
return linear_map_top;
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ /* BookS radix, does not take faults on linear mapping */
if (early_radix_enabled())
return ULONG_MAX;
- /* BookS, the first segment is bolted */
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ /* BookS hash, the first segment is bolted */
+ if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
return 1UL << SID_SHIFT;
#endif
@@ -594,7 +601,7 @@ static __init u64 safe_stack_limit(void)
void __init irqstack_early_init(void)
{
- u64 limit = safe_stack_limit();
+ u64 limit = ppc64_bolted_size();
unsigned int i;
/*
@@ -679,7 +686,7 @@ void __init emergency_stack_init(void)
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
@@ -857,7 +864,7 @@ static void init_fallback_flush(void)
int cpu;
l1d_size = ppc64_caches.l1d.size;
- limit = min(safe_stack_limit(), ppc64_rma_size);
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
/*
* Align to L1d size, and size it at 2x L1d size, to catch possible
@@ -868,19 +875,8 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- /*
- * The fallback flush is currently coded for 8-way
- * associativity. Different associativity is possible, but it
- * will be treated as 8-way and may not evict the lines as
- * effectively.
- *
- * 128 byte lines are mandatory.
- */
- u64 c = l1d_size / 8;
-
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_congruence = c;
- paca[cpu].l1d_flush_sets = c / 128;
+ paca[cpu].l1d_flush_size = l1d_size;
}
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index aded81169648..a46de0035214 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs,
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ elf_greg_t64 softe = 0x1;
WARN_ON(!FULL_REGS(regs));
for (i = 0; i <= PT_RESULT; i ++) {
if (i == 14 && !FULL_REGS(regs))
i = 32;
+ if ( i == PT_SOFTE) {
+ if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
+ return -EFAULT;
+ else
+ continue;
+ }
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4b9ca3570344..720117690822 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
long err = 0;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ unsigned long softe = 0x1;
BUG_ON(tsk != current);
@@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc,
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+ err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
err |= __put_user(signr, &sc->signal);
err |= __put_user(handler, &sc->handler);
if (set != NULL)
@@ -207,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
struct pt_regs *regs = tsk->thread.regs;
- unsigned long msr = tsk->thread.ckpt_regs.msr;
+ unsigned long msr = tsk->thread.regs->msr;
long err = 0;
BUG_ON(tsk != current);
@@ -216,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
WARN_ON(tm_suspend_disabled);
+ /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+ * it contains the correct FP, VEC, VSX state after we treclaimed
+ * the transaction and giveup_all() was called on reclaiming.
+ */
+ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e0a4c1f82e25..bbe7634b3a43 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
+ int cpu;
+
smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
+ if (kdump_in_progress() && crash_wake_offline) {
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ continue;
+ /*
+ * crash_ipi_callback will wait for
+ * all cpus, including offline CPUs.
+ * We don't care about nmi_ipi_function.
+ * Offline cpus will jump straight into
+ * crash_ipi_callback, we can skip the
+ * entire NMI dance and waiting for
+ * cpus to clear pending mask, etc.
+ */
+ do_smp_send_nmi_ipi(cpu);
+ }
+ }
}
#endif
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b8d4a1dac39f..5a8bfee6e187 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
/*
Lets only enable read for phyp resources and
@@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
/*
* This is the system wide DSCR register default value. Any
@@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fe6f3a285455..a32823dcd9a4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
void accumulate_stolen_time(void)
{
u64 sst, ust;
- u8 save_soft_enabled = local_paca->soft_enabled;
+ unsigned long save_irq_soft_mask = irq_soft_mask_return();
struct cpu_accounting_data *acct = &local_paca->accounting;
/* We are called early in the exception entry, before
@@ -253,7 +253,7 @@ void accumulate_stolen_time(void)
* needs to reflect that so various debug stuff doesn't
* complain
*/
- local_paca->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
sst = scan_dispatch_log(acct->starttime_user);
ust = scan_dispatch_log(acct->starttime);
@@ -261,7 +261,7 @@ void accumulate_stolen_time(void)
acct->utime -= ust;
acct->steal_time += ust + sst;
- local_paca->soft_enabled = save_soft_enabled;
+ irq_soft_mask_set(save_irq_soft_mask);
}
static inline u64 calculate_stolen_time(u64 stop_tb)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c93f1e6a9fff..1e48d157196a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -20,6 +20,7 @@
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
@@ -38,6 +39,8 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -142,6 +145,28 @@ static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
+extern void panic_flush_kmsg_start(void)
+{
+ /*
+ * These are mostly taken from kernel/panic.c, but tries to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+}
+
+extern void panic_flush_kmsg_end(void)
+{
+ printk_safe_flush_on_panic();
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic();
+}
+
static unsigned long oops_begin(struct pt_regs *regs)
{
int cpu;
@@ -266,7 +291,9 @@ void user_single_step_siginfo(struct task_struct *tsk,
info->si_addr = (void __user *)regs->nip;
}
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+
+void _exception_pkey(int signr, struct pt_regs *regs, int code,
+ unsigned long addr, int key)
{
siginfo_t info;
const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
@@ -289,13 +316,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
local_irq_enable();
current->thread.trap_nr = code;
+
+ /*
+ * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
+ * to capture the content, if the task gets killed.
+ */
+ thread_pkey_regs_save(&current->thread);
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
+ info.si_pkey = key;
+
force_sig_info(signr, &info, current);
}
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ _exception_pkey(signr, regs, code, addr, 0);
+}
+
void system_reset_exception(struct pt_regs *regs)
{
/*
@@ -337,7 +378,7 @@ void system_reset_exception(struct pt_regs *regs)
* No debugger or crash dump registered, print logs then
* panic.
*/
- __die("System Reset", regs, SIGABRT);
+ die("System Reset", regs, SIGABRT);
mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -1564,7 +1605,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
u8 status;
bool hv;
- hv = (regs->trap == 0xf80);
+ hv = (TRAP(regs) == 0xf80);
if (hv)
value = mfspr(SPRN_HFSCR);
else
@@ -2113,13 +2154,13 @@ static int __init ppc_warn_emulated_init(void)
if (!dir)
return -ENOMEM;
- d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32("do_warn", 0644, dir,
&ppc_warn_emulated);
if (!d)
goto fail;
for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
+ d = debugfs_create_u32(entries[i].name, 0644, dir,
(u32 *)&entries[i].val.counter);
if (!d)
goto fail;
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 382021324883..c002adcc694c 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+
+ cmpwi cr5,r3,CLOCK_REALTIME_COARSE
+ cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
bne cr0,99f
mflr r12 /* r12 saves lr */
@@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
+ beq cr5,70f
50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
@@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r8 /* check if updated */
bne- 50b
+ b 78f
- /* Add wall->monotonic offset and check for overflow or underflow.
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
*/
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
+70: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 70b
+ add r3,r3,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ bne cr6,75f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)
+
+ /* check if counter has updated */
+ or r0,r6,r9
+75: or r0,r0,r4
+ or r0,r0,r5
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 70b
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6,80f
+
+ /* Add wall->monotonic offset and check for overflow or underflow */
+78: add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
+ blt 79f
+ subf r5,r7,r5
+ addi r4,r4,1
+79: bge cr1,80f
+ addi r4,r4,-1
+ add r5,r5,r7
80: std r4,TSPC64_TV_SEC(r11)
std r5,TSPC64_TV_NSEC(r11)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 74901a87bf7a..c8af90ff49f0 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -273,6 +273,7 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
+ *(.data.rel*)
*(.sdata)
*(.sdata2)
*(.got.plt) *(.got)
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 87da80ccced1..6256dc3b0087 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -6,6 +6,9 @@
*
* This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
*/
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
@@ -26,15 +29,45 @@
#include <asm/paca.h>
/*
- * The watchdog has a simple timer that runs on each CPU, once per timer
- * period. This is the heartbeat.
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having a SMP watchdog where all CPUs check all other
+ * CPUs heartbeat.
*
- * Then there are checks to see if the heartbeat has not triggered on a CPU
- * for the panic timeout period. Currently the watchdog only supports an
- * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears their pending bit in their heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
*
- * This is not an NMI watchdog, but Linux uses that name for a generic
- * watchdog in some cases, so NMI gets used in some places.
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can by used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect hardware interrupts off lockups.
*/
static cpumask_t wd_cpus_enabled __read_mostly;
@@ -47,19 +80,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
static DEFINE_PER_CPU(struct timer_list, wd_timer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
-/*
- * These are for the SMP checker. CPUs clear their pending bit in their
- * heartbeat. If the bitmask becomes empty, the time is noted and the
- * bitmask is refilled.
- *
- * All CPUs clear their bit in the pending mask every timer period.
- * Once all have cleared, the time is noted and the bits are reset.
- * If the time since all clear was greater than the panic timeout,
- * we can panic with the list of stuck CPUs.
- *
- * This will work best with NMI IPIs for crash code so the stuck CPUs
- * can be pulled out to get their backtraces.
- */
+/* SMP checker bits */
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
@@ -90,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags)
static void wd_lockup_ipi(struct pt_regs *regs)
{
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+ pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id());
print_modules();
print_irqtrace_events(current);
if (regs)
@@ -131,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (cpumask_weight(&wd_smp_cpus_pending) == 0)
goto out;
- pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_pending));
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
@@ -175,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
unsigned long flags;
- pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+ pr_emerg("CPU %d became unstuck\n", cpu);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
@@ -233,13 +254,10 @@ void soft_nmi_interrupt(struct pt_regs *regs)
}
set_cpu_stuck(cpu, tb);
- pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+ pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip);
print_modules();
print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
+ show_regs(regs);
wd_smp_unlock(&flags);
@@ -388,30 +406,8 @@ int __init watchdog_nmi_probe(void)
"powerpc/watchdog:online",
start_wd_on_cpu, stop_wd_on_cpu);
if (err < 0) {
- pr_warn("Watchdog could not be initialized");
+ pr_warn("could not be initialized");
return err;
}
return 0;
}
-
-static void handle_backtrace_ipi(struct pt_regs *regs)
-{
- nmi_cpu_backtrace(regs);
-}
-
-static void raise_backtrace_ipi(cpumask_t *mask)
-{
- unsigned int cpu;
-
- for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
- handle_backtrace_ipi(NULL);
- else
- smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
- }
-}
-
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
-{
- nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
-}