diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 67 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/threshold.c | 24 |
6 files changed, 85 insertions, 42 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index ddc72f839332..5ac2d1fb28bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -153,7 +153,7 @@ static void raise_mce(struct mce *m) return; #ifdef CONFIG_X86_LOCAL_APIC - if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { + if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -167,7 +167,7 @@ static void raise_mce(struct mce *m) cpumask_clear_cpu(cpu, mce_inject_cpumask); } if (!cpumask_empty(mce_inject_cpumask)) { - if (m->inject_flags & MCJ_IRQ_BRAODCAST) { + if (m->inject_flags & MCJ_IRQ_BROADCAST) { /* * don't wait because mce_irq_ipi is necessary * to be sync with following raise_local diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index beb1f1689e52..e2703520d120 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -110,22 +110,17 @@ static struct severity { /* known AR MCACODs: */ #ifdef CONFIG_MEMORY_FAILURE MCESEV( - KEEP, "HT thread notices Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), - MCGMASK(MCG_STATUS_EIPV, 0) + KEEP, "Action required but unaffected thread is continuable", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR), + MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV) ), MCESEV( - AR, "Action required: data load error", + AR, "Action required: data load error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), USER ), MCESEV( - KEEP, "HT thread notices Action required: instruction fetch error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), - MCGMASK(MCG_STATUS_EIPV, 0) - ), - MCESEV( - AR, "Action required: instruction fetch error", + AR, "Action required: instruction fetch error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), USER ), diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9239504b41cb..bf49cdbb010f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -/* MCA banks polled by the period polling timer for corrected events */ +/* + * MCA banks polled by the period polling timer for corrected events. + * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). + */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index ae1697c2afe3..d56405309dc1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -24,6 +24,18 @@ * Also supports reliable discovery of shared banks. */ +/* + * CMCI can be delivered to multiple cpus that share a machine check bank + * so we need to designate a single cpu to process errors logged in each bank + * in the interrupt handler (otherwise we would have many races and potential + * double reporting of the same error). + * Note that this can change when a cpu is offlined or brought online since + * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear() + * disables CMCI on all banks owned by the cpu and clears this bitfield. At + * this point, cmci_rediscover() kicks in and a different cpu may end up + * taking ownership of some of the shared MCA banks that were previously + * owned by the offlined cpu. + */ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); /* diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 47a1870279aa..98f2083832eb 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -29,6 +29,7 @@ #include <asm/idle.h> #include <asm/mce.h> #include <asm/msr.h> +#include <asm/trace/irq_vectors.h> /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -181,11 +182,6 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - else - printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); return 1; } if (old_event) { @@ -193,10 +189,6 @@ static int therm_throt_process(bool new_event, int event, int level) printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - else - printk(KERN_INFO "CPU%d: %s power limit normal\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package"); return 1; } @@ -219,6 +211,15 @@ static int thresh_event_valid(int event) return 1; } +static bool int_pln_enable; +static int __init int_pln_enable_setup(char *s) +{ + int_pln_enable = true; + + return 1; +} +__setup("int_pln_enable", int_pln_enable_setup); + #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct device *dev, @@ -231,7 +232,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev, if (err) return err; - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_core_power_limit_count.attr, thermal_attr_group.name); @@ -239,7 +240,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev, err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_throttle_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_power_limit_count.attr, thermal_attr_group.name); @@ -352,7 +353,7 @@ static void intel_thermal_interrupt(void) CORE_LEVEL) != 0) mce_log_therm_throt_event(msr_val); - if (this_cpu_has(X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, CORE_LEVEL); @@ -362,7 +363,7 @@ static void intel_thermal_interrupt(void) therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, PACKAGE_LEVEL); - if (this_cpu_has(X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, @@ -378,15 +379,26 @@ static void unexpected_thermal_interrupt(void) static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +static inline void __smp_thermal_interrupt(void) { - irq_enter(); - exit_idle(); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); +} + +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_thermal_interrupt(); + exiting_ack_irq(); +} + +asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_thermal_apic_entry(THERMAL_APIC_VECTOR); + __smp_thermal_interrupt(); + trace_thermal_apic_exit(THERMAL_APIC_VECTOR); + exiting_ack_irq(); } /* Thermal monitoring depends on APIC, ACPI and clock modulation */ @@ -470,9 +482,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c) apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) + wrmsr(MSR_IA32_THERM_INTERRUPT, + (l | (THERM_INT_LOW_ENABLE + | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h); + else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); else wrmsr(MSR_IA32_THERM_INTERRUPT, @@ -480,9 +496,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_PTS)) { rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE + (l | (PACKAGE_THERM_INT_LOW_ENABLE + | PACKAGE_THERM_INT_HIGH_ENABLE)) + & ~PACKAGE_THERM_INT_PLN_ENABLE, h); + else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | (PACKAGE_THERM_INT_LOW_ENABLE | PACKAGE_THERM_INT_HIGH_ENABLE | PACKAGE_THERM_INT_PLN_ENABLE), h); else diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index aa578cadb940..fe6b1c86645b 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -8,6 +8,7 @@ #include <asm/apic.h> #include <asm/idle.h> #include <asm/mce.h> +#include <asm/trace/irq_vectors.h> static void default_threshold_interrupt(void) { @@ -17,13 +18,24 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage void smp_threshold_interrupt(void) +static inline void __smp_threshold_interrupt(void) { - irq_enter(); - exit_idle(); inc_irq_stat(irq_threshold_count); mce_threshold_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); +} + +asmlinkage void smp_threshold_interrupt(void) +{ + entering_irq(); + __smp_threshold_interrupt(); + exiting_ack_irq(); +} + +asmlinkage void smp_trace_threshold_interrupt(void) +{ + entering_irq(); + trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); + __smp_threshold_interrupt(); + trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); + exiting_ack_irq(); } |