diff options
Diffstat (limited to 'arch/x86/kernel/apic/apic.c')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 308 |
1 files changed, 197 insertions, 111 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f5291362da1a..2b0faf86da1b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -65,10 +65,10 @@ unsigned int num_processors; unsigned disabled_cpus; /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_physical_apicid __ro_after_init = -1U; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); -u8 boot_cpu_apic_version; +u8 boot_cpu_apic_version __ro_after_init; /* * The highest APIC ID seen during enumeration. @@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map; * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to * avoid undefined behaviour caused by sending INIT from AP to BSP. */ -static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; +static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; /* * This variable controls which CPUs receive external NMIs. By default, * external NMIs are delivered only to the BSP. */ -static int apic_extnmi = APIC_EXTNMI_BSP; +static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; /* * Map cpu index to physical APIC ID @@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); /* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; +static int enabled_via_apicbase __ro_after_init; /* * Handle interrupt mode configuration register (IMCR). @@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -unsigned long mp_lapic_addr; -int disable_apic; +unsigned long mp_lapic_addr __ro_after_init; +int disable_apic __ro_after_init; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __initdata; /* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; +int local_apic_timer_c2_ok __ro_after_init; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); /* * Debug level, exported for io_apic.c */ -int apic_verbosity; +int apic_verbosity __ro_after_init; -int pic_mode; +int pic_mode __ro_after_init; /* Have we found an MP table */ -int smp_found_config; +int smp_found_config __ro_after_init; static struct resource lapic_resource = { .name = "Local APIC", @@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); -static unsigned long apic_phys; +static unsigned long apic_phys __ro_after_init; /* * Get the LAPIC version @@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void) static const struct x86_cpu_id deadline_match[] = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52), {}, }; @@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; /* - * Temporary interrupt handler. + * Temporary interrupt handler and polled calibration function. */ static void __init lapic_cal_handler(struct clock_event_device *dev) { @@ -834,6 +834,10 @@ bool __init apic_needs_pit(void) if (!boot_cpu_has(X86_FEATURE_APIC)) return true; + /* Virt guests may lack ARAT, but still have DEADLINE */ + if (!boot_cpu_has(X86_FEATURE_ARAT)) + return true; + /* Deadline timer is based on TSC so no further PIT action required */ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) return false; @@ -851,7 +855,8 @@ bool __init apic_needs_pit(void) static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); - void (*real_handler)(struct clock_event_device *dev); + u64 tsc_perj = 0, tsc_start = 0; + unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; int pm_referenced = 0; @@ -878,28 +883,64 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" "calibrating APIC timer ...\n"); + /* + * There are platforms w/o global clockevent devices. Instead of + * making the calibration conditional on that, use a polling based + * approach everywhere. + */ local_irq_disable(); - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - /* * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ __setup_APIC_LVTT(0xffffffff, 0, 0); - /* Let the interrupts run */ + /* + * Methods to terminate the calibration loop: + * 1) Global clockevent if available (jiffies) + * 2) TSC if available and frequency is known + */ + jif_start = READ_ONCE(jiffies); + + if (tsc_khz) { + tsc_start = rdtsc(); + tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); + } + + /* + * Enable interrupts so the tick can fire, if a global + * clockevent device is available + */ local_irq_enable(); - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { + /* Wait for a tick to elapse */ + while (1) { + if (tsc_khz) { + u64 tsc_now = rdtsc(); + if ((tsc_now - tsc_start) >= tsc_perj) { + tsc_start += tsc_perj; + break; + } + } else { + unsigned long jif_now = READ_ONCE(jiffies); - local_irq_disable(); + if (time_after(jif_now, jif_start)) { + jif_start = jif_now; + break; + } + } + cpu_relax(); + } - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; + /* Invoke the calibration routine */ + local_irq_disable(); + lapic_cal_handler(NULL); + local_irq_enable(); + } + + local_irq_disable(); /* Build delta t1-t2 as apic timer counts down */ delta = lapic_cal_t1 - lapic_cal_t2; @@ -943,10 +984,11 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* - * PM timer calibration failed or not turned on - * so lets try APIC timer based calibration + * PM timer calibration failed or not turned on so lets try APIC + * timer based calibration, if a global clockevent device is + * available. */ - if (!pm_referenced) { + if (!pm_referenced && global_clock_event) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); /* @@ -1182,25 +1224,38 @@ void clear_local_APIC(void) } /** - * disable_local_APIC - clear and disable the local APIC + * apic_soft_disable - Clears and software disables the local APIC on hotplug + * + * Contrary to disable_local_APIC() this does not touch the enable bit in + * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC + * bus would require a hardware reset as the APIC would lose track of bus + * arbitration. On systems with FSB delivery APICBASE could be disabled, + * but it has to be guaranteed that no interrupt is sent to the APIC while + * in that state and it's not clear from the SDM whether it still responds + * to INIT/SIPI messages. Stay on the safe side and use software disable. */ -void disable_local_APIC(void) +void apic_soft_disable(void) { - unsigned int value; - - /* APIC hasn't been mapped yet */ - if (!x2apic_mode && !apic_phys) - return; + u32 value; clear_local_APIC(); - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ + /* Soft disable APIC (implies clearing of registers for 82489DX!). */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + /* APIC hasn't been mapped yet */ + if (!x2apic_mode && !apic_phys) + return; + + apic_soft_disable(); #ifdef CONFIG_X86_32 /* @@ -1265,7 +1320,7 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id apic_intr_mode __ro_after_init; static int __init apic_intr_mode_select(void) { @@ -1453,54 +1508,72 @@ static void lapic_setup_esr(void) oldvalue, value); } -static void apic_pending_intr_clear(void) +#define APIC_IR_REGS APIC_ISR_NR +#define APIC_IR_BITS (APIC_IR_REGS * 32) +#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) + +union apic_ir { + unsigned long map[APIC_IR_MAPSIZE]; + u32 regs[APIC_IR_REGS]; +}; + +static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) { - long long max_loops = cpu_khz ? cpu_khz : 1000000; - unsigned long long tsc = 0, ntsc; - unsigned int queued; - unsigned long value; - int i, j, acked = 0; + int i, bit; + + /* Read the IRRs */ + for (i = 0; i < APIC_IR_REGS; i++) + irr->regs[i] = apic_read(APIC_IRR + i * 0x10); + + /* Read the ISRs */ + for (i = 0; i < APIC_IR_REGS; i++) + isr->regs[i] = apic_read(APIC_ISR + i * 0x10); - if (boot_cpu_has(X86_FEATURE_TSC)) - tsc = rdtsc(); /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + * If the ISR map is not empty. ACK the APIC and run another round + * to verify whether a pending IRR has been unblocked and turned + * into a ISR. */ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for_each_set_bit(j, &value, 32) { - ack_APIC_irq(); - acked++; - } - } - if (acked > 256) { - pr_err("LAPIC pending interrupts after %d EOI\n", acked); - break; - } - if (queued) { - if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { - ntsc = rdtsc(); - max_loops = (long long)cpu_khz << 10; - max_loops -= ntsc - tsc; - } else { - max_loops--; - } - } - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); + if (!bitmap_empty(isr->map, APIC_IR_BITS)) { + /* + * There can be multiple ISR bits set when a high priority + * interrupt preempted a lower priority one. Issue an ACK + * per set bit. + */ + for_each_set_bit(bit, isr->map, APIC_IR_BITS) + ack_APIC_irq(); + return true; + } + + return !bitmap_empty(irr->map, APIC_IR_BITS); +} + +/* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now the CPU has serviced that pending interrupt and it + * might not have done the ack_APIC_irq() because it thought, interrupt + * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear + * the ISR bit and cpu thinks it has already serivced the interrupt. Hence + * a vector might get locked. It was noticed for timer irq (vector + * 0x31). Issue an extra EOI to clear ISR. + * + * If there are pending IRR bits they turn into ISR bits after a higher + * priority ISR bit has been acked. + */ +static void apic_pending_intr_clear(void) +{ + union apic_ir irr, isr; + unsigned int i; + + /* 512 loops are way oversized and give the APIC a chance to obey. */ + for (i = 0; i < 512; i++) { + if (!apic_check_and_ack(&irr, &isr)) + return; + } + /* Dump the IRR/ISR content if that failed */ + pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); } /** @@ -1513,16 +1586,20 @@ static void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif - if (disable_apic) { disable_ioapic_support(); return; } + /* + * If this comes from kexec/kcrash the APIC might be enabled in + * SPIV. Soft disable it before doing further initialization. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && apic->disable_esr) { @@ -1532,8 +1609,6 @@ static void setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_events_lapic_init(); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. @@ -1548,26 +1623,35 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* - * Set Task Priority to 'accept all'. We never change this - * later on. + * Set Task Priority to 'accept all except vectors 0-31'. An APIC + * vector in the 16-31 range could be delivered if TPR == 0, but we + * would think it's an exception and terrible things will happen. We + * never change this later on. */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; + value |= 0x10; apic_write(APIC_TASKPRI, value); + /* Clear eventually stale ISR/IRR bits */ apic_pending_intr_clear(); /* @@ -1614,6 +1698,8 @@ static void setup_local_APIC(void) value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); + perf_events_lapic_init(); + /* * Set up LVT0, LVT1: * |