Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--   arch/x86/kernel/apic/Makefile            8
-rw-r--r--   arch/x86/kernel/apic/apic.c            851
-rw-r--r--   arch/x86/kernel/apic/apic_common.c      35
-rw-r--r--   arch/x86/kernel/apic/apic_flat_64.c    203
-rw-r--r--   arch/x86/kernel/apic/apic_noop.c        93
-rw-r--r--   arch/x86/kernel/apic/apic_numachip.c    88
-rw-r--r--   arch/x86/kernel/apic/bigsmp_32.c       189
-rw-r--r--   arch/x86/kernel/apic/hw_nmi.c            8
-rw-r--r--   arch/x86/kernel/apic/init.c            110
-rw-r--r--   arch/x86/kernel/apic/io_apic.c         928
-rw-r--r--   arch/x86/kernel/apic/ipi.c             212
-rw-r--r--   arch/x86/kernel/apic/local.h            19
-rw-r--r--   arch/x86/kernel/apic/msi.c              18
-rw-r--r--   arch/x86/kernel/apic/probe_32.c        128
-rw-r--r--   arch/x86/kernel/apic/probe_64.c         18
-rw-r--r--   arch/x86/kernel/apic/vector.c          383
-rw-r--r--   arch/x86/kernel/apic/x2apic_cluster.c  158
-rw-r--r--   arch/x86/kernel/apic/x2apic_phys.c      85
-rw-r--r--   arch/x86/kernel/apic/x2apic_savic.c    428
-rw-r--r--   arch/x86/kernel/apic/x2apic_uv_x.c     453
20 files changed, 1922 insertions(+), 2493 deletions(-)
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a6fcaf16cdbf..581db89477f9 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -4,10 +4,10 @@
 #
 # Leads to non-deterministic coverage that is not a function of syscall inputs.
-# In particualr, smp_apic_timer_interrupt() is called in random places.
+# In particular, smp_apic_timer_interrupt() is called in random places.
 KCOV_INSTRUMENT		:= n
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_common.o apic_noop.o ipi.o vector.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_common.o apic_noop.o ipi.o vector.o init.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
@@ -18,13 +18,11 @@ ifeq ($(CONFIG_X86_64),y)
 # APIC probe will depend on the listing order here
 obj-$(CONFIG_X86_NUMACHIP)	+= apic_numachip.o
 obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
+obj-$(CONFIG_AMD_SECURE_AVIC)	+= x2apic_savic.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
 obj-y				+= apic_flat_64.o
 endif
 
-# APIC probe will depend on the listing order here
-obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
-
 # For 32bit, probe_32 need to be listed last
 obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20d9a604da7c..d93f87f29d03 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -19,6 +19,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
+#include <linux/bitmap.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/memblock.h>
@@ -35,6 +36,9 @@
 #include <linux/dmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/kvm_types.h>
+
+#include <xen/xen.h>
 
 #include <asm/trace/irq_vectors.h>
 #include <asm/irq_remapping.h>
@@ -56,6 +60,7 @@
 #include <asm/time.h>
 #include <asm/smp.h>
 #include <asm/mce.h>
+#include <asm/msr.h>
 #include <asm/tsc.h>
 #include <asm/hypervisor.h>
 #include <asm/cpu_device_id.h>
@@ -63,34 +68,15 @@
 #include <asm/irq_regs.h>
 #include <asm/cpu.h>
 
-unsigned int num_processors;
-
-unsigned disabled_cpus;
+#include "local.h"
 
 /* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
+u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;
 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
 
 u8 boot_cpu_apic_version __ro_after_init;
 
 /*
- * The highest APIC ID seen during enumeration.
- */
-static unsigned int max_physical_apicid;
-
-/*
- * Bitmask of physically existing CPUs:
- */
-physid_mask_t phys_cpu_present_map;
-
-/*
- * Processor to be disabled specified by kernel parameter
- * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
- * avoid undefined behaviour caused by sending INIT from AP to BSP.
- */
-static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
-
-/*
  * This variable controls which CPUs receive external NMIs. By default,
  * external NMIs are delivered only to the BSP.
  */
@@ -101,26 +87,15 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
  */
 static bool virt_ext_dest_id __ro_after_init;
 
-/*
- * Map cpu index to physical APIC ID
- */
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
-
-#ifdef CONFIG_X86_32
+/* For parallel bootup. */
+unsigned long apic_mmio_base __ro_after_init;
 
-/*
- * On x86_32, the mapping between cpu and logical apicid may vary
- * depending on apic in use. The following early percpu variable is
- * used for the mapping. This is where the behaviors of x86_64 and 32
- * actually diverge. Let's keep it ugly for now.
- */
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
+static inline bool apic_accessible(void)
+{
+	return x2apic_mode || apic_mmio_base;
+}
 
+#ifdef CONFIG_X86_32
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase __ro_after_init;
 
@@ -176,8 +151,8 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-unsigned long mp_lapic_addr __ro_after_init;
-int disable_apic __ro_after_init;
+static unsigned long mp_lapic_addr __ro_after_init;
+bool apic_is_disabled __ro_after_init;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __initdata;
 /* Local APIC timer works in C2 */
@@ -199,12 +174,11 @@ static struct resource lapic_resource = {
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 };
 
+/* Measured in ticks per HZ. */
 unsigned int lapic_timer_period = 0;
 
 static void apic_pm_activate(void);
 
-static unsigned long apic_phys __ro_after_init;
-
 /*
  * Get the LAPIC version
  */
@@ -244,31 +218,7 @@ static int modern_apic(void)
  */
 static void __init apic_disable(void)
 {
-	pr_info("APIC: switched to apic NOOP\n");
-	apic = &apic_noop;
-}
-
-void native_apic_wait_icr_idle(void)
-{
-	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-		cpu_relax();
-}
-
-u32 native_safe_apic_wait_icr_idle(void)
-{
-	u32 send_status;
-	int timeout;
-
-	timeout = 0;
-	do {
-		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-		if (!send_status)
-			break;
-		inc_irq_stat(icr_read_retry_count);
-		udelay(100);
-	} while (timeout++ < 1000);
-
-	return send_status;
+	apic_install_driver(&apic_noop);
 }
 
 void native_apic_icr_write(u32 low, u32 id)
@@ -291,16 +241,6 @@ u64 native_apic_icr_read(void)
 	return icr1 | ((u64)icr2 << 32);
 }
 
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
-	return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
  */
@@ -422,10 +362,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 		if (vector && !eilvt_entry_is_changeable(vector, new))
 			/* may not change if vectors are different */
 			return rsvd;
-		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
-	} while (rsvd != new);
+	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
 
-	rsvd &= ~APIC_EILVT_MASKED;
+	rsvd = new & ~APIC_EILVT_MASKED;
 	if (rsvd && rsvd != vector)
 		pr_info("LVT offset %d assigned for vector 0x%02x\n",
 			offset, rsvd);
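[Note] The reserve_eilvt_offset() hunk above replaces an open-coded atomic_cmpxchg() retry loop with atomic_try_cmpxchg(), which returns a boolean and, on failure, writes the current value back into the expected-value argument, so the loop needs no extra re-read per iteration. A minimal standalone sketch of the idiom (the claim_slot() helper is hypothetical, not kernel code):

	#include <linux/atomic.h>

	/* Claim @slot with @new if it still holds zero (assumed "free" value). */
	static bool claim_slot(atomic_t *slot, int new)
	{
		int expected = 0;

		do {
			if (expected == new)	/* someone else stored our value */
				return true;
			if (expected != 0)	/* taken with a different value */
				return false;
		} while (!atomic_try_cmpxchg(slot, &expected, new));

		return true;	/* we installed @new */
	}
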
@@ -489,7 +428,7 @@ static int lapic_next_deadline(unsigned long delta,
 	weak_wrmsr_fence();
 
 	tsc = rdtsc();
-	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+	wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 	return 0;
 }
 
@@ -504,7 +443,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)
 	v = apic_read(APIC_LVTT);
 	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 	apic_write(APIC_LVTT, v);
-	apic_write(APIC_TMICT, 0);
+
+	/*
+	 * Setting APIC_LVT_MASKED (above) should be enough to tell
+	 * the hardware that this timer will never fire. But AMD
+	 * erratum 411 and some Intel CPU behavior circa 2024 say
+	 * otherwise. Time for belt and suspenders programming: mask
+	 * the timer _and_ zero the counter registers:
+	 */
+	if (v & APIC_LVT_TIMER_TSCDEADLINE)
+		wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
+	else
+		apic_write(APIC_TMICT, 0);
+
 	return 0;
 }
 
@@ -535,7 +486,7 @@ static int lapic_timer_set_oneshot(struct clock_event_device *evt)
 static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
-	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+	__apic_send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
@@ -561,32 +512,32 @@ static struct clock_event_device lapic_clockevent = {
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static const struct x86_cpu_id deadline_match[] __initconst = {
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
+	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x2, 0x2, 0x3a), /* EP */
+	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x4, 0x4, 0x0f), /* EX */
 
-	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020),
+	X86_MATCH_VFM(INTEL_BROADWELL_X,	0x0b000020),
 
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
+	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x2, 0x2, 0x00000011),
+	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x3, 0x3, 0x0700000e),
+	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x4, 0x4, 0x0f00000c),
+	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x5, 0x5, 0x0e000003),
 
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
+	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x3, 0x3, 0x01000136),
+	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x4, 0x4, 0x02000014),
+	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x5, 0xf, 0),
 
-	X86_MATCH_INTEL_FAM6_MODEL( HASWELL,		0x22),
-	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,		0x20),
-	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,		0x17),
+	X86_MATCH_VFM(INTEL_HASWELL,		0x22),
+	X86_MATCH_VFM(INTEL_HASWELL_L,		0x20),
+	X86_MATCH_VFM(INTEL_HASWELL_G,		0x17),
 
-	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,		0x25),
-	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,	0x17),
+	X86_MATCH_VFM(INTEL_BROADWELL,		0x25),
+	X86_MATCH_VFM(INTEL_BROADWELL_G,	0x17),
 
-	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,		0xb2),
-	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,		0xb2),
+	X86_MATCH_VFM(INTEL_SKYLAKE_L,		0xb2),
+	X86_MATCH_VFM(INTEL_SKYLAKE,		0xb2),
 
-	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,	0x52),
-	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,		0x52),
+	X86_MATCH_VFM(INTEL_KABYLAKE_L,	0x52),
+	X86_MATCH_VFM(INTEL_KABYLAKE,		0x52),
 
 	{},
 };
 
@@ -643,6 +594,8 @@ static void setup_APIC_timer(void)
 					0xF, ~0UL);
 	} else
 		clockevents_register_device(levt);
+
+	apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);
 }
 
 /*
@@ -695,7 +648,7 @@ void lapic_update_tsc_freq(void)
 static __initdata int lapic_cal_loops = -1;
 static __initdata long lapic_cal_t1, lapic_cal_t2;
 static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata u32 lapic_cal_pm1, lapic_cal_pm2;
 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 
 /*
@@ -705,7 +658,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 {
 	unsigned long long tsc = 0;
 	long tapic = apic_read(APIC_TMCCT);
-	unsigned long pm = acpi_pm_read_early();
+	u32 pm = acpi_pm_read_early();
 
 	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
@@ -730,7 +683,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 }
 
 static int __init
-calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
+calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 {
 	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
 	const long pm_thresh = pm_100ms / 100;
@@ -741,7 +694,7 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 		return -1;
 #endif
 
-	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
+	apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);
 
 	/* Check, if the PM timer is available */
 	if (!deltapm)
@@ -751,14 +704,14 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 
 	if (deltapm > (pm_100ms - pm_thresh) &&
 	    deltapm < (pm_100ms + pm_thresh)) {
-		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+		apic_pr_verbose("... PM-Timer result ok\n");
 		return 0;
 	}
 
 	res = (((u64)deltapm) * mult) >> 22;
 	do_div(res, 1000000);
-	pr_warn("APIC calibration not consistent "
-		"with PM-Timer: %ldms instead of 100ms\n", (long)res);
+	pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n",
+		(long)res);
 
 	/* Correct the lapic counter value */
 	res = (((u64)(*delta)) * pm_100ms);
@@ -771,9 +724,8 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		do_div(res, deltapm);
-		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
-			"PM-Timer: %lu (%ld)\n",
-			(unsigned long)res, *deltatsc);
+		apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n",
+				(unsigned long)res, *deltatsc);
 		*deltatsc = (long)res;
 	}
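[Note] calibrate_by_pmtimer() above cross-checks the LAPIC count against the 3.579545 MHz ACPI PM timer: if the measured PM-timer delta is not within 1% of the expected 100 ms worth of ticks, the LAPIC delta is rescaled by pm_100ms/deltapm. A worked standalone sketch of that correction (helper name is illustrative):

	#include <linux/math64.h>

	#define PMTMR_TICKS_PER_SEC	3579545
	#define PM_100MS		(PMTMR_TICKS_PER_SEC / 10)	/* 357954 ticks */

	/* Rescale a LAPIC delta measured over 'deltapm' PM ticks to exactly 100 ms. */
	static long rescale_to_100ms(long lapic_delta, u32 deltapm)
	{
		u64 res = (u64)lapic_delta * PM_100MS;

		do_div(res, deltapm);	/* delta * (expected ticks / measured ticks) */
		return (long)res;
	}
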
@@ -808,12 +760,12 @@ bool __init apic_needs_pit(void)
 		return true;
 
 	/* Is there an APIC at all or is it disabled? */
-	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC) || apic_is_disabled)
 		return true;
 
 	/*
 	 * If interrupt delivery mode is legacy PIC or virtual wire without
-	 * configuration, the local APIC timer wont be set up. Make sure
+	 * configuration, the local APIC timer won't be set up. Make sure
 	 * that the PIT is initialized.
 	 */
 	if (apic_intr_mode == APIC_PIC ||
@@ -842,6 +794,7 @@ static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 	u64 tsc_perj = 0, tsc_start = 0;
+	long delta_tsc_khz, bus_khz;
 	unsigned long jif_start;
 	unsigned long deltaj;
 	long delta, deltatsc;
@@ -856,8 +809,7 @@ static int __init calibrate_APIC_clock(void)
 	 * in the clockevent structure and return.
 	 */
 	if (!lapic_init_clockevent()) {
-		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
-			    lapic_timer_period);
+		apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);
 		/*
 		 * Direct calibration methods must have an always running
 		 * local APIC timer, no need for broadcast timer.
@@ -866,8 +818,7 @@ static int __init calibrate_APIC_clock(void)
 		return 0;
 	}
 
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
+	apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");
 
 	/*
 	 * There are platforms w/o global clockevent devices. Instead of
@@ -930,7 +881,7 @@ static int __init calibrate_APIC_clock(void)
 	/* Build delta t1-t2 as apic timer counts down */
 	delta = lapic_cal_t1 - lapic_cal_t2;
-	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+	apic_pr_verbose("... lapic delta = %ld\n", delta);
 
 	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 
@@ -941,22 +892,20 @@ static int __init calibrate_APIC_clock(void)
 	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 	lapic_init_clockevent();
 
-	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
-	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    lapic_timer_period);
+	apic_pr_verbose("..... delta %ld\n", delta);
+	apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult);
+	apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);
 
 	if (boot_cpu_has(X86_FEATURE_TSC)) {
-		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-			    "%ld.%04ld MHz.\n",
-			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+		delta_tsc_khz = (deltatsc * HZ) / (1000 * LAPIC_CAL_LOOPS);
+
+		apic_pr_verbose("..... CPU clock speed is %ld.%03ld MHz.\n",
+				delta_tsc_khz / 1000, delta_tsc_khz % 1000);
 	}
 
-	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-		    "%u.%04u MHz.\n",
-		    lapic_timer_period / (1000000 / HZ),
-		    lapic_timer_period % (1000000 / HZ));
+	bus_khz = (long)lapic_timer_period * HZ / 1000;
+	apic_pr_verbose("..... host bus clock speed is %ld.%03ld MHz.\n",
+			bus_khz / 1000, bus_khz % 1000);
 
 	/*
 	 * Do a sanity check on the APIC calibration result
@@ -975,7 +924,7 @@ static int __init calibrate_APIC_clock(void)
 	 * available.
 	 */
 	if (!pm_referenced && global_clock_event) {
-		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+		apic_pr_verbose("... verify APIC timer\n");
 
 		/*
 		 * Setup the apic timer manually
@@ -996,11 +945,11 @@ static int __init calibrate_APIC_clock(void)
 		/* Jiffies delta */
 		deltaj = lapic_cal_j2 - lapic_cal_j1;
-		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+		apic_pr_verbose("... jiffies delta = %lu\n", deltaj);
 
 		/* Check, if the jiffies result is consistent */
 		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+			apic_pr_verbose("... jiffies result ok\n");
 		else
 			levt->features |= CLOCK_EVT_FEAT_DUMMY;
 	}
@@ -1108,7 +1057,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
-	ack_APIC_irq();
+	apic_eoi();
 	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
 	local_apic_timer_interrupt();
 	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1132,8 +1081,7 @@ void clear_local_APIC(void)
 	int maxlvt;
 	u32 v;
 
-	/* APIC hasn't been mapped yet */
-	if (!x2apic_mode && !apic_phys)
+	if (!apic_accessible())
 		return;
 
 	maxlvt = lapic_get_maxlvt();
@@ -1223,10 +1171,12 @@ void apic_soft_disable(void)
  */
 void disable_local_APIC(void)
 {
-	/* APIC hasn't been mapped yet */
-	if (!x2apic_mode && !apic_phys)
+	if (!apic_accessible())
 		return;
 
+	if (apic->teardown)
+		apic->teardown();
+
 	apic_soft_disable();
 
 #ifdef CONFIG_X86_32
@@ -1287,9 +1237,8 @@ void __init sync_Arb_IDs(void)
 	 */
 	apic_wait_icr_idle();
 
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
+	apic_pr_debug("Synchronizing Arb IDs.\n");
+	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
@@ -1297,7 +1246,7 @@ enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 static int __init __apic_intr_mode_select(void)
 {
 	/* Check kernel option */
-	if (disable_apic) {
+	if (apic_is_disabled) {
 		pr_info("APIC disabled via kernel command line\n");
 		return APIC_PIC;
 	}
@@ -1306,7 +1255,7 @@ static int __init __apic_intr_mode_select(void)
 #ifdef CONFIG_X86_64
 	/* On 64-bit, the APIC must be integrated, Check local APIC only */
 	if (!boot_cpu_has(X86_FEATURE_APIC)) {
-		disable_apic = 1;
+		apic_is_disabled = true;
 		pr_info("APIC disabled by BIOS\n");
 		return APIC_PIC;
 	}
@@ -1315,16 +1264,15 @@ static int __init __apic_intr_mode_select(void)
 
 	/* Neither 82489DX nor integrated APIC ? */
 	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
-		disable_apic = 1;
+		apic_is_disabled = true;
 		return APIC_PIC;
 	}
 
 	/* If the BIOS pretends there is an integrated APIC ? */
 	if (!boot_cpu_has(X86_FEATURE_APIC) &&
 		APIC_INTEGRATED(boot_cpu_apic_version)) {
-		disable_apic = 1;
-		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
-				       boot_cpu_physical_apicid);
+		apic_is_disabled = true;
+		pr_err(FW_BUG "Local APIC not detected, force emulation\n");
 		return APIC_PIC;
 	}
 #endif
@@ -1345,12 +1293,6 @@ static int __init __apic_intr_mode_select(void)
 		pr_info("APIC: SMP mode deactivated\n");
 		return APIC_SYMMETRIC_IO_NO_ROUTING;
 	}
-
-	if (read_apic_id() != boot_cpu_physical_apicid) {
-		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-		      read_apic_id(), boot_cpu_physical_apicid);
-		/* Or can we switch back to PIC here? */
-	}
 #endif
 
 	return APIC_SYMMETRIC_IO;
@@ -1437,7 +1379,7 @@ void __init apic_intr_mode_init(void)
 		break;
 	}
 
-	default_setup_apic_routing();
+	x86_64_probe_apic();
 
 	if (x86_platform.apic_post_init)
 		x86_platform.apic_post_init();
@@ -1480,10 +1422,10 @@ static void lapic_setup_esr(void)
 	if (maxlvt > 3)
 		apic_write(APIC_ESR, 0);
 	value = apic_read(APIC_ESR);
-	if (value != oldvalue)
-		apic_printk(APIC_VERBOSE, "ESR value before enabling "
-			"vector: 0x%08x  after: 0x%08x\n",
-			oldvalue, value);
+	if (value != oldvalue) {
+		apic_pr_verbose("ESR value before enabling vector: 0x%08x  after: 0x%08x\n",
+				oldvalue, value);
+	}
 }
 
 #define APIC_IR_REGS		APIC_ISR_NR
@@ -1495,63 +1437,61 @@ union apic_ir {
 	u32 regs[APIC_IR_REGS];
 };
 
-static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
+static bool apic_check_and_eoi_isr(union apic_ir *isr)
 {
 	int i, bit;
 
-	/* Read the IRRs */
-	for (i = 0; i < APIC_IR_REGS; i++)
-		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
-
 	/* Read the ISRs */
 	for (i = 0; i < APIC_IR_REGS; i++)
 		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
 
+	/* If the ISR map empty, nothing to do here. */
+	if (bitmap_empty(isr->map, APIC_IR_BITS))
+		return true;
+
 	/*
-	 * If the ISR map is not empty. ACK the APIC and run another round
-	 * to verify whether a pending IRR has been unblocked and turned
-	 * into a ISR.
+	 * There can be multiple ISR bits set when a high priority
+	 * interrupt preempted a lower priority one. Issue an EOI for each
+	 * set bit. The priority traversal order does not matter as there
+	 * can't be new ISR bits raised at this point. What matters is that
+	 * an EOI is issued for each ISR bit.
 	 */
-	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
-		/*
-		 * There can be multiple ISR bits set when a high priority
-		 * interrupt preempted a lower priority one. Issue an ACK
-		 * per set bit.
-		 */
-		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
-			ack_APIC_irq();
-		return true;
-	}
+	for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+		apic_eoi();
 
-	return !bitmap_empty(irr->map, APIC_IR_BITS);
+	/* Reread the ISRs, they should be empty now */
+	for (i = 0; i < APIC_IR_REGS; i++)
+		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+
+	return bitmap_empty(isr->map, APIC_IR_BITS);
 }
 
 /*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
+ * If a CPU services an interrupt and crashes before issuing EOI to the
+ * local APIC, the corresponding ISR bit is still set when the crashing CPU
+ * jumps into a crash kernel. Read the ISR and issue an EOI for each set
+ * bit to acknowledge it as otherwise these slots would be locked forever
+ * waiting for an EOI.
  *
- * Most probably by now the CPU has serviced that pending interrupt and it
- * might not have done the ack_APIC_irq() because it thought, interrupt
- * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
- * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
- * a vector might get locked. It was noticed for timer irq (vector
- * 0x31). Issue an extra EOI to clear ISR.
+ * If there are pending bits in the IRR, then they won't be converted into
+ * ISR bits as the CPU has interrupts disabled. They will be delivered once
+ * the CPU enables interrupts and there is nothing which can prevent that.
  *
- * If there are pending IRR bits they turn into ISR bits after a higher
- * priority ISR bit has been acked.
+ * In the worst case this results in spurious interrupt warnings.
 */
-static void apic_pending_intr_clear(void)
+static void apic_clear_isr(void)
 {
-	union apic_ir irr, isr;
+	union apic_ir ir;
 	unsigned int i;
 
-	/* 512 loops are way oversized and give the APIC a chance to obey. */
-	for (i = 0; i < 512; i++) {
-		if (!apic_check_and_ack(&irr, &isr))
-			return;
-	}
-	/* Dump the IRR/ISR content if that failed */
-	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
+	if (!apic_check_and_eoi_isr(&ir))
+		pr_warn("APIC: Stale ISR: %256pb\n", ir.map);
+
+	for (i = 0; i < APIC_IR_REGS; i++)
+		ir.regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+	if (!bitmap_empty(ir.map, APIC_IR_BITS))
+		pr_warn("APIC: Stale IRR: %256pb\n", ir.map);
 }
 
 /**
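[Note] Both apic_check_and_eoi_isr() and apic_clear_isr() above rely on the register layout of the in-service and interrupt-request registers: the 256 vector bits are spread over eight 32-bit registers at 0x10-byte strides, hence the APIC_ISR + i * 0x10 reads. The same addressing for a single vector, as an illustrative helper (assumed name, not part of the patch):

	/* Test whether one vector's ISR bit is currently set. */
	static bool vector_in_service(unsigned int vector)
	{
		u32 reg = apic_read(APIC_ISR + (vector / 32) * 0x10);

		return reg & (1U << (vector % 32));
	}
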
@@ -1565,11 +1505,14 @@ static void setup_local_APIC(void)
 	int cpu = smp_processor_id();
 	unsigned int value;
 
-	if (disable_apic) {
+	if (apic_is_disabled) {
 		disable_ioapic_support();
 		return;
 	}
 
+	if (apic->setup)
+		apic->setup();
+
 	/*
 	 * If this comes from kexec/kcrash the APIC might be enabled in
 	 * SPIV. Soft disable it before doing further initialization.
@@ -1588,35 +1531,14 @@ static void setup_local_APIC(void)
 	}
 #endif
 	/*
-	 * Double-check whether this APIC is really registered.
-	 * This is meaningless in clustered apic mode, so we skip it.
-	 */
-	BUG_ON(!apic->apic_id_registered());
-
-	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
 	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-	 * document number 292116).  So here it goes...
+	 * document number 292116).
+	 *
+	 * Except for APICs which operate in physical destination mode.
	 */
-	apic->init_apic_ldr();
-
-#ifdef CONFIG_X86_32
-	if (apic->dest_mode_logical) {
-		int logical_apicid, ldr_apicid;
-
-		/*
-		 * APIC LDR is initialized.  If logical_apicid mapping was
-		 * initialized during get_smp_config(), make sure it matches
-		 * the actual value.
-		 */
-		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-		if (logical_apicid != BAD_APICID)
-			WARN_ON(logical_apicid != ldr_apicid);
-		/* Always use the value from LDR. */
-		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
-	}
-#endif
+	if (apic->init_apic_ldr)
+		apic->init_apic_ldr();
 
 	/*
 	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
@@ -1629,8 +1551,7 @@ static void setup_local_APIC(void)
 	value |= 0x10;
 	apic_write(APIC_TASKPRI, value);
 
-	/* Clear eventually stale ISR/IRR bits */
-	apic_pending_intr_clear();
+	apic_clear_isr();
 
 	/*
 	 * Now that we are all set up, enable the APIC
@@ -1689,12 +1610,12 @@ static void setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
+	if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {
 		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
+		apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
+		apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);
 	}
 	apic_write(APIC_LVT0, value);
 
@@ -1746,6 +1667,23 @@ void apic_ap_setup(void)
 	end_local_APIC_setup();
 }
 
+static __init void apic_read_boot_cpu_id(bool x2apic)
+{
+	/*
+	 * This can be invoked from check_x2apic() before the APIC has been
+	 * selected. But that code knows for sure that the BIOS enabled
+	 * X2APIC.
+	 */
+	if (x2apic) {
+		boot_cpu_physical_apicid = native_apic_msr_read(APIC_ID);
+		boot_cpu_apic_version = GET_APIC_VERSION(native_apic_msr_read(APIC_LVR));
+	} else {
+		boot_cpu_physical_apicid = read_apic_id();
+		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
+	topology_register_boot_apic(boot_cpu_physical_apicid);
+}
+
 #ifdef CONFIG_X86_X2APIC
 int x2apic_mode;
 EXPORT_SYMBOL_GPL(x2apic_mode);
@@ -1761,12 +1699,12 @@ static int x2apic_state;
 
 static bool x2apic_hw_locked(void)
 {
-	u64 ia32_cap;
+	u64 x86_arch_cap_msr;
 	u64 msr;
 
-	ia32_cap = x86_read_arch_cap_msr();
-	if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
-		rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
+	x86_arch_cap_msr = x86_read_arch_cap_msr();
+	if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
+		rdmsrq(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
 		return (msr & LEGACY_XAPIC_DISABLED);
 	}
 	return false;
@@ -1779,12 +1717,12 @@ static void __x2apic_disable(void)
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
-	rdmsrl(MSR_IA32_APICBASE, msr);
+	rdmsrq(MSR_IA32_APICBASE, msr);
 	if (!(msr & X2APIC_ENABLE))
 		return;
 	/* Disable xapic and x2apic first and then reenable xapic mode */
-	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
-	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+	wrmsrq(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+	wrmsrq(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
 	printk_once(KERN_INFO "x2apic disabled\n");
 }
 
@@ -1792,17 +1730,17 @@ static void __x2apic_enable(void)
 {
 	u64 msr;
 
-	rdmsrl(MSR_IA32_APICBASE, msr);
+	rdmsrq(MSR_IA32_APICBASE, msr);
 	if (msr & X2APIC_ENABLE)
 		return;
-	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+	wrmsrq(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
 	printk_once(KERN_INFO "x2apic enabled\n");
 }
 
 static int __init setup_nox2apic(char *str)
 {
 	if (x2apic_enabled()) {
-		int apicid = native_apic_msr_read(APIC_ID);
+		u32 apicid = native_apic_msr_read(APIC_ID);
 
 		if (apicid >= 255) {
 			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
@@ -1845,14 +1783,13 @@ void x2apic_setup(void)
 	__x2apic_enable();
 }
 
+static __init void apic_set_fixmap(bool read_apic);
+
 static __init void x2apic_disable(void)
 {
-	u32 x2apic_id, state = x2apic_state;
-
-	x2apic_mode = 0;
-	x2apic_state = X2APIC_DISABLED;
+	u32 x2apic_id;
 
-	if (state != X2APIC_ON)
+	if (x2apic_state < X2APIC_ON)
 		return;
 
 	x2apic_id = read_apic_id();
@@ -1865,7 +1802,16 @@ static __init void x2apic_disable(void)
 	}
 
 	__x2apic_disable();
-	register_lapic_address(mp_lapic_addr);
+
+	x2apic_mode = 0;
+	x2apic_state = X2APIC_DISABLED;
+
+	/*
+	 * Don't reread the APIC ID as it was already done from
+	 * check_x2apic() and the APIC driver still is a x2APIC variant,
+	 * which fails to do the read after x2APIC was disabled.
+	 */
+	apic_set_fixmap(false);
 }
 
 static __init void x2apic_enable(void)
@@ -1926,6 +1872,7 @@ void __init check_x2apic(void)
 			x2apic_state = X2APIC_ON_LOCKED;
 		else
 			x2apic_state = X2APIC_ON;
+		apic_read_boot_cpu_id(true);
 	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
 		x2apic_state = X2APIC_DISABLED;
 	}
@@ -1941,7 +1888,7 @@ void __init check_x2apic(void)
 	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
 	pr_err("Disabling APIC, expect reduced performance and functionality.\n");
 
-	disable_apic = 1;
+	apic_is_disabled = true;
 	setup_clear_cpu_cap(X86_FEATURE_APIC);
 }
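[Note] The __x2apic_disable()/__x2apic_enable() pair above toggles the mode bits in MSR_IA32_APICBASE: XAPIC_ENABLE (bit 11) globally enables the APIC and X2APIC_ENABLE (bit 10) selects x2APIC mode, and leaving x2APIC requires passing through the fully-disabled state, hence the double wrmsrq(). A sketch of a mode query built on the same bits (rdmsrq() is the recent rename of rdmsrl() used throughout this series; the helper name is illustrative):

	#include <asm/msr.h>

	static const char *apic_basemsr_mode(void)
	{
		u64 msr;

		rdmsrq(MSR_IA32_APICBASE, msr);

		if (!(msr & XAPIC_ENABLE))
			return "hardware disabled";
		return (msr & X2APIC_ENABLE) ? "x2APIC" : "xAPIC";
	}
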
@@ -1954,7 +1901,7 @@ void __init enable_IR_x2apic(void)
 	unsigned long flags;
 	int ret, ir_stat;
 
-	if (skip_ioapic_setup) {
+	if (ioapic_is_disabled) {
 		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return;
 	}
@@ -1992,19 +1939,19 @@ void __init enable_IR_x2apic(void)
 * On AMD64 we trust the BIOS - if it says no APIC it is likely
 * not correctly set up (usually the APIC timer won't work etc.)
 */
-static int __init detect_init_APIC(void)
+static bool __init detect_init_APIC(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		pr_info("No local APIC present\n");
-		return -1;
+		return false;
 	}
 
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	return 0;
+	register_lapic_address(APIC_DEFAULT_PHYS_BASE);
+	return true;
 }
 
 #else
 
-static int __init apic_verify(void)
+static bool __init apic_verify(unsigned long addr)
 {
 	u32 features, h, l;
 
@@ -2015,28 +1962,28 @@ static bool __init apic_verify(unsigned long addr)
 	features = cpuid_edx(1);
 	if (!(features & (1 << X86_FEATURE_APIC))) {
 		pr_warn("Could not enable APIC!\n");
-		return -1;
+		return false;
 	}
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
 
 	/* The BIOS may have set up the APIC at some other address */
 	if (boot_cpu_data.x86 >= 6) {
 		rdmsr(MSR_IA32_APICBASE, l, h);
 		if (l & MSR_IA32_APICBASE_ENABLE)
-			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+			addr = l & MSR_IA32_APICBASE_BASE;
 	}
 
+	register_lapic_address(addr);
 	pr_info("Found and enabled local APIC!\n");
-	return 0;
+	return true;
 }
 
-int __init apic_force_enable(unsigned long addr)
+bool __init apic_force_enable(unsigned long addr)
 {
 	u32 h, l;
 
-	if (disable_apic)
-		return -1;
+	if (apic_is_disabled)
+		return false;
 
 	/*
 	 * Some BIOSes disable the local APIC in the APIC_BASE
@@ -2053,17 +2000,17 @@ bool __init apic_force_enable(unsigned long addr)
 			enabled_via_apicbase = 1;
 		}
 	}
-	return apic_verify();
+	return apic_verify(addr);
 }
 
 /*
 * Detect and initialize APIC
 */
-static int __init detect_init_APIC(void)
+static bool __init detect_init_APIC(void)
 {
 	/* Disabled by kernel option? */
-	if (disable_apic)
-		return -1;
+	if (apic_is_disabled)
+		return false;
 
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
@@ -2074,8 +2021,8 @@ static bool __init detect_init_APIC(void)
 	case X86_VENDOR_HYGON:
 		break;
 	case X86_VENDOR_INTEL:
-		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
+		if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) ||
+		    boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO)
 			break;
 		goto no_apic;
 	default:
@@ -2090,22 +2037,22 @@ static bool __init detect_init_APIC(void)
 		if (!force_enable_local_apic) {
 			pr_info("Local APIC disabled by BIOS -- "
 				"you can enable it with \"lapic\"\n");
-			return -1;
+			return false;
 		}
-		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
-			return -1;
+		if (!apic_force_enable(APIC_DEFAULT_PHYS_BASE))
+			return false;
 	} else {
-		if (apic_verify())
-			return -1;
+		if (!apic_verify(APIC_DEFAULT_PHYS_BASE))
+			return false;
 	}
 
 	apic_pm_activate();
 
-	return 0;
+	return true;
 
 no_apic:
 	pr_info("No local APIC present or hardware disabled\n");
-	return -1;
+	return false;
 }
 #endif
 
@@ -2114,63 +2061,37 @@ no_apic:
 */
 void __init init_apic_mappings(void)
 {
-	unsigned int new_apicid;
-
 	if (apic_validate_deadline_timer())
 		pr_info("TSC deadline timer available\n");
 
-	if (x2apic_mode) {
-		boot_cpu_physical_apicid = read_apic_id();
+	if (x2apic_mode)
 		return;
-	}
-
-	/* If no local APIC can be found return early */
-	if (!smp_found_config && detect_init_APIC()) {
-		/* lets NOP'ify apic operations */
-		pr_info("APIC: disable apic facility\n");
-		apic_disable();
-	} else {
-		apic_phys = mp_lapic_addr;
-		/*
-		 * If the system has ACPI MADT tables or MP info, the LAPIC
-		 * address is already registered.
-		 */
-		if (!acpi_lapic && !smp_found_config)
-			register_lapic_address(apic_phys);
+	if (!smp_found_config) {
+		if (!detect_init_APIC()) {
+			pr_info("APIC: disable apic facility\n");
+			apic_disable();
+		}
 	}
+}
 
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	new_apicid = read_apic_id();
-	if (boot_cpu_physical_apicid != new_apicid) {
-		boot_cpu_physical_apicid = new_apicid;
-		/*
-		 * yeah -- we lie about apic_version
-		 * in case if apic was disabled via boot option
-		 * but it's not a problem for SMP compiled kernel
-		 * since apic_intr_mode_select is prepared for such
-		 * a case and disable smp mode
-		 */
-		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
-	}
+static __init void apic_set_fixmap(bool read_apic)
+{
+	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+	apic_mmio_base = APIC_BASE;
+	apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr);
+	if (read_apic)
+		apic_read_boot_cpu_id(false);
 }
 
 void __init register_lapic_address(unsigned long address)
 {
+	/* This should only happen once */
+	WARN_ON_ONCE(mp_lapic_addr);
 	mp_lapic_addr = address;
 
-	if (!x2apic_mode) {
-		set_fixmap_nocache(FIX_APIC_BASE, address);
-		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-			    APIC_BASE, address);
-	}
-	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid = read_apic_id();
-		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
-	}
+	if (!x2apic_mode)
+		apic_set_fixmap(true);
 }
 
 /*
@@ -2207,7 +2128,7 @@ static noinline void handle_spurious_interrupt(u8 vector)
 	if (v & (1 << (vector & 0x1f))) {
 		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
 			vector, smp_processor_id());
-		ack_APIC_irq();
+		apic_eoi();
 	} else {
 		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
 			vector, smp_processor_id());
@@ -2258,21 +2179,20 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
 	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
 		apic_write(APIC_ESR, 0);
 	v = apic_read(APIC_ESR);
-	ack_APIC_irq();
+	apic_eoi();
 	atomic_inc(&irq_err_count);
 
-	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
-		    smp_processor_id(), v);
+	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
 
 	v &= 0xff;
 	while (v) {
 		if (v & 0x1)
-			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+			apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);
 		i++;
 		v >>= 1;
 	}
 
-	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+	apic_pr_debug_cont("\n");
 
 	trace_error_apic_exit(ERROR_APIC_VECTOR);
 }
@@ -2292,8 +2212,7 @@ static void __init connect_bsp_APIC(void)
 	 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
	 * local APIC to INT and NMI lines.
	 */
-	apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-			"enabling APIC mode.\n");
+	apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");
 	imcr_pic_to_apic();
 }
 #endif
@@ -2318,8 +2237,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 	 * IPIs, won't work beyond this point!  The only exception are
	 * INIT IPIs.
	 */
-	apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-			"entering PIC mode.\n");
+	apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");
 	imcr_apic_to_pic();
 	return;
 }
@@ -2364,195 +2282,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 	apic_write(APIC_LVT1, value);
 }
 
-/*
- * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
- * contiguously, it equals to current allocated max logical CPU ID plus 1.
- * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
- * so the maximum of nr_logical_cpuids is nr_cpu_ids.
- *
- * NOTE: Reserve 0 for BSP.
- */
-static int nr_logical_cpuids = 1;
-
-/*
- * Used to store mapping between logical CPU IDs and APIC IDs.
- */
-static int cpuid_to_apicid[] = {
-	[0 ... NR_CPUS - 1] = -1,
-};
-
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
-	return phys_id == cpuid_to_apicid[cpu];
-}
-
-#ifdef CONFIG_SMP
-/**
- * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @apicid: APIC ID to check
- */
-bool apic_id_is_primary_thread(unsigned int apicid)
-{
-	u32 mask;
-
-	if (smp_num_siblings == 1)
-		return true;
-	/* Isolate the SMT bit(s) in the APICID and check for 0 */
-	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
-	return !(apicid & mask);
-}
-#endif
-
" - "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apicid); - return -EINVAL; - } - - cpuid_to_apicid[nr_logical_cpuids] = apicid; - return nr_logical_cpuids++; -} - -int generic_processor_info(int apicid, int version) -{ - int cpu, max = nr_cpu_ids; - bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, - phys_cpu_present_map); - - /* - * boot_cpu_physical_apicid is designed to have the apicid - * returned by read_apic_id(), i.e, the apicid of the - * currently booting-up processor. However, on some platforms, - * it is temporarily modified by the apicid reported as BSP - * through MP table. Concretely: - * - * - arch/x86/kernel/mpparse.c: MP_processor_info() - * - arch/x86/mm/amdtopology.c: amd_numa_init() - * - * This function is executed with the modified - * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel - * parameter doesn't work to disable APs on kdump 2nd kernel. - * - * Since fixing handling of boot_cpu_physical_apicid requires - * another discussion and tests on each platform, we leave it - * for now and here we use read_apic_id() directly in this - * function, generic_processor_info(). - */ - if (disabled_cpu_apicid != BAD_APICID && - disabled_cpu_apicid != read_apic_id() && - disabled_cpu_apicid == apicid) { - int thiscpu = num_processors + disabled_cpus; - - pr_warn("APIC: Disabling requested cpu." - " Processor %d/0x%x ignored.\n", thiscpu, apicid); - - disabled_cpus++; - return -ENODEV; - } - - /* - * If boot cpu has not been detected yet, then only allow upto - * nr_cpu_ids - 1 processors and keep one slot free for boot cpu - */ - if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && - apicid != boot_cpu_physical_apicid) { - int thiscpu = max + disabled_cpus - 1; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" - " reached. Keeping one slot for boot cpu." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return -ENODEV; - } - - if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " - "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return -EINVAL; - } - - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - * boot_cpu_init() already hold bit 0 in cpu_present_mask - * for BSP. - */ - cpu = 0; - - /* Logical cpuid 0 is reserved for BSP. 
*/ - cpuid_to_apicid[0] = apicid; - } else { - cpu = allocate_logical_cpuid(apicid); - if (cpu < 0) { - disabled_cpus++; - return -EINVAL; - } - } - - /* - * Validate version - */ - if (version == 0x0) { - pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", - cpu, apicid); - version = 0x10; - } - - if (version != boot_cpu_apic_version) { - pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", - boot_cpu_apic_version, cpu, version); - } - - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; - early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; -#endif -#ifdef CONFIG_X86_32 - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - apic->x86_32_early_logical_apicid(cpu); -#endif - set_cpu_possible(cpu, true); - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - num_processors++; - - return cpu; -} - -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} - void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, bool dmar) { @@ -2591,51 +2320,11 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) dest |= msg->arch_addr_hi.destid_8_31 << 8; return dest; } -EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); - -#ifdef CONFIG_X86_64 -void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) - (*drv)->wakeup_secondary_cpu_64 = handler; -} -#endif - -/* - * Override the generic EOI implementation with an optimized version. - * Only called during early boot when only one CPU is active and with - * interrupts disabled, so we know this does not race with actual APIC driver - * use. - */ -void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - /* Should happen once for each apic */ - WARN_ON((*drv)->eoi_write == eoi_write); - (*drv)->native_eoi_write = (*drv)->eoi_write; - (*drv)->eoi_write = eoi_write; - } -} +EXPORT_SYMBOL_FOR_KVM(x86_msi_msg_get_destid); static void __init apic_bsp_up_setup(void) { -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); + reset_phys_cpu_present_map(boot_cpu_physical_apicid); } /** @@ -2680,7 +2369,7 @@ static struct { */ int active; /* r/w apic fields */ - unsigned int apic_id; + u32 apic_id; unsigned int apic_taskpri; unsigned int apic_ldr; unsigned int apic_dfr; @@ -2696,7 +2385,7 @@ static struct { unsigned int apic_cmci; } apic_pm_state; -static int lapic_suspend(void) +static int lapic_suspend(void *data) { unsigned long flags; int maxlvt; @@ -2744,7 +2433,7 @@ static int lapic_suspend(void) return 0; } -static void lapic_resume(void) +static void lapic_resume(void *data) { unsigned int l, h; unsigned long flags; @@ -2819,11 +2508,15 @@ static void lapic_resume(void) * are needed on every CPU up until machine_halt/restart/poweroff. 
*/ -static struct syscore_ops lapic_syscore_ops = { +static const struct syscore_ops lapic_syscore_ops = { .resume = lapic_resume, .suspend = lapic_suspend, }; +static struct syscore lapic_syscore = { + .ops = &lapic_syscore_ops, +}; + static void apic_pm_activate(void) { apic_pm_state.active = 1; @@ -2833,7 +2526,7 @@ static int __init init_lapic_sysfs(void) { /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ if (boot_cpu_has(X86_FEATURE_APIC)) - register_syscore_ops(&lapic_syscore_ops); + register_syscore(&lapic_syscore); return 0; } @@ -2900,19 +2593,12 @@ int apic_is_clustered_box(void) /* * APIC command line parameters */ -static int __init setup_disableapic(char *arg) +static int __init setup_nolapic(char *arg) { - disable_apic = 1; + apic_is_disabled = true; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} early_param("nolapic", setup_nolapic); static int __init parse_lapic_timer_c2_ok(char *arg) @@ -2939,11 +2625,11 @@ early_param("nolapic_timer", parse_nolapic_timer); static int __init apic_set_verbosity(char *arg) { if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; + if (IS_ENABLED(CONFIG_X86_32)) + return -EINVAL; + + ioapic_is_disabled = false; return 0; -#endif - return -EINVAL; } if (strcmp("debug", arg) == 0) @@ -2964,11 +2650,11 @@ early_param("apic", apic_set_verbosity); static int __init lapic_insert_resource(void) { - if (!apic_phys) + if (!apic_mmio_base) return -1; /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; + lapic_resource.start = apic_mmio_base; lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; insert_resource(&iomem_resource, &lapic_resource); @@ -2981,15 +2667,6 @@ static int __init lapic_insert_resource(void) */ late_initcall(lapic_insert_resource); -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ - if (!arg || !get_option(&arg, &disabled_cpu_apicid)) - return -EINVAL; - - return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); - static int __init apic_set_extnmi(char *arg) { if (!arg) diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 02b4839478b1..2ed3b5c88c7f 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -4,8 +4,11 @@ * SPDX-License-Identifier: GPL-2.0 */ #include <linux/irq.h> +#include <linux/kvm_types.h> #include <asm/apic.h> +#include "local.h" + u32 apic_default_calc_apicid(unsigned int cpu) { return per_cpu(x86_cpu_to_apicid, cpu); @@ -16,31 +19,25 @@ u32 apic_flat_calc_apicid(unsigned int cpu) return 1U << cpu; } -bool default_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - -int default_cpu_present_to_apicid(int mps_cpu) +u32 default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + return (int)per_cpu(x86_cpu_to_apicid, mps_cpu); else return BAD_APICID; } -EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); +EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid); -int default_check_phys_apicid_present(int phys_apicid) +/* + * Set up the logical destination ID when the APIC operates in logical + * destination mode. 
+ */ +void default_init_apic_ldr(void) { - return physid_isset(phys_apicid, phys_cpu_present_map); -} + unsigned long val; -int default_apic_id_valid(u32 apicid) -{ - return (apicid < 255); + apic_write(APIC_DFR, APIC_DFR_FLAT); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 8f72b4351c9f..e0308d8c4e6c 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -8,192 +8,25 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ -#include <linux/cpumask.h> #include <linux/export.h> -#include <linux/acpi.h> -#include <asm/jailhouse_para.h> #include <asm/apic.h> #include "local.h" -static struct apic apic_physflat; -static struct apic apic_flat; - -struct apic *apic __ro_after_init = &apic_flat; -EXPORT_SYMBOL_GPL(apic); - -static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 1; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -void flat_init_apic_ldr(void) -{ - unsigned long val; - unsigned long num, id; - - num = smp_processor_id(); - id = 1UL << num; - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static void _flat_send_IPI_mask(unsigned long mask, int vector) -{ - unsigned long flags; - - local_irq_save(flags); - __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); -} - -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void -flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - __clear_bit(cpu, &mask); - - _flat_send_IPI_mask(mask, vector); -} - -static unsigned int flat_get_apic_id(unsigned long x) +static u32 physflat_get_apic_id(u32 x) { return (x >> 24) & 0xFF; } -static u32 set_apic_id(unsigned int id) -{ - return (id & 0xFF) << 24; -} - -static unsigned int read_xapic_id(void) -{ - return flat_get_apic_id(apic_read(APIC_ID)); -} - -static int flat_apic_id_registered(void) -{ - return physid_isset(read_xapic_id(), phys_cpu_present_map); -} - -static int flat_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - -static int flat_probe(void) +static int physflat_probe(void) { return 1; } -static struct apic apic_flat __ro_after_init = { - .name = "flat", - .probe = flat_probe, - .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = flat_apic_id_registered, - - .delivery_mode = APIC_DELIVERY_MODE_FIXED, - .dest_mode_logical = true, - - .disable_esr = 0, - - .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = flat_phys_pkg_id, - - .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, - - 
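[Note] default_init_apic_ldr() above gives each CPU one bit of the 8-bit logical ID in APIC_LDR[31:24] (flat DFR model), so a logical-mode IPI to CPUs 0 and 3 would target destination 0x09 -- and no more than eight CPUs can ever be addressed this way. That limit is what lets x86-64 drop the logical-mode "flat" driver in the next file in favor of the physical-mode one. Illustrative computation (helper name assumed):

	/* Flat logical destination for CPUs 0-7: one bit per CPU in bits [31:24]. */
	static u32 flat_logical_dest(unsigned int cpu)
	{
		return SET_APIC_LOGICAL_ID(1U << cpu);	/* (1 << cpu) << 24 */
	}
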
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 8f72b4351c9f..e0308d8c4e6c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -8,192 +8,25 @@
 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
 * James Cleverdon.
 */
-#include <linux/cpumask.h>
 #include <linux/export.h>
-#include <linux/acpi.h>
 
-#include <asm/jailhouse_para.h>
 #include <asm/apic.h>
 
 #include "local.h"
 
-static struct apic apic_physflat;
-static struct apic apic_flat;
-
-struct apic *apic __ro_after_init = &apic_flat;
-EXPORT_SYMBOL_GPL(apic);
-
-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	return 1;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116).  So here it goes...
- */
-void flat_init_apic_ldr(void)
-{
-	unsigned long val;
-	unsigned long num, id;
-
-	num = smp_processor_id();
-	id = 1UL << num;
-	apic_write(APIC_DFR, APIC_DFR_FLAT);
-	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
-	val |= SET_APIC_LOGICAL_ID(id);
-	apic_write(APIC_LDR, val);
-}
-
-static void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
-	local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
-	unsigned long mask = cpumask_bits(cpumask)[0];
-
-	_flat_send_IPI_mask(mask, vector);
-}
-
-static void
-flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
-{
-	unsigned long mask = cpumask_bits(cpumask)[0];
-	int cpu = smp_processor_id();
-
-	if (cpu < BITS_PER_LONG)
-		__clear_bit(cpu, &mask);
-
-	_flat_send_IPI_mask(mask, vector);
-}
-
-static unsigned int flat_get_apic_id(unsigned long x)
+static u32 physflat_get_apic_id(u32 x)
 {
 	return (x >> 24) & 0xFF;
 }
 
-static u32 set_apic_id(unsigned int id)
-{
-	return (id & 0xFF) << 24;
-}
-
-static unsigned int read_xapic_id(void)
-{
-	return flat_get_apic_id(apic_read(APIC_ID));
-}
-
-static int flat_apic_id_registered(void)
-{
-	return physid_isset(read_xapic_id(), phys_cpu_present_map);
-}
-
-static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
-{
-	return initial_apic_id >> index_msb;
-}
-
-static int flat_probe(void)
+static int physflat_probe(void)
 {
 	return 1;
 }
 
-static struct apic apic_flat __ro_after_init = {
-	.name				= "flat",
-	.probe				= flat_probe,
-	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
-	.apic_id_valid			= default_apic_id_valid,
-	.apic_id_registered		= flat_apic_id_registered,
-
-	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,
-	.dest_mode_logical		= true,
-
-	.disable_esr			= 0,
-
-	.check_apicid_used		= NULL,
-	.init_apic_ldr			= flat_init_apic_ldr,
-	.ioapic_phys_id_map		= NULL,
-	.setup_apic_routing		= NULL,
-	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
-	.apicid_to_cpu_present		= NULL,
-	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.phys_pkg_id			= flat_phys_pkg_id,
-
-	.get_apic_id			= flat_get_apic_id,
-	.set_apic_id			= set_apic_id,
-
-	.calc_dest_apicid		= apic_flat_calc_apicid,
-
-	.send_IPI			= default_send_IPI_single,
-	.send_IPI_mask			= flat_send_IPI_mask,
-	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
-	.send_IPI_allbutself		= default_send_IPI_allbutself,
-	.send_IPI_all			= default_send_IPI_all,
-	.send_IPI_self			= default_send_IPI_self,
-
-	.inquire_remote_apic		= default_inquire_remote_apic,
-
-	.read				= native_apic_mem_read,
-	.write				= native_apic_mem_write,
-	.eoi_write			= native_apic_mem_write,
-	.icr_read			= native_apic_icr_read,
-	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
 static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-#ifdef CONFIG_ACPI
-	/*
-	 * Quirk: some x86_64 machines can only use physical APIC mode
-	 * regardless of how many processors are present (x86_64 ES7000
-	 * is an example).
-	 */
-	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
-		printk(KERN_DEBUG "system APIC only can use physical flat");
-		return 1;
-	}
-
-	if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
-		printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
-		return 1;
-	}
-#endif
-
-	return 0;
-}
-
-static void physflat_init_apic_ldr(void)
-{
-	/*
-	 * LDR and DFR are not involved in physflat mode, rather:
-	 * "In physical destination mode, the destination processor is
-	 * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1)
-	 */
-}
-
-static int physflat_probe(void)
-{
-	if (apic == &apic_physflat || num_possible_cpus() > 8 ||
-	    jailhouse_paravirt())
-		return 1;
-
-	return 0;
+	return 1;
 }
 
 static struct apic apic_physflat __ro_after_init = {
@@ -201,25 +34,15 @@ static struct apic apic_physflat __ro_after_init = {
 	.name				= "physical flat",
 	.probe				= physflat_probe,
 	.acpi_madt_oem_check		= physflat_acpi_madt_oem_check,
-	.apic_id_valid			= default_apic_id_valid,
-	.apic_id_registered		= flat_apic_id_registered,
 
-	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,
 	.dest_mode_logical		= false,
 
 	.disable_esr			= 0,
 
-	.check_apicid_used		= NULL,
-	.init_apic_ldr			= physflat_init_apic_ldr,
-	.ioapic_phys_id_map		= NULL,
-	.setup_apic_routing		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
-	.apicid_to_cpu_present		= NULL,
-	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.phys_pkg_id			= flat_phys_pkg_id,
 
-	.get_apic_id			= flat_get_apic_id,
-	.set_apic_id			= set_apic_id,
+	.max_apic_id			= 0xFE,
+	.get_apic_id			= physflat_get_apic_id,
 
 	.calc_dest_apicid		= apic_default_calc_apicid,
 
@@ -229,19 +52,17 @@ static struct apic apic_physflat __ro_after_init = {
 	.send_IPI_allbutself		= default_send_IPI_allbutself,
 	.send_IPI_all			= default_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
-
-	.inquire_remote_apic		= default_inquire_remote_apic,
+	.nmi_to_offline_cpu		= true,
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
-	.eoi_write			= native_apic_mem_write,
+	.eoi				= native_apic_mem_eoi,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= apic_mem_wait_icr_idle,
+	.safe_wait_icr_idle		= apic_mem_wait_icr_idle_timeout,
 };
+apic_driver(apic_physflat);
 
-/*
- * We need to check for physflat first, so this order is important.
- */
-apic_drivers(apic_physflat, apic_flat);
+struct apic *apic __ro_after_init = &apic_physflat;
+EXPORT_SYMBOL_GPL(apic);
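[Note] apic_driver(apic_physflat) registers the driver by dropping a pointer into the .apicdrivers linker section; the probe code walks that section in link order, which is why the Makefile comment near the top of this diff says the listing order matters. The registration macro in <asm/apic.h> has roughly this shape (quoted from memory; details may differ):

	#define apic_driver(sym)						\
		static const struct apic *__apicdrivers_##sym __used		\
			__aligned(sizeof(struct apic *))			\
			__section(".apicdrivers") = { &sym }
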
.max_apic_id = 0xFE, .get_apic_id = noop_get_apic_id, - .set_apic_id = NULL, .calc_dest_apicid = apic_flat_calc_apicid, @@ -125,17 +72,9 @@ struct apic apic_noop __ro_after_init = { .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, - .inquire_remote_apic = NULL, - .read = noop_apic_read, .write = noop_apic_write, - .eoi_write = noop_apic_write, + .eoi = noop_apic_eoi, .icr_read = noop_apic_icr_read, .icr_write = noop_apic_icr_write, - .wait_icr_idle = noop_apic_wait_icr_idle, - .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 - .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, -#endif }; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a54d817eb4b6..5c5be2d58242 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <asm/msr.h> #include <asm/numachip/numachip.h> #include <asm/numachip/numachip_csr.h> @@ -25,53 +26,27 @@ static const struct apic apic_numachip1; static const struct apic apic_numachip2; static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly; -static unsigned int numachip1_get_apic_id(unsigned long x) +static u32 numachip1_get_apic_id(u32 x) { unsigned long value; unsigned int id = (x >> 24) & 0xff; if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { - rdmsrl(MSR_FAM10H_NODE_ID, value); + rdmsrq(MSR_FAM10H_NODE_ID, value); id |= (value << 2) & 0xff00; } return id; } -static u32 numachip1_set_apic_id(unsigned int id) -{ - return (id & 0xff) << 24; -} - -static unsigned int numachip2_get_apic_id(unsigned long x) +static u32 numachip2_get_apic_id(u32 x) { u64 mcfg; - rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, mcfg); + rdmsrq(MSR_FAM10H_MMIO_CONF_BASE, mcfg); return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static u32 numachip2_set_apic_id(unsigned int id) -{ - return id << 24; -} - -static int numachip_apic_id_valid(u32 apicid) -{ - /* Trust what bootloader passes in MADT */ - return 1; -} - -static int numachip_apic_id_registered(void) -{ - return 1; -} - -static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static void numachip1_apic_icr_write(int apicid, unsigned int val) { write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -82,7 +57,7 @@ static void numachip2_apic_icr_write(int apicid, unsigned int val) numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val); } -static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu) { numachip_apic_icr_write(phys_apicid, APIC_DM_INIT); numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP | @@ -172,15 +147,15 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) u64 val; u32 nodes = 1; - this_cpu_write(cpu_llc_id, node); + c->topo.llc_id = node; /* Account for nodes per socket in multi-core-module processors */ if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - rdmsrl(MSR_FAM10H_NODE_ID, val); + rdmsrq(MSR_FAM10H_NODE_ID, val); nodes = ((val >> 3) & 7) + 1; } - c->phys_proc_id = node / nodes; + c->topo.pkg_id = node / nodes; } static int __init numachip_system_init(void) @@ -228,40 +203,19 @@ static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -/* APIC IPIs are queued */ -static void numachip_apic_wait_icr_idle(void) -{ -} - -/* APIC NMI IPIs are queued */ -static u32 
numachip_safe_apic_wait_icr_idle(void) -{ - return 0; -} - static const struct apic apic_numachip1 __refconst = { .name = "NumaConnect system", .probe = numachip1_probe, .acpi_madt_oem_check = numachip1_acpi_madt_oem_check, - .apic_id_valid = numachip_apic_id_valid, - .apic_id_registered = numachip_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, .disable_esr = 0, - .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = numachip_phys_pkg_id, + .max_apic_id = UINT_MAX, .get_apic_id = numachip1_get_apic_id, - .set_apic_id = numachip1_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, @@ -273,15 +227,12 @@ static const struct apic apic_numachip1 __refconst = { .send_IPI_self = numachip_send_IPI_self, .wakeup_secondary_cpu = numachip_wakeup_secondary, - .inquire_remote_apic = NULL, /* REMRD not supported */ .read = native_apic_mem_read, .write = native_apic_mem_write, - .eoi_write = native_apic_mem_write, + .eoi = native_apic_mem_eoi, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, - .wait_icr_idle = numachip_apic_wait_icr_idle, - .safe_wait_icr_idle = numachip_safe_apic_wait_icr_idle, }; apic_driver(apic_numachip1); @@ -290,25 +241,15 @@ static const struct apic apic_numachip2 __refconst = { .name = "NumaConnect2 system", .probe = numachip2_probe, .acpi_madt_oem_check = numachip2_acpi_madt_oem_check, - .apic_id_valid = numachip_apic_id_valid, - .apic_id_registered = numachip_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, .disable_esr = 0, - .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = numachip_phys_pkg_id, + .max_apic_id = UINT_MAX, .get_apic_id = numachip2_get_apic_id, - .set_apic_id = numachip2_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, @@ -320,15 +261,12 @@ static const struct apic apic_numachip2 __refconst = { .send_IPI_self = numachip_send_IPI_self, .wakeup_secondary_cpu = numachip_wakeup_secondary, - .inquire_remote_apic = NULL, /* REMRD not supported */ .read = native_apic_mem_read, .write = native_apic_mem_write, - .eoi_write = native_apic_mem_write, + .eoi = native_apic_mem_eoi, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, - .wait_icr_idle = numachip_apic_wait_icr_idle, - .safe_wait_icr_idle = numachip_safe_apic_wait_icr_idle, }; apic_driver(apic_numachip2); diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c deleted file mode 100644 index 77555f66c14d..000000000000 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ /dev/null @@ -1,189 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. - * - * Drives the local APIC in "clustered mode". 
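Editor's aside: both NumaChip variants register through apic_driver(), the same mechanism behind the ordering note earlier ("first, so this order is important"). A hedged sketch of that registration, simplified from the .apicdrivers linker-section machinery in arch/x86/include/asm/apic.h; the probe loop here is illustrative, not the exact upstream code:

	#define apic_driver(sym)					\
		static const struct apic *__apicdrivers_##sym __used	\
			__aligned(sizeof(struct apic *))		\
			__section(".apicdrivers") = { &sym }

	/* The linker collects all such pointers between these symbols: */
	extern struct apic *__apicdrivers[], *__apicdrivers_end[];

	static int __init sketch_probe_apic(void)
	{
		struct apic **drv;

		/* Walk the drivers in link order; first ->probe() hit wins */
		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
			if ((*drv)->probe && (*drv)->probe()) {
				apic = *drv;
				return 0;
			}
		}
		return -ENODEV;
	}
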
- */ -#include <linux/cpumask.h> -#include <linux/dmi.h> -#include <linux/smp.h> - -#include <asm/apic.h> -#include <asm/io_apic.h> - -#include "local.h" - -static unsigned bigsmp_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static int bigsmp_apic_id_registered(void) -{ - return 1; -} - -static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) -{ - return false; -} - -static int bigsmp_early_logical_apicid(int cpu) -{ - /* on bigsmp, logical apicid is the same as physical */ - return early_per_cpu(x86_cpu_to_apicid, cpu); -} - -/* - * bigsmp enables physical destination mode - * and doesn't use LDR and DFR - */ -static void bigsmp_init_apic_ldr(void) -{ -} - -static void bigsmp_setup_apic_routing(void) -{ - printk(KERN_INFO - "Enabling APIC mode: Physflat. Using %d I/O APICs\n", - nr_ioapics); -} - -static int bigsmp_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - -static int bigsmp_check_phys_apicid_present(int phys_apicid) -{ - return 1; -} - -static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static void bigsmp_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void bigsmp_send_IPI_all(int vector) -{ - default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); -} - -static int dmi_bigsmp; /* can be set by dmi scanners */ - -static int hp_ht_bigsmp(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); - dmi_bigsmp = 1; - - return 0; -} - - -static const struct dmi_system_id bigsmp_dmi_table[] = { - { hp_ht_bigsmp, "HP ProLiant DL760 G2", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P44-"), - } - }, - - { hp_ht_bigsmp, "HP ProLiant DL740", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P47-"), - } - }, - { } /* NULL entry stops DMI scanning */ -}; - -static int probe_bigsmp(void) -{ - if (def_to_bigsmp) - dmi_bigsmp = 1; - else - dmi_check_system(bigsmp_dmi_table); - - return dmi_bigsmp; -} - -static struct apic apic_bigsmp __ro_after_init = { - - .name = "bigsmp", - .probe = probe_bigsmp, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = bigsmp_apic_id_registered, - - .delivery_mode = APIC_DELIVERY_MODE_FIXED, - .dest_mode_logical = false, - - .disable_esr = 1, - - .check_apicid_used = bigsmp_check_apicid_used, - .init_apic_ldr = bigsmp_init_apic_ldr, - .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, - .setup_apic_routing = bigsmp_setup_apic_routing, - .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = physid_set_mask_of_physid, - .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .phys_pkg_id = bigsmp_phys_pkg_id, - - .get_apic_id = bigsmp_get_apic_id, - .set_apic_id = NULL, - - .calc_dest_apicid = apic_default_calc_apicid, - - .send_IPI = default_send_IPI_single_phys, - .send_IPI_mask = default_send_IPI_mask_sequence_phys, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = bigsmp_send_IPI_allbutself, - .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .inquire_remote_apic = default_inquire_remote_apic, - - .read 
= native_apic_mem_read, - .write = native_apic_mem_write, - .eoi_write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, -}; - -void __init generic_bigsmp_probe(void) -{ - unsigned int cpu; - - if (!probe_bigsmp()) - return; - - apic = &apic_bigsmp; - - for_each_possible_cpu(cpu) { - if (early_per_cpu(x86_cpu_to_logical_apicid, - cpu) == BAD_APICID) - continue; - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - bigsmp_early_logical_apicid(cpu); - } - - pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); -} - -apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 34a992e275ef..45af535c44a0 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -21,6 +21,8 @@ #include <linux/init.h> #include <linux/delay.h> +#include "local.h" + #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh) { @@ -31,12 +33,12 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #ifdef arch_trigger_cpumask_backtrace static void nmi_raise_cpu_backtrace(cpumask_t *mask) { - apic->send_IPI_mask(mask, NMI_VECTOR); + __apic_send_IPI_mask(mask, NMI_VECTOR); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, nmi_raise_cpu_backtrace); } diff --git a/arch/x86/kernel/apic/init.c b/arch/x86/kernel/apic/init.c new file mode 100644 index 000000000000..821e2e536f19 --- /dev/null +++ b/arch/x86/kernel/apic/init.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "APIC: " fmt + +#include <asm/apic.h> + +#include "local.h" + +/* + * Use DEFINE_STATIC_CALL_NULL() to avoid having to provide stub functions + * for each callback. The callbacks are setup during boot and all except + * wait_icr_idle() must be initialized before usage. The IPI wrappers + * use static_call() and not static_call_cond() to catch any fails. 
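Editor's aside: the hw_nmi.c hunk above replaces the indirect apic->send_IPI_mask() call with __apic_send_IPI_mask(). A minimal sketch of what such a wrapper plausibly looks like, assuming it simply routes through the apic_call_send_IPI_mask static call defined just below; the real wrapper lives in the APIC headers:

	#include <linux/static_call.h>
	#include <linux/cpumask.h>

	DECLARE_STATIC_CALL(apic_call_send_IPI_mask, *apic->send_IPI_mask);

	static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask,
							 int vector)
	{
		/* Patched direct call: no apic->send_IPI_mask pointer load */
		static_call(apic_call_send_IPI_mask)(mask, vector);
	}
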
+ */ +#define DEFINE_APIC_CALL(__cb) \ + DEFINE_STATIC_CALL_NULL(apic_call_##__cb, *apic->__cb) + +DEFINE_APIC_CALL(eoi); +DEFINE_APIC_CALL(native_eoi); +DEFINE_APIC_CALL(icr_read); +DEFINE_APIC_CALL(icr_write); +DEFINE_APIC_CALL(read); +DEFINE_APIC_CALL(send_IPI); +DEFINE_APIC_CALL(send_IPI_mask); +DEFINE_APIC_CALL(send_IPI_mask_allbutself); +DEFINE_APIC_CALL(send_IPI_allbutself); +DEFINE_APIC_CALL(send_IPI_all); +DEFINE_APIC_CALL(send_IPI_self); +DEFINE_APIC_CALL(wait_icr_idle); +DEFINE_APIC_CALL(wakeup_secondary_cpu); +DEFINE_APIC_CALL(wakeup_secondary_cpu_64); +DEFINE_APIC_CALL(write); + +EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_mask); +EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_self); + +/* The container for function call overrides */ +struct apic_override __x86_apic_override __initdata; + +#define apply_override(__cb) \ + if (__x86_apic_override.__cb) \ + apic->__cb = __x86_apic_override.__cb + +static __init void restore_override_callbacks(void) +{ + apply_override(eoi); + apply_override(native_eoi); + apply_override(write); + apply_override(read); + apply_override(send_IPI); + apply_override(send_IPI_mask); + apply_override(send_IPI_mask_allbutself); + apply_override(send_IPI_allbutself); + apply_override(send_IPI_all); + apply_override(send_IPI_self); + apply_override(icr_read); + apply_override(icr_write); + apply_override(wakeup_secondary_cpu); + apply_override(wakeup_secondary_cpu_64); +} + +#define update_call(__cb) \ + static_call_update(apic_call_##__cb, *apic->__cb) + +static __init void update_static_calls(void) +{ + update_call(eoi); + update_call(native_eoi); + update_call(write); + update_call(read); + update_call(send_IPI); + update_call(send_IPI_mask); + update_call(send_IPI_mask_allbutself); + update_call(send_IPI_allbutself); + update_call(send_IPI_all); + update_call(send_IPI_self); + update_call(icr_read); + update_call(icr_write); + update_call(wait_icr_idle); + update_call(wakeup_secondary_cpu); + update_call(wakeup_secondary_cpu_64); +} + +void __init apic_setup_apic_calls(void) +{ + /* Ensure that the default APIC has native_eoi populated */ + apic->native_eoi = apic->eoi; + update_static_calls(); + pr_info("Static calls initialized\n"); +} + +void __init apic_install_driver(struct apic *driver) +{ + if (apic == driver) + return; + + apic = driver; + + if (IS_ENABLED(CONFIG_X86_X2APIC) && apic->x2apic_set_max_apicid) + apic->max_apic_id = x2apic_max_apicid; + + /* Copy the original eoi() callback as KVM/HyperV might overwrite it */ + if (!apic->native_eoi) + apic->native_eoi = apic->eoi; + + /* Apply any already installed callback overrides */ + restore_override_callbacks(); + update_static_calls(); + + pr_info("Switched APIC routing to: %s\n", driver->name); +} diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a868b76cd3d4..28f934f05a85 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@ #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/pgtable.h> +#include <asm/x86_init.h> #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -85,8 +86,8 @@ static unsigned int ioapic_dynirq_base; static int ioapic_initialized; struct irq_pin_list { - struct list_head list; - int apic, pin; + struct list_head list; + int apic, pin; }; struct mp_chip_data { @@ -95,7 +96,7 @@ struct mp_chip_data { bool is_level; bool active_low; bool isa_irq; - u32 count; + u32 count; }; struct mp_ioapic_gsi { @@ -104,21 +105,17 @@ struct mp_ioapic_gsi { }; static 
struct ioapic { - /* - * # of IRQ routing registers - */ - int nr_registers; - /* - * Saved state during suspend/resume, or while enabling intr-remap. - */ - struct IO_APIC_route_entry *saved_registers; + /* # of IRQ routing registers */ + int nr_registers; + /* Saved state during suspend/resume, or while enabling intr-remap. */ + struct IO_APIC_route_entry *saved_registers; /* I/O APIC config */ - struct mpc_ioapic mp_config; + struct mpc_ioapic mp_config; /* IO APIC gsi routing info */ - struct mp_ioapic_gsi gsi_config; - struct ioapic_domain_cfg irqdomain_cfg; - struct irq_domain *irqdomain; - struct resource *iomem_res; + struct mp_ioapic_gsi gsi_config; + struct ioapic_domain_cfg irqdomain_cfg; + struct irq_domain *irqdomain; + struct resource *iomem_res; } ioapics[MAX_IO_APICS]; #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver @@ -177,7 +174,7 @@ int mp_bus_id_to_type[MAX_MP_BUSSES]; DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int skip_ioapic_setup; +bool ioapic_is_disabled __ro_after_init; /** * disable_ioapic_support() - disables ioapic support at runtime @@ -188,7 +185,7 @@ void disable_ioapic_support(void) noioapicquirk = 1; noioapicreroute = -1; #endif - skip_ioapic_setup = 1; + ioapic_is_disabled = true; } static int __init parse_noapic(char *str) @@ -204,10 +201,9 @@ void mp_save_irq(struct mpc_intsrc *m) { int i; - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, - m->srcbusirq, m->dstapic, m->dstirq); + apic_pr_verbose("Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, + m->srcbusirq, m->dstapic, m->dstirq); for (i = 0; i < mp_irq_entries; i++) { if (!memcmp(&mp_irqs[i], m, sizeof(*m))) @@ -268,12 +264,14 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) static inline void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(vector, &io_apic->eoi); } unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); return readl(&io_apic->data); } @@ -299,14 +297,8 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { - struct IO_APIC_route_entry entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - entry = __ioapic_read_entry(apic, pin); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return entry; + guard(raw_spinlock_irqsave)(&ioapic_lock); + return __ioapic_read_entry(apic, pin); } /* @@ -323,11 +315,8 @@ static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); __ioapic_write_entry(apic, pin, e); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -338,12 +327,10 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_mask_entry(int apic, int pin) { struct IO_APIC_route_entry e = { .masked = true }; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); io_apic_write(apic, 0x10 + 2*pin, e.w1); 
io_apic_write(apic, 0x11 + 2*pin, e.w2); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -351,68 +338,39 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static int __add_pin_to_irq_node(struct mp_chip_data *data, - int node, int apic, int pin) +static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin) { struct irq_pin_list *entry; - /* don't allow duplicates */ - for_each_irq_pin(entry, data->irq_2_pin) + /* Don't allow duplicates */ + for_each_irq_pin(entry, data->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) - return 0; + return true; + } entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); if (!entry) { - pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", - node, apic, pin); - return -ENOMEM; + pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin); + return false; } + entry->apic = apic; entry->pin = pin; list_add_tail(&entry->list, &data->irq_2_pin); - - return 0; + return true; } static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) { struct irq_pin_list *tmp, *entry; - list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) + list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) { if (entry->apic == apic && entry->pin == pin) { list_del(&entry->list); kfree(entry); return; } -} - -static void add_pin_to_irq_node(struct mp_chip_data *data, - int node, int apic, int pin) -{ - if (__add_pin_to_irq_node(data, node, apic, pin)) - panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, data->irq_2_pin) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - /* every one is different, right? 
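Editor's aside: the conversions above (ioapic_read_entry(), ioapic_write_entry(), ioapic_mask_entry()) all switch from open-coded raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() pairs to guard() from linux/cleanup.h, which releases the lock automatically when the enclosing scope ends. A self-contained before/after sketch, using a hypothetical demo_lock and demo_reg:

	#include <linux/cleanup.h>
	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(demo_lock);
	static unsigned int demo_reg;

	/* Pre-conversion shape: manual lock/unlock with a flags variable */
	static unsigned int demo_read_manual(void)
	{
		unsigned long flags;
		unsigned int val;

		raw_spin_lock_irqsave(&demo_lock, flags);
		val = demo_reg;
		raw_spin_unlock_irqrestore(&demo_lock, flags);
		return val;
	}

	/*
	 * Post-conversion shape: guard() drops the lock at scope exit,
	 * which is what lets ioapic_read_entry() return directly while
	 * still holding the lock for the read itself.
	 */
	static unsigned int demo_read_guarded(void)
	{
		guard(raw_spinlock_irqsave)(&demo_lock);
		return demo_reg;
	}
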
*/ - return; - } } - - /* old apic/pin didn't exist, so just add new ones */ - add_pin_to_irq_node(data, node, newapic, newpin); } static void io_apic_modify_irq(struct mp_chip_data *data, bool masked, @@ -429,12 +387,12 @@ static void io_apic_modify_irq(struct mp_chip_data *data, bool masked, } } +/* + * Synchronize the IO-APIC and the CPU by doing a dummy read from the + * IO-APIC + */ static void io_apic_sync(struct irq_pin_list *entry) { - /* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ struct io_apic __iomem *io_apic; io_apic = io_apic_base(entry->apic); @@ -444,11 +402,9 @@ static void io_apic_sync(struct irq_pin_list *entry) static void mask_ioapic_irq(struct irq_data *irq_data) { struct mp_chip_data *data = irq_data->chip_data; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); io_apic_modify_irq(data, true, &io_apic_sync); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void __unmask_ioapic(struct mp_chip_data *data) @@ -459,11 +415,9 @@ static void __unmask_ioapic(struct mp_chip_data *data) static void unmask_ioapic_irq(struct irq_data *irq_data) { struct mp_chip_data *data = irq_data->chip_data; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); __unmask_ioapic(data); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -491,30 +445,24 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector) entry = entry1 = __ioapic_read_entry(apic, pin); - /* - * Mask the entry and change the trigger mode to edge. - */ + /* Mask the entry and change the trigger mode to edge. */ entry1.masked = true; entry1.is_level = false; __ioapic_write_entry(apic, pin, entry1); - /* - * Restore the previous level triggered entry. - */ + /* Restore the previous level triggered entry. */ __ioapic_write_entry(apic, pin, entry); } } static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) { - unsigned long flags; struct irq_pin_list *entry; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); for_each_irq_pin(entry, data->irq_2_pin) __eoi_ioapic_pin(entry->apic, entry->pin, vector); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) @@ -537,8 +485,6 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) } if (entry.irr) { - unsigned long flags; - /* * Make sure the trigger mode is set to level. Explicit EOI * doesn't clear the remote-IRR if the trigger mode is not @@ -548,9 +494,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) entry.is_level = true; ioapic_write_entry(apic, pin, entry); } - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); __eoi_ioapic_pin(apic, pin, entry.vector); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -585,28 +530,23 @@ static int pirq_entries[MAX_PIRQS] = { static int __init ioapic_pirq_setup(char *str) { - int i, max; - int ints[MAX_PIRQS+1]; + int i, max, ints[MAX_PIRQS+1]; get_options(str, ARRAY_SIZE(ints), ints); - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); + apic_pr_verbose("PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; if (ints[0] < MAX_PIRQS) max = ints[0]; for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... 
PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ + apic_pr_verbose("... PIRQ%d -> IRQ %d\n", i, ints[i + 1]); + /* PIRQs are mapped upside down, usually */ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; } return 1; } - __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ @@ -625,8 +565,7 @@ int save_ioapic_entries(void) } for_each_pin(apic, pin) - ioapics[apic].saved_registers[pin] = - ioapic_read_entry(apic, pin); + ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin); } return err; @@ -667,8 +606,7 @@ int restore_ioapic_entries(void) continue; for_each_pin(apic, pin) - ioapic_write_entry(apic, pin, - ioapics[apic].saved_registers[pin]); + ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]); } return 0; } @@ -680,12 +618,13 @@ static int find_irq_entry(int ioapic_idx, int pin, int type) { int i; - for (i = 0; i < mp_irq_entries; i++) + for (i = 0; i < mp_irq_entries; i++) { if (mp_irqs[i].irqtype == type && (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || mp_irqs[i].dstapic == MP_APIC_ALL) && mp_irqs[i].dstirq == pin) return i; + } return -1; } @@ -700,10 +639,8 @@ static int __init find_isa_irq_pin(int irq, int type) for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].irqtype == type) && + if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) && (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].dstirq; } return -1; @@ -716,8 +653,7 @@ static int __init find_isa_irq_apic(int irq, int type) for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].irqtype == type) && + if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) && (mp_irqs[i].srcbusirq == irq)) break; } @@ -725,9 +661,10 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int ioapic_idx; - for_each_ioapic(ioapic_idx) + for_each_ioapic(ioapic_idx) { if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) return ioapic_idx; + } } return -1; @@ -768,8 +705,7 @@ static bool EISA_ELCR(unsigned int irq) unsigned int port = PIC_ELCR1 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); + apic_pr_verbose("Broken MPtable reports ISA irq %d\n", irq); return false; } @@ -830,7 +766,7 @@ static int __acpi_get_override_irq(u32 gsi, bool *trigger, bool *polarity) { int ioapic, pin, idx; - if (skip_ioapic_setup) + if (ioapic_is_disabled) return -1; ioapic = mp_find_ioapic(gsi); @@ -946,9 +882,9 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, struct irq_alloc_info *info) { + int type = ioapics[ioapic].irqdomain_cfg.type; bool legacy = false; int irq = -1; - int type = ioapics[ioapic].irqdomain_cfg.type; switch (type) { case IOAPIC_DOMAIN_LEGACY: @@ -970,8 +906,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return -1; } - return __irq_domain_alloc_irqs(domain, irq, 1, - ioapic_alloc_attr_node(info), + return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), info, legacy, NULL); } @@ -985,29 +920,26 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be * multiple pins sharing the same legacy IRQ number when ACPI is disabled. 
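Editor's aside: the apic_printk(APIC_VERBOSE, ...) call sites above become apic_pr_verbose(). Assuming the new helpers keep the apic_verbosity gate of apic_printk() (whose existing definition is shown here), plausible definitions look like the following; the exact upstream macros may differ:

	/* Existing helper: print only at or above the requested verbosity */
	#define apic_printk(v, s, a...)			\
	do {						\
		if ((v) <= apic_verbosity)		\
			printk(s, ##a);			\
	} while (0)

	/* Assumed shorthand wrappers used throughout this patch */
	#define apic_pr_verbose(v...)	apic_printk(APIC_VERBOSE, KERN_INFO v)
	#define apic_pr_debug(v...)	apic_printk(APIC_DEBUG, KERN_DEBUG v)
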
*/ -static int alloc_isa_irq_from_domain(struct irq_domain *domain, - int irq, int ioapic, int pin, +static int alloc_isa_irq_from_domain(struct irq_domain *domain, int irq, int ioapic, int pin, struct irq_alloc_info *info) { - struct mp_chip_data *data; struct irq_data *irq_data = irq_get_irq_data(irq); int node = ioapic_alloc_attr_node(info); + struct mp_chip_data *data; /* * Legacy ISA IRQ has already been allocated, just add pin to * the pin list associated with this IRQ and program the IOAPIC - * entry. The IOAPIC entry + * entry. */ if (irq_data && irq_data->parent_data) { if (!mp_check_pin_attr(irq, info)) return -EBUSY; - if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, - info->ioapic.pin)) + if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin)) return -ENOMEM; } else { info->flags |= X86_IRQ_ALLOC_LEGACY; - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, - NULL); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; @@ -1021,11 +953,11 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, unsigned int flags, struct irq_alloc_info *info) { - int irq; - bool legacy = false; + struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); struct irq_alloc_info tmp; struct mp_chip_data *data; - struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + bool legacy = false; + int irq; if (!domain) return -ENOSYS; @@ -1045,7 +977,7 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, return -EINVAL; } - mutex_lock(&ioapic_mutex); + guard(mutex)(&ioapic_mutex); if (!(flags & IOAPIC_MAP_ALLOC)) { if (!legacy) { irq = irq_find_mapping(domain, pin); @@ -1066,8 +998,6 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, data->count++; } } - mutex_unlock(&ioapic_mutex); - return irq; } @@ -1075,26 +1005,20 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) { u32 gsi = mp_pin_to_gsi(ioapic, pin); - /* - * Debugging check, we are in big trouble if this message pops up! - */ + /* Debugging check, we are in big trouble if this message pops up! */ if (mp_irqs[idx].dstirq != pin) pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); #ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ + /* PCI IRQ command line redirection. Yes, limits are hardcoded. 
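Editor's aside: mp_map_pin_to_irq() above takes ioapic_mutex via guard(mutex) and bumps data->count for every additional user of a pin mapping; mp_unmap_irq() in a following hunk tears the interrupt down when the count reaches zero. A reduced sketch of that reference-counting pattern, with hypothetical demo types:

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	/* Hypothetical reduced type, for illustration only */
	struct demo_mapping {
		unsigned int count;
	};

	static DEFINE_MUTEX(demo_mutex);

	static void demo_map(struct demo_mapping *m)
	{
		guard(mutex)(&demo_mutex);
		m->count++;		/* another user of this pin mapping */
	}

	static bool demo_unmap(struct demo_mapping *m)
	{
		guard(mutex)(&demo_mutex);	/* unlocked on any return */
		return --m->count == 0;		/* true: caller frees the IRQ */
	}
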
*/ if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); + if (pirq_entries[pin - 16] != -1) { + if (!pirq_entries[pin - 16]) { + apic_pr_verbose("Disabling PIRQ%d\n", pin - 16); } else { int irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); + + apic_pr_verbose("Using PIRQ%d -> IRQ %d\n", pin - 16, irq); return irq; } } @@ -1132,10 +1056,9 @@ void mp_unmap_irq(int irq) if (!data || data->isa_irq) return; - mutex_lock(&ioapic_mutex); + guard(mutex)(&ioapic_mutex); if (--data->count == 0) irq_domain_free_irqs(irq, 1); - mutex_unlock(&ioapic_mutex); } /* @@ -1146,12 +1069,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) { int irq, i, best_ioapic = -1, best_idx = -1; - apic_printk(APIC_DEBUG, - "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); + apic_pr_debug("Querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, - "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + apic_pr_verbose("PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } @@ -1196,8 +1117,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; out: - return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, - IOAPIC_MAP_ALLOC); + return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, IOAPIC_MAP_ALLOC); } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); @@ -1208,17 +1128,16 @@ static void __init setup_IO_APIC_irqs(void) unsigned int ioapic, pin; int idx; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + apic_pr_verbose("Init IO_APIC IRQs\n"); for_each_ioapic_pin(ioapic, pin) { idx = find_irq_entry(ioapic, pin, mp_INT); - if (idx < 0) - apic_printk(APIC_VERBOSE, - KERN_DEBUG " apic %d pin %d not connected\n", - mpc_ioapic_id(ioapic), pin); - else - pin_2_irq(idx, ioapic, pin, - ioapic ? 0 : IOAPIC_MAP_ALLOC); + if (idx < 0) { + apic_pr_verbose("apic %d pin %d not connected\n", + mpc_ioapic_id(ioapic), pin); + } else { + pin_2_irq(idx, ioapic, pin, ioapic ? 0 : IOAPIC_MAP_ALLOC); + } } } @@ -1233,26 +1152,21 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) char buf[256]; int i; - printk(KERN_DEBUG "IOAPIC %d:\n", apic); + apic_dbg("IOAPIC %d:\n", apic); for (i = 0; i <= nr_entries; i++) { entry = ioapic_read_entry(apic, i); - snprintf(buf, sizeof(buf), - " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", - i, - entry.masked ? "disabled" : "enabled ", + snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", + i, entry.masked ? "disabled" : "enabled ", entry.is_level ? "level" : "edge ", entry.active_low ? "low " : "high", entry.vector, entry.irr, entry.delivery_status); if (entry.ir_format) { - printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", - buf, - (entry.ir_index_15 << 15) | entry.ir_index_0_14, - entry.ir_zero); + apic_dbg("%s, remapped, I(%04X), Z(%X)\n", buf, + (entry.ir_index_15 << 15) | entry.ir_index_0_14, entry.ir_zero); } else { - printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf, - entry.dest_mode_logical ? "logical " : "physical", - entry.virt_destid_8_14, entry.destid_0_7, - entry.delivery_mode); + apic_dbg("%s, %s, D(%02X%02X), M(%1d)\n", buf, + entry.dest_mode_logical ? 
"logical " : "physical", + entry.virt_destid_8_14, entry.destid_0_7, entry.delivery_mode); } } } @@ -1263,30 +1177,25 @@ static void __init print_IO_APIC(int ioapic_idx) union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - reg_01.raw = io_apic_read(ioapic_idx, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(ioapic_idx, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(ioapic_idx, 3); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %02X\n", - reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %02X\n", - reg_01.bits.version); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { + reg_00.raw = io_apic_read(ioapic_idx, 0); + reg_01.raw = io_apic_read(ioapic_idx, 1); + if (reg_01.bits.version >= 0x10) + reg_02.raw = io_apic_read(ioapic_idx, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(ioapic_idx, 3); + } + + apic_dbg("IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); + apic_dbg(".... register #00: %08X\n", reg_00.raw); + apic_dbg("....... : physical APIC id: %02X\n", reg_00.bits.ID); + apic_dbg("....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + apic_dbg("....... : LTS : %X\n", reg_00.bits.LTS); + apic_dbg(".... register #01: %08X\n", *(int *)®_01); + apic_dbg("....... : max redirection entries: %02X\n", reg_01.bits.entries); + apic_dbg("....... : PRQ implemented: %X\n", reg_01.bits.PRQ); + apic_dbg("....... : IO APIC version: %02X\n", reg_01.bits.version); /* * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, @@ -1294,8 +1203,8 @@ static void __init print_IO_APIC(int ioapic_idx) * value, so ignore it if reg_02 == reg_01. */ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); + apic_dbg(".... register #02: %08X\n", reg_02.raw); + apic_dbg("....... : arbitration: %02X\n", reg_02.bits.arbitration); } /* @@ -1305,11 +1214,11 @@ static void __init print_IO_APIC(int ioapic_idx) */ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + apic_dbg(".... register #03: %08X\n", reg_03.raw); + apic_dbg("....... : Boot DT : %X\n", reg_03.bits.boot_DT); } - printk(KERN_DEBUG ".... IRQ redirection table:\n"); + apic_dbg(".... 
IRQ redirection table:\n"); io_apic_print_entries(ioapic_idx, reg_01.bits.entries); } @@ -1318,11 +1227,11 @@ void __init print_IO_APICs(void) { int ioapic_idx; unsigned int irq; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for_each_ioapic(ioapic_idx) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mpc_ioapic_id(ioapic_idx), - ioapics[ioapic_idx].nr_registers); + apic_dbg("number of MP IRQ sources: %d.\n", mp_irq_entries); + for_each_ioapic(ioapic_idx) { + apic_dbg("number of IO-APIC #%d registers: %d.\n", + mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers); + } /* * We are a bit conservative about what we expect. We have to @@ -1333,7 +1242,7 @@ void __init print_IO_APICs(void) for_each_ioapic(ioapic_idx) print_IO_APIC(ioapic_idx); - printk(KERN_DEBUG "IRQ to pin mappings:\n"); + apic_dbg("IRQ to pin mappings:\n"); for_each_active_irq(irq) { struct irq_pin_list *entry; struct irq_chip *chip; @@ -1348,7 +1257,7 @@ void __init print_IO_APICs(void) if (list_empty(&data->irq_2_pin)) continue; - printk(KERN_DEBUG "IRQ%d ", irq); + apic_dbg("IRQ%d ", irq); for_each_irq_pin(entry, data->irq_2_pin) pr_cont("-> %d:%d", entry->apic, entry->pin); pr_cont("\n"); @@ -1362,10 +1271,9 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; void __init enable_IO_APIC(void) { - int i8259_apic, i8259_pin; - int apic, pin; + int i8259_apic, i8259_pin, apic, pin; - if (skip_ioapic_setup) + if (ioapic_is_disabled) nr_ioapics = 0; if (!nr_legacy_irqs() || !nr_ioapics) @@ -1375,19 +1283,21 @@ void __init enable_IO_APIC(void) /* See if any of the pins is in ExtINT mode */ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); - /* If the interrupt line is enabled and in ExtInt mode * I have found the pin where the i8259 is connected. + /* + * If the interrupt line is enabled and in ExtInt mode I + * have found the pin where the i8259 is connected. */ - if (!entry.masked && - entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) { + if (!entry.masked && entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) { ioapic_i8259.apic = apic; ioapic_i8259.pin = pin; - goto found_i8259; + break; } } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic + + /* + * Look to see if the MP table has reported the ExtINT + * + * If we could not find the appropriate pin by looking at the ioapic * the i8259 probably is not connected to the ioapic but give the * mptable a chance anyway. 
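Editor's aside: print_IO_APIC() above batches its register reads inside a single scoped_guard() and does the slow printing only after the lock is dropped. scoped_guard() is the block-scoped sibling of guard(); a minimal sketch with a hypothetical lock and shadow register:

	#include <linux/cleanup.h>
	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(demo_lock);
	static u32 demo_shadow;

	static void demo_dump(void)
	{
		u32 snap;

		/* Lock held only for this block, as in print_IO_APIC() */
		scoped_guard(raw_spinlock_irqsave, &demo_lock)
			snap = demo_shadow;

		/* Slow console output happens with the lock dropped */
		pr_info("snapshot: %08x\n", snap);
	}
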
*/ @@ -1395,29 +1305,24 @@ void __init enable_IO_APIC(void) i8259_apic = find_isa_irq_apic(0, mp_ExtINT); /* Trust the MP table if nothing is setup in the hardware */ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + pr_warn("ExtINT not setup in hardware but reported by MP table\n"); ioapic_i8259.pin = i8259_pin; ioapic_i8259.apic = i8259_apic; } /* Complain if the MP table and the hardware disagree */ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + pr_warn("ExtINT in hardware and MP table differ\n"); - /* - * Do not trust the IO-APIC being empty at bootup - */ + /* Do not trust the IO-APIC being empty at bootup */ clear_IO_APIC(); } void native_restore_boot_irq_mode(void) { /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. + * If the i8259 is routed through an IOAPIC Put that IOAPIC in + * virtual wire mode so legacy interrupts can be delivered. */ if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; @@ -1432,9 +1337,7 @@ void native_restore_boot_irq_mode(void) entry.destid_0_7 = apic_id & 0xFF; entry.virt_destid_8_14 = apic_id >> 8; - /* - * Add it to the IO-APIC irq-routing table: - */ + /* Add it to the IO-APIC irq-routing table */ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } @@ -1457,37 +1360,34 @@ void restore_boot_irq_mode(void) * * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) +static void __init setup_ioapic_ids_from_mpc_nocheck(void) { + DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC); + const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int ioapic_idx; - int i; unsigned char old_id; - unsigned long flags; + int ioapic_idx, i; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); + copy_phys_cpu_present_map(phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. */ for_each_ioapic(ioapic_idx) { /* Read the register 0 value */ - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) + reg_00.raw = io_apic_read(ioapic_idx, 0); old_id = mpc_ioapic_id(ioapic_idx); - if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); + if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) { + pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; } @@ -1496,65 +1396,54 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. 
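Editor's aside: the setup_ioapic_ids_from_mpc_nocheck() rework in the next hunk replaces the physid_mask_t helpers with a plain DECLARE_BITMAP() plus test_bit()/set_bit(). The "find a free APIC ID below the broadcast ID" logic then reduces to standard bitmap operations; a sketch with a stand-in size constant:

	#include <linux/bitmap.h>
	#include <linux/bitops.h>

	#define DEMO_MAX_APIC	0x100	/* stand-in for MAX_LOCAL_APIC */

	static DECLARE_BITMAP(demo_id_map, DEMO_MAX_APIC);

	/* Claim the lowest unused APIC ID below @limit, or -1 if none free */
	static int demo_claim_free_id(unsigned int limit)
	{
		unsigned int id = find_first_zero_bit(demo_id_map, limit);

		if (id >= limit)
			return -1;
		set_bit(id, demo_id_map);
		return id;
	}
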
*/ - if (apic->check_apicid_used(&phys_id_present_map, - mpc_ioapic_id(ioapic_idx))) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) + if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { + pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + for (i = 0; i < broadcast_id; i++) + if (!test_bit(i, phys_id_present_map)) break; - if (i >= get_physical_broadcast()) + if (i >= broadcast_id) panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); + pr_err("... fixing up to %d. (tell your hw vendor)\n", i); + set_bit(i, phys_id_present_map); ioapics[ioapic_idx].mp_config.apicid = i; } else { - physid_mask_t tmp; - apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), - &tmp); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", + apic_pr_verbose("Setting %d in the phys_id_present_map\n", mpc_ioapic_id(ioapic_idx)); - physids_or(phys_id_present_map, phys_id_present_map, tmp); + set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map); } /* - * We need to adjust the IRQ routing table - * if the ID changed. + * We need to adjust the IRQ routing table if the ID + * changed. */ - if (old_id != mpc_ioapic_id(ioapic_idx)) - for (i = 0; i < mp_irq_entries; i++) + if (old_id != mpc_ioapic_id(ioapic_idx)) { + for (i = 0; i < mp_irq_entries; i++) { if (mp_irqs[i].dstapic == old_id) - mp_irqs[i].dstapic - = mpc_ioapic_id(ioapic_idx); + mp_irqs[i].dstapic = mpc_ioapic_id(ioapic_idx); + } + } /* - * Update the ID register according to the right value - * from the MPC table if they are different. + * Update the ID register according to the right value from + * the MPC table if they are different. */ if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) continue; - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mpc_ioapic_id(ioapic_idx)); + apic_pr_verbose("...changing IO-APIC physical APIC ID to %d ...", + mpc_ioapic_id(ioapic_idx)); reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic_idx, 0, reg_00.raw); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - /* - * Sanity check - */ - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { + io_apic_write(ioapic_idx, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic_idx, 0); + } + /* Sanity check */ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) pr_cont("could not set ID!\n"); else - apic_printk(APIC_VERBOSE, " ok.\n"); + apic_pr_verbose(" ok.\n"); } } @@ -1597,10 +1486,9 @@ static void __init delay_with_tsc(void) * 1 GHz == 40 jiffies */ do { - rep_nop(); + native_pause(); now = rdtsc(); - } while ((now - start) < 40000000000ULL / HZ && - time_before_eq(jiffies, end)); + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } static void __init delay_without_tsc(void) @@ -1661,36 +1549,29 @@ static int __init timer_irq_works(void) * so we 'resend' these IRQs via IPIs, to the same CPU. It's much * better to do it this way as thus we do not have to be aware of * 'pending' interrupts in the IRQ path, except at this point. 
- */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... + * + * Edge triggered needs to resend any interrupt that was delayed but this + * is now handled in the device independent code. + * + * Starting up a edge-triggered IO-APIC interrupt is nasty - we need to + * make sure that we get the edge. If it is already asserted for some + * reason, we need return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake an edge even if it + * isn't on the 8259A... */ static unsigned int startup_ioapic_irq(struct irq_data *data) { int was_pending = 0, irq = data->irq; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); if (irq < nr_legacy_irqs()) { legacy_pic->mask(irq); if (legacy_pic->irq_pending(irq)) was_pending = 1; } __unmask_ioapic(data->chip_data); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return was_pending; } @@ -1700,9 +1581,8 @@ atomic_t irq_mis_count; static bool io_apic_level_ack_pending(struct mp_chip_data *data) { struct irq_pin_list *entry; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); for_each_irq_pin(entry, data->irq_2_pin) { struct IO_APIC_route_entry e; int pin; @@ -1710,13 +1590,9 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) pin = entry->pin; e.w1 = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ - if (e.irr) { - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + if (e.irr) return true; - } } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return false; } @@ -1734,7 +1610,8 @@ static inline bool ioapic_prepare_move(struct irq_data *data) static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { if (unlikely(moveit)) { - /* Only migrate the irq if the ack has been received. + /* + * Only migrate the irq if the ack has been received. * * On rare occasions the broadcast level triggered ack gets * delayed going to ioapics, and if we reprogram the @@ -1826,7 +1703,7 @@ static void ioapic_ack_level(struct irq_data *irq_data) * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly. 
*/ - ack_APIC_irq(); + apic_eoi(); /* * Tail end of clearing remote IRR bit (either by delivering the EOI @@ -1917,18 +1794,16 @@ static void ioapic_configure_entry(struct irq_data *irqd) __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); } -static int ioapic_set_affinity(struct irq_data *irq_data, - const struct cpumask *mask, bool force) +static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { struct irq_data *parent = irq_data->parent_data; - unsigned long flags; int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); - raw_spin_lock_irqsave(&ioapic_lock, flags); + + guard(raw_spinlock_irqsave)(&ioapic_lock); if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) ioapic_configure_entry(irq_data); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -1947,9 +1822,8 @@ static int ioapic_set_affinity(struct irq_data *irq_data, * * Verify that the corresponding Remote-IRR bits are clear. */ -static int ioapic_irq_get_chip_state(struct irq_data *irqd, - enum irqchip_irq_state which, - bool *state) +static int ioapic_irq_get_chip_state(struct irq_data *irqd, enum irqchip_irq_state which, + bool *state) { struct mp_chip_data *mcd = irqd->chip_data; struct IO_APIC_route_entry rentry; @@ -1959,7 +1833,8 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd, return -EINVAL; *state = false; - raw_spin_lock(&ioapic_lock); + + guard(raw_spinlock)(&ioapic_lock); for_each_irq_pin(p, mcd->irq_2_pin) { rentry = __ioapic_read_entry(p->apic, p->pin); /* @@ -1973,7 +1848,6 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd, break; } } - raw_spin_unlock(&ioapic_lock); return 0; } @@ -1987,7 +1861,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE | + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED | IRQCHIP_AFFINITY_PRE_STARTUP, }; @@ -2014,14 +1888,13 @@ static inline void init_IO_APIC_traps(void) cfg = irq_cfg(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. + * Hmm.. We don't have an entry for this, so + * default to an old-fashioned 8259 interrupt if we + * can. Otherwise set the dummy interrupt chip. */ if (irq < nr_legacy_irqs()) legacy_pic->make_irq(irq); else - /* Strange. Oh, well.. 
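Editor's aside: the ack_APIC_irq() call sites above become apic_eoi(), which goes through the apic_call_eoi static call set up in init.c earlier in this patch. A hedged sketch of the wrapper, plus the xAPIC backend that the numachip and apic_noop hunks now plug in as .eoi; the wrapper shape is an assumption mirroring DEFINE_APIC_CALL(eoi):

	#include <linux/static_call.h>
	#include <asm/apic.h>

	DECLARE_STATIC_CALL(apic_call_eoi, *apic->eoi);

	static __always_inline void apic_eoi(void)
	{
		static_call(apic_call_eoi)();
	}

	/* Memory-mapped xAPIC backend: EOI is a plain register write */
	void native_apic_mem_eoi(void)
	{
		native_apic_mem_write(APIC_EOI, APIC_EOI_ACK);
	}
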
*/ irq_set_chip(irq, &no_irq_chip); } } @@ -2030,26 +1903,23 @@ static inline void init_IO_APIC_traps(void) /* * The local APIC irq-chip implementation: */ - static void mask_lapic_irq(struct irq_data *data) { - unsigned long v; + unsigned long v = apic_read(APIC_LVT0); - v = apic_read(APIC_LVT0); apic_write(APIC_LVT0, v | APIC_LVT_MASKED); } static void unmask_lapic_irq(struct irq_data *data) { - unsigned long v; + unsigned long v = apic_read(APIC_LVT0); - v = apic_read(APIC_LVT0); apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } static void ack_lapic_irq(struct irq_data *data) { - ack_APIC_irq(); + apic_eoi(); } static struct irq_chip lapic_chip __read_mostly = { @@ -2062,8 +1932,7 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { irq_clear_status_flags(irq, IRQ_LEVEL); - irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); + irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } /* @@ -2075,9 +1944,9 @@ static void lapic_register_intr(int irq) */ static inline void __init unlock_ExtINT_logic(void) { - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; + struct IO_APIC_route_entry entry0, entry1; + int apic, pin, i; u32 apic_id; pin = find_isa_irq_pin(8, mp_INT); @@ -2094,7 +1963,7 @@ static inline void __init unlock_ExtINT_logic(void) entry0 = ioapic_read_entry(apic, pin); clear_IO_APIC_pin(apic, pin); - apic_id = hard_smp_processor_id(); + apic_id = read_apic_id(); memset(&entry1, 0, sizeof(entry1)); entry1.dest_mode_logical = true; @@ -2137,10 +2006,10 @@ static int __init disable_timer_pin_setup(char *arg) } early_param("disable_timer_pin_1", disable_timer_pin_setup); -static int mp_alloc_timer_irq(int ioapic, int pin) +static int __init mp_alloc_timer_irq(int ioapic, int pin) { - int irq = -1; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + int irq = -1; if (domain) { struct irq_alloc_info info; @@ -2148,21 +2017,36 @@ static int mp_alloc_timer_irq(int ioapic, int pin) ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); info.devid = mpc_ioapic_id(ioapic); info.ioapic.pin = pin; - mutex_lock(&ioapic_mutex); + guard(mutex)(&ioapic_mutex); irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); - mutex_unlock(&ioapic_mutex); } return irq; } +static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, data->irq_2_pin) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + return; + } + } + + /* Old apic/pin didn't exist, so just add a new one */ + add_pin_to_irq_node(data, node, newapic, newpin); +} + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. 
*/ static inline void __init check_timer(void) { @@ -2200,9 +2084,8 @@ static inline void __init check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); + pr_info("..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", + cfg->vector, apic1, pin1, apic2, pin2); /* * Some BIOS writers are clueless and report the ExtINTA @@ -2212,7 +2095,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2246,13 +2129,10 @@ static inline void __init check_timer(void) panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); + pr_err("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); + pr_info("...trying to set up timer (IRQ0) through the 8259A ...\n"); + pr_info("..... (found apic %d pin %d) ...\n", apic2, pin2); /* * legacy devices should be connected to IO APIC #0 */ @@ -2261,7 +2141,7 @@ static inline void __init check_timer(void) irq_domain_activate_irq(irq_data, false); legacy_pic->unmask(0); if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); + pr_info("....... works.\n"); goto out; } /* @@ -2269,26 +2149,24 @@ static inline void __init check_timer(void) */ legacy_pic->mask(0); clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); + pr_info("....... failed.\n"); } - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); + pr_info("...trying to set up timer as Virtual Wire IRQ...\n"); lapic_register_intr(0); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ legacy_pic->unmask(0); if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + pr_info("..... works.\n"); goto out; } legacy_pic->mask(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); + pr_info("..... failed.\n"); - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); + pr_info("...trying to set up timer as ExtINT IRQ...\n"); legacy_pic->init(0); legacy_pic->make_irq(0); @@ -2298,14 +2176,15 @@ static inline void __init check_timer(void) unlock_ExtINT_logic(); if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + pr_info("..... works.\n"); goto out; } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - if (apic_is_x2apic_enabled()) - apic_printk(APIC_QUIET, KERN_INFO - "Perhaps problem with the pre-enabled x2apic mode\n" - "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); + + pr_info("..... failed :\n"); + if (apic_is_x2apic_enabled()) { + pr_info("Perhaps problem with the pre-enabled x2apic mode\n" + "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); + } panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. 
Then try booting with the 'noapic' option.\n"); out: @@ -2333,11 +2212,11 @@ out: static int mp_irqdomain_create(int ioapic) { - struct irq_domain *parent; + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); int hwirqs = mp_ioapic_pin_count(ioapic); struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; - struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + struct irq_domain *parent; struct fwnode_handle *fn; struct irq_fwspec fwspec; @@ -2346,7 +2225,7 @@ static int mp_irqdomain_create(int ioapic) /* Handle device tree enumerated APICs proper */ if (cfg->dev) { - fn = of_node_to_fwnode(cfg->dev); + fn = of_fwnode_handle(cfg->dev); } else { fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic)); if (!fn) @@ -2357,16 +2236,15 @@ static int mp_irqdomain_create(int ioapic) fwspec.param_count = 1; fwspec.param[0] = mpc_ioapic_id(ioapic); - parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY); + parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI); if (!parent) { if (!cfg->dev) irq_domain_free_fwnode(fn); return -ENODEV; } - ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, - (void *)(long)ioapic); - + ip->irqdomain = irq_domain_create_hierarchy(parent, 0, hwirqs, fn, cfg->ops, + (void *)(long)ioapic); if (!ip->irqdomain) { /* Release fw handle if it was allocated above */ if (!cfg->dev) @@ -2374,12 +2252,8 @@ static int mp_irqdomain_create(int ioapic) return -ENOMEM; } - ip->irqdomain->parent = parent; - - if (cfg->type == IOAPIC_DOMAIN_LEGACY || - cfg->type == IOAPIC_DOMAIN_STRICT) - ioapic_dynirq_base = max(ioapic_dynirq_base, - gsi_cfg->gsi_end + 1); + if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT) + ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1); return 0; } @@ -2401,18 +2275,16 @@ void __init setup_IO_APIC(void) { int ioapic; - if (skip_ioapic_setup || !nr_ioapics) + if (ioapic_is_disabled || !nr_ioapics) return; io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + apic_pr_verbose("ENABLING IO-APIC IRQs\n"); for_each_ioapic(ioapic) BUG_ON(mp_irqdomain_create(ioapic)); - /* - * Set up IO-APIC IRQ routing. - */ + /* Set up IO-APIC IRQ routing. 
*/ x86_init.mpparse.setup_ioapic_ids(); sync_Arb_IDs(); @@ -2426,19 +2298,22 @@ void __init setup_IO_APIC(void) static void resume_ioapic_id(int ioapic_idx) { - unsigned long flags; union IO_APIC_reg_00 reg_00; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); reg_00.raw = io_apic_read(ioapic_idx, 0); if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); io_apic_write(ioapic_idx, 0, reg_00.raw); } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void ioapic_resume(void) +static int ioapic_suspend(void *data) +{ + return save_ioapic_entries(); +} + +static void ioapic_resume(void *data) { int ioapic_idx; @@ -2448,14 +2323,18 @@ static void ioapic_resume(void) restore_ioapic_entries(); } -static struct syscore_ops ioapic_syscore_ops = { - .suspend = save_ioapic_entries, - .resume = ioapic_resume, +static const struct syscore_ops ioapic_syscore_ops = { + .suspend = ioapic_suspend, + .resume = ioapic_resume, +}; + +static struct syscore ioapic_syscore = { + .ops = &ioapic_syscore_ops, }; static int __init ioapic_init_ops(void) { - register_syscore_ops(&ioapic_syscore_ops); + register_syscore(&ioapic_syscore); return 0; } @@ -2465,124 +2344,104 @@ device_initcall(ioapic_init_ops); static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; - unsigned long flags; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); reg_01.raw = io_apic_read(ioapic, 1); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - /* The register returns the maximum index redir index - * supported, which is one less than the total number of redir - * entries. + /* + * The register returns the maximum redirection index supported, + * which is one less than the total number of redirection entries. */ return reg_01.bits.entries + 1; } unsigned int arch_dynirq_lower_bound(unsigned int from) { + unsigned int ret; + /* * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet. */ - if (!ioapic_initialized) - return gsi_top; + ret = ioapic_dynirq_base ? : gsi_top; + /* - * For DT enabled machines ioapic_dynirq_base is irrelevant and not - * updated. So simply return @from if ioapic_dynirq_base == 0. + * For DT enabled machines ioapic_dynirq_base is irrelevant and + * always 0. gsi_top can be 0 if there is no IO/APIC registered. + * 0 is an invalid interrupt number for dynamic allocations. Return + * @from instead. */ - return ioapic_dynirq_base ? : from; + return ret ? : from; } #ifdef CONFIG_X86_32 static int io_apic_get_unique_id(int ioapic, int apic_id) { + static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC); + const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; int i = 0; - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. 
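The arch_dynirq_lower_bound() rewrite above leans on the GNU C conditional with an omitted middle operand: a ? : b evaluates to a when a is non-zero and to b otherwise, without evaluating a twice. In isolation (illustrative helper, not from the patch):

/* GNU C extension: equivalent to (preferred ? preferred : fallback) */
static unsigned int demo_pick_nonzero(unsigned int preferred, unsigned int fallback)
{
        return preferred ? : fallback;
}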
- */ + /* Initialize the ID map */ + if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC)) + copy_phys_cpu_present_map(apic_id_map); - if (physids_empty(apic_id_map)) - apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) + reg_00.raw = io_apic_read(ioapic, 0); - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); + if (apic_id >= broadcast_id) { + pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n", + ioapic, apic_id, reg_00.bits.ID); apic_id = reg_00.bits.ID; } - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (apic->check_apicid_used(&apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(&apic_id_map, i)) + /* Every APIC in a system must have a unique ID */ + if (test_bit(apic_id, apic_id_map)) { + for (i = 0; i < broadcast_id; i++) { + if (!test_bit(i, apic_id_map)) break; } - if (i == get_physical_broadcast()) + if (i == broadcast_id) panic("Max apic_id exceeded!\n"); - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - + pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i); apic_id = i; } - apic->apicid_to_cpu_present(apic_id, &tmp); - physids_or(apic_id_map, apic_id_map, tmp); + set_bit(apic_id, apic_id_map); if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + } /* Sanity check */ if (reg_00.bits.ID != apic_id) { - pr_err("IOAPIC[%d]: Unable to change apic_id!\n", - ioapic); + pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); return -1; } } - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + apic_pr_verbose("IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); return apic_id; } static u8 io_apic_unique_id(int idx, u8 id) { - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(boot_cpu_apic_version)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version)) return io_apic_get_unique_id(idx, id); - else - return id; + return id; } #else static u8 io_apic_unique_id(int idx, u8 id) { union IO_APIC_reg_00 reg_00; DECLARE_BITMAP(used, 256); - unsigned long flags; u8 new_id; int i; @@ -2598,26 +2457,23 @@ static u8 io_apic_unique_id(int idx, u8 id) * Read the current id from the ioapic and keep it if * available. */ - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) + reg_00.raw = io_apic_read(idx, 0); + new_id = reg_00.bits.ID; if (!test_bit(new_id, used)) { - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Using reg apic_id %d instead of %d\n", - idx, new_id, id); + apic_pr_verbose("IOAPIC[%d]: Using reg apic_id %d instead of %d\n", + idx, new_id, id); return new_id; } - /* - * Get the next free id and write it to the ioapic. 
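The rewritten unique-ID helpers in this hunk swap the old physid_mask machinery for the generic bitmap API. The claim-a-free-ID pattern they share, reduced to a standalone sketch (hypothetical demo_* names):

#include <linux/bitmap.h>
#include <linux/errno.h>

#define DEMO_MAX_IDS 256

static DECLARE_BITMAP(demo_id_map, DEMO_MAX_IDS);

static int demo_claim_free_id(void)
{
        unsigned int id = find_first_zero_bit(demo_id_map, DEMO_MAX_IDS);

        if (id >= DEMO_MAX_IDS)
                return -ENOSPC;         /* every ID is taken */

        set_bit(id, demo_id_map);       /* mark it used */
        return id;
}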
- */ + /* Get the next free id and write it to the ioapic. */ new_id = find_first_zero_bit(used, 256); reg_00.bits.ID = new_id; - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(idx, 0, reg_00.raw); - reg_00.raw = io_apic_read(idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); + scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { + io_apic_write(idx, 0, reg_00.raw); + reg_00.raw = io_apic_read(idx, 0); + } /* Sanity check */ BUG_ON(reg_00.bits.ID != new_id); @@ -2627,12 +2483,10 @@ static u8 io_apic_unique_id(int idx, u8 id) static int io_apic_get_version(int ioapic) { - union IO_APIC_reg_01 reg_01; - unsigned long flags; + union IO_APIC_reg_01 reg_01; - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); reg_01.raw = io_apic_read(ioapic, 1); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } @@ -2647,8 +2501,8 @@ static struct resource *ioapic_resources; static struct resource * __init ioapic_setup_resources(void) { - unsigned long n; struct resource *res; + unsigned long n; char *mem; int i; @@ -2658,9 +2512,7 @@ static struct resource * __init ioapic_setup_resources(void) n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); n *= nr_ioapics; - mem = memblock_alloc(n, SMP_CACHE_BYTES); - if (!mem) - panic("%s: Failed to allocate %lu bytes\n", __func__, n); + mem = memblock_alloc_or_panic(n, SMP_CACHE_BYTES); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; @@ -2683,10 +2535,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pgprot_t flags = FIXMAP_PAGE_NOCACHE; /* - * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot + * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot * bits, just like normal ioremap(): */ - flags = pgprot_decrypted(flags); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + if (x86_platform.hyper.is_private_mmio(phys)) + flags = pgprot_encrypted(flags); + else + flags = pgprot_decrypted(flags); + } __set_fixmap(idx, phys, flags); } @@ -2703,12 +2560,10 @@ void __init io_apic_init_mappings(void) ioapic_phys = mpc_ioapic_addr(i); #ifdef CONFIG_X86_32 if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " + pr_err("WARNING: bogus zero IO-APIC address found in MPTABLE, " "disabling IO/APIC support!\n"); smp_found_config = 0; - skip_ioapic_setup = 1; + ioapic_is_disabled = true; goto fake_ioapic_page; } #endif @@ -2716,17 +2571,13 @@ void __init io_apic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE, + ioapic_phys = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); - if (!ioapic_phys) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } io_apic_set_fixmap(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), - ioapic_phys); + apic_pr_verbose("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ioapic_phys); idx++; ioapic_res->start = ioapic_phys; @@ -2737,13 +2588,12 @@ fake_ioapic_page: void __init ioapic_insert_resources(void) { - int i; struct resource *r = ioapic_resources; + int i; if (!r) { if (nr_ioapics > 0) - printk(KERN_ERR - "IO APIC resources couldn't be allocated.\n"); + pr_err("IO APIC resources couldn't be allocated.\n"); return; } @@ -2763,11 +2613,12 @@ int mp_find_ioapic(u32 
gsi) /* Find the IOAPIC that manages this GSI. */ for_each_ioapic(i) { struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); + if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end) return i; } - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + pr_err("ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); return -1; } @@ -2806,12 +2657,10 @@ static int bad_ioapic_register(int idx) static int find_free_ioapic_entry(void) { - int idx; - - for (idx = 0; idx < MAX_IO_APICS; idx++) + for (int idx = 0; idx < MAX_IO_APICS; idx++) { if (ioapics[idx].nr_registers == 0) return idx; - + } return MAX_IO_APICS; } @@ -2822,8 +2671,7 @@ static int find_free_ioapic_entry(void) * @gsi_base: base of GSI associated with the IOAPIC * @cfg: configuration information for the IOAPIC */ -int mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg) +int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg) { bool hotplug = !!ioapic_initialized; struct mp_ioapic_gsi *gsi_cfg; @@ -2834,12 +2682,13 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base, pr_warn("Bogus (zero) I/O APIC address found, skipping!\n"); return -EINVAL; } - for_each_ioapic(ioapic) + + for_each_ioapic(ioapic) { if (ioapics[ioapic].mp_config.apicaddr == address) { - pr_warn("address 0x%x conflicts with IOAPIC%d\n", - address, ioapic); + pr_warn("address 0x%x conflicts with IOAPIC%d\n", address, ioapic); return -EEXIST; } + } idx = find_free_ioapic_entry(); if (idx >= MAX_IO_APICS) { @@ -2874,8 +2723,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base, (gsi_end >= gsi_cfg->gsi_base && gsi_end <= gsi_cfg->gsi_end)) { pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", - gsi_base, gsi_end, - gsi_cfg->gsi_base, gsi_cfg->gsi_end); + gsi_base, gsi_end, gsi_cfg->gsi_base, gsi_cfg->gsi_end); clear_fixmap(FIX_IO_APIC_BASE_0 + idx); return -ENOSPC; } @@ -2909,8 +2757,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base, ioapics[idx].nr_registers = entries; pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", - idx, mpc_ioapic_id(idx), - mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), + idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), gsi_cfg->gsi_base, gsi_cfg->gsi_end); return 0; @@ -2921,11 +2768,13 @@ int mp_unregister_ioapic(u32 gsi_base) int ioapic, pin; int found = 0; - for_each_ioapic(ioapic) + for_each_ioapic(ioapic) { if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { found = 1; break; } + } + if (!found) { pr_warn("can't find IOAPIC for GSI %d\n", gsi_base); return -ENODEV; @@ -2939,8 +2788,7 @@ int mp_unregister_ioapic(u32 gsi_base) if (irq >= 0) { data = irq_get_chip_data(irq); if (data && data->count) { - pr_warn("pin%d on IOAPIC%d is still in use.\n", - pin, ioapic); + pr_warn("pin%d on IOAPIC%d is still in use.\n", pin, ioapic); return -EBUSY; } } @@ -2975,8 +2823,7 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, if (info && info->ioapic.valid) { data->is_level = info->ioapic.is_level; data->active_low = info->ioapic.active_low; - } else if (__acpi_get_override_irq(gsi, &data->is_level, - &data->active_low) < 0) { + } else if (__acpi_get_override_irq(gsi, &data->is_level, &data->active_low) < 0) { /* PCI interrupts are always active low level triggered. 
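The GSI range check in mp_register_ioapic() further up is the standard inclusive interval-overlap test: two ranges collide exactly when each one starts no later than the other one ends. As a self-contained predicate (a sketch, not the kernel helper):

#include <linux/types.h>

/* True when the inclusive ranges [a_lo, a_hi] and [b_lo, b_hi] intersect */
static bool demo_gsi_ranges_overlap(u32 a_lo, u32 a_hi, u32 b_lo, u32 b_hi)
{
        return a_lo <= b_hi && b_lo <= a_hi;
}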
*/ data->is_level = true; data->active_low = true; @@ -3026,7 +2873,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, ioapic = mp_irqdomain_ioapic_idx(domain); pin = info->ioapic.pin; - if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + if (irq_resolve_mapping(domain, (irq_hw_number_t)pin)) return -EEXIST; data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -3034,10 +2881,8 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, return -ENOMEM; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); - if (ret < 0) { - kfree(data); - return ret; - } + if (ret < 0) + goto free_data; INIT_LIST_HEAD(&data->irq_2_pin); irq_data->hwirq = info->ioapic.pin; @@ -3046,7 +2891,10 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, irq_data->chip_data = data; mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); - add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); + if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin)) { + ret = -ENOMEM; + goto free_irqs; + } mp_preconfigure_entry(data); mp_register_handler(virq, data->is_level); @@ -3056,11 +2904,15 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, legacy_pic->mask(virq); local_irq_restore(flags); - apic_printk(APIC_VERBOSE, KERN_DEBUG - "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", - ioapic, mpc_ioapic_id(ioapic), pin, virq, - data->is_level, data->active_low); + apic_pr_verbose("IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", + ioapic, mpc_ioapic_id(ioapic), pin, virq, data->is_level, data->active_low); return 0; + +free_irqs: + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +free_data: + kfree(data); + return ret; } void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, @@ -3073,22 +2925,17 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, irq_data = irq_domain_get_irq_data(domain, virq); if (irq_data && irq_data->chip_data) { data = irq_data->chip_data; - __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), - (int)irq_data->hwirq); + __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq); WARN_ON(!list_empty(&data->irq_2_pin)); kfree(irq_data->chip_data); } irq_domain_free_irqs_top(domain, virq, nr_irqs); } -int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool reserve) +int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool reserve) { - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); + guard(raw_spinlock_irqsave)(&ioapic_lock); ioapic_configure_entry(irq_data); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } @@ -3096,8 +2943,7 @@ void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data) { /* It won't be called for IRQ with multiple IOAPIC pins associated */ - ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), - (int)irq_data->hwirq); + ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq); } int mp_irqdomain_ioapic_idx(struct irq_domain *domain) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 2a6509e8c840..98a57cb4aa86 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,7 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/cpumask.h> +#include <linux/delay.h> #include <linux/smp.h> +#include <linux/string_choices.h> + #include <asm/io_apic.h> #include 
"local.h" @@ -21,7 +24,7 @@ __setup("no_ipi_broadcast=", apic_ipi_shorthand); static int __init print_ipi_mode(void) { pr_info("IPI shorthand broadcast: %s\n", - apic_ipi_shorthand_off ? "disabled" : "enabled"); + str_disabled_enabled(apic_ipi_shorthand_off)); return 0; } late_initcall(print_ipi_mode); @@ -52,9 +55,9 @@ void apic_send_IPI_allbutself(unsigned int vector) return; if (static_branch_likely(&apic_use_ipi_shorthand)) - apic->send_IPI_allbutself(vector); + __apic_send_IPI_allbutself(vector); else - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + __apic_send_IPI_mask_allbutself(cpu_online_mask, vector); } /* @@ -68,12 +71,12 @@ void native_smp_send_reschedule(int cpu) WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); return; } - apic->send_IPI(cpu, RESCHEDULE_VECTOR); + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); + __apic_send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) @@ -85,16 +88,24 @@ void native_send_call_func_ipi(const struct cpumask *mask) goto sendmask; if (cpumask_test_cpu(cpu, mask)) - apic->send_IPI_all(CALL_FUNCTION_VECTOR); + __apic_send_IPI_all(CALL_FUNCTION_VECTOR); else if (num_online_cpus() > 1) - apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); + __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR); return; } sendmask: - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR); } +void apic_send_nmi_to_offline_cpu(unsigned int cpu) +{ + if (WARN_ON_ONCE(!apic->nmi_to_offline_cpu)) + return; + if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, &cpus_booted_once_mask))) + return; + apic->send_IPI(cpu, NMI_VECTOR); +} #endif /* CONFIG_SMP */ static inline int __prepare_ICR2(unsigned int mask) @@ -102,74 +113,77 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_XAPIC_DEST_FIELD(mask); } -static inline void __xapic_wait_icr_idle(void) +u32 apic_mem_wait_icr_idle_timeout(void) +{ + int cnt; + + for (cnt = 0; cnt < 1000; cnt++) { + if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY)) + return 0; + inc_irq_stat(icr_read_retry_count); + udelay(100); + } + return APIC_ICR_BUSY; +} + +void apic_mem_wait_icr_idle(void) { while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector) +/* + * This is safe against interruption because it only writes the lower 32 + * bits of the APIC_ICR register. The destination field is ignored for + * short hand IPIs. + * + * wait_icr_idle() + * write(ICR2, dest) + * NMI + * wait_icr_idle() + * write(ICR) + * wait_icr_idle() + * write(ICR) + * + * This function does not need to disable interrupts as there is no ICR2 + * interaction. The memory write is direct except when the machine is + * affected by the 11AP Pentium erratum, which turns the plain write into + * an XCHG operation. + */ +static void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. + * Wait for the previous ICR command to complete. 
Use + * apic_mem_wait_icr_idle_timeout() for the NMI vector as there have been + * issues where otherwise the system hangs when the panic CPU tries + * to stop the others before launching the kdump kernel. */ if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); + apic_mem_wait_icr_idle_timeout(); else - __xapic_wait_icr_idle(); - - /* - * No need to touch the target chip field. Also the destination - * mode is ignored when a shorthand is used. - */ - cfg = __prepare_ICR(shortcut, vector, 0); + apic_mem_wait_icr_idle(); - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); + /* Destination field (ICR2) and the destination mode are ignored */ + native_apic_mem_write(APIC_ICR, __prepare_ICR(shortcut, vector, 0)); } /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +void __default_send_IPI_dest_field(unsigned int dest_mask, int vector, + unsigned int dest_mode) { - unsigned long cfg; - - /* - * Wait for idle. - */ + /* See comment in __default_send_IPI_shortcut() */ if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); + apic_mem_wait_icr_idle_timeout(); else - __xapic_wait_icr_idle(); + apic_mem_wait_icr_idle(); - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - native_apic_mem_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector, dest); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); + /* Set the IPI destination field in the ICR */ + native_apic_mem_write(APIC_ICR2, __prepare_ICR2(dest_mask)); + /* Send it with the proper destination mode */ + native_apic_mem_write(APIC_ICR, __prepare_ICR(0, vector, dest_mode)); } void default_send_IPI_single_phys(int cpu, int vector) @@ -184,18 +198,13 @@ void default_send_IPI_single_phys(int cpu, int vector) void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) { - unsigned long query_cpu; unsigned long flags; + unsigned long cpu; - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicast to each CPU instead. 
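apic_mem_wait_icr_idle_timeout(), added further up, is a bounded poll: at most 1000 probes spaced 100 microseconds apart (roughly 100ms in total) before it gives up and reports the still-set busy bit. The same shape in generic form, with a hypothetical register accessor:

#include <linux/delay.h>
#include <linux/types.h>

/* Returns 0 once @busy_bit clears, or @busy_bit if it never does */
static u32 demo_wait_not_busy(u32 (*read_reg)(void), u32 busy_bit)
{
        for (int cnt = 0; cnt < 1000; cnt++) {
                if (!(read_reg() & busy_bit))
                        return 0;
                udelay(100);
        }
        return busy_bit;
}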
- * - mbligh - */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu(cpu, mask) { __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); + cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } @@ -203,18 +212,15 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector) { - unsigned int this_cpu = smp_processor_id(); - unsigned int query_cpu; + unsigned int cpu, this_cpu = smp_processor_id(); unsigned long flags; - /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) + for_each_cpu(cpu, mask) { + if (cpu == this_cpu) continue; __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); + cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } @@ -224,7 +230,7 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, */ void default_send_IPI_single(int cpu, int vector) { - apic->send_IPI_mask(cpumask_of(cpu), vector); + __apic_send_IPI_mask(cpumask_of(cpu), vector); } void default_send_IPI_allbutself(int vector) @@ -243,50 +249,32 @@ void default_send_IPI_self(int vector) } #ifdef CONFIG_X86_32 - -void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector) +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) { unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ + unsigned int cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field( - early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + for_each_cpu(cpu, mask) + __default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector) { + unsigned int cpu, this_cpu = smp_processor_id(); unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) + for_each_cpu(cpu, mask) { + if (cpu == this_cpu) continue; - __default_send_IPI_dest_field( - early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - } + __default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL); + } local_irq_restore(flags); } -/* - * This is only used on smaller machines. 
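The 32-bit logical-mode senders above now derive the logical APIC ID directly as 1U << cpu: in flat logical destination mode each of up to eight CPUs owns one bit of the 8-bit destination field, so a multicast destination is simply the OR of the member bits. A toy illustration (hypothetical helper):

/* Flat logical mode: CPU n owns bit n of the 8-bit destination field */
static unsigned int demo_flat_dest(const unsigned int *cpus, unsigned int n)
{
        unsigned int dest = 0;

        while (n--)
                dest |= 1U << cpus[n];  /* each entry must be < 8 */

        return dest;
}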
- */ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; @@ -300,32 +288,4 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } - -/* must come after the send_IPI functions above for inlining */ -static int convert_apicid_to_cpu(int apic_id) -{ - int i; - - for_each_possible_cpu(i) { - if (per_cpu(x86_cpu_to_apicid, i) == apic_id) - return i; - } - return -1; -} - -int safe_smp_processor_id(void) -{ - int apicid, cpuid; - - if (!boot_cpu_has(X86_FEATURE_APIC)) - return 0; - - apicid = hard_smp_processor_id(); - if (apicid == BAD_APICID) - return 0; - - cpuid = convert_apicid_to_cpu(apicid); - - return cpuid >= 0 ? cpuid : 0; -} #endif diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index a997d849509a..bdcf609eb283 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -13,18 +13,14 @@ #include <asm/irq_vectors.h> #include <asm/apic.h> -/* APIC flat 64 */ -void flat_init_apic_ldr(void); - /* X2APIC */ -int x2apic_apic_id_valid(u32 apicid); -int x2apic_apic_id_registered(void); void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +u32 x2apic_get_apic_id(u32 id); + +void x2apic_send_IPI_all(int vector); +void x2apic_send_IPI_allbutself(int vector); void x2apic_send_IPI_self(int vector); -void __x2apic_send_IPI_shorthand(int vector, u32 which); +extern u32 x2apic_max_apicid; /* IPI */ @@ -46,7 +42,10 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, return icr; } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector); +void default_init_apic_ldr(void); + +void apic_mem_wait_icr_idle(void); +u32 apic_mem_wait_icr_idle_timeout(void); /* * This is used to send an IPI with no shorthand notation (the destination is diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 35d5b8fb18ef..66bc5d3e79db 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -55,14 +55,14 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * caused by the non-atomic update of the address/data pair. * * Direct update is possible when: - * - The MSI is maskable (remapped MSI does not use this code path)). - * The quirk bit is not set in this case. + * - The MSI is maskable (remapped MSI does not use this code path). + * The reservation mode bit is set in this case. 
* - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ - if (!irqd_msi_nomask_quirk(irqd) || + if (!irqd_can_reserve(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || !irqd_is_started(irqd) || @@ -184,7 +184,6 @@ static int x86_msi_prepare(struct irq_domain *domain, struct device *dev, alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; return 0; case DOMAIN_BUS_PCI_DEVICE_MSIX: - case DOMAIN_BUS_PCI_DEVICE_IMS: alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; return 0; default: @@ -215,8 +214,7 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (WARN_ON_ONCE(domain != real_parent)) return false; info->chip->irq_set_affinity = msi_set_affinity; - /* See msi_set_affinity() for the gory details */ - info->flags |= MSI_FLAG_NOMASK_QUIRK; + info->chip->flags |= IRQCHIP_MOVE_DEFERRED; break; case DOMAIN_BUS_DMAR: case DOMAIN_BUS_AMDVI: @@ -231,10 +229,6 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, case DOMAIN_BUS_PCI_DEVICE_MSI: case DOMAIN_BUS_PCI_DEVICE_MSIX: break; - case DOMAIN_BUS_PCI_DEVICE_IMS: - if (!(pops->supported_flags & MSI_FLAG_PCI_IMS)) - return false; - break; default: WARN_ON_ONCE(1); return false; @@ -269,7 +263,7 @@ static const struct msi_parent_ops x86_vector_msi_parent_ops = { struct irq_domain * __init native_create_pci_msi_domain(void) { - if (disable_apic) + if (apic_is_disabled) return NULL; x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; @@ -322,7 +316,7 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE | + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED | IRQCHIP_AFFINITY_PRE_STARTUP, }; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index a61f642b1b90..87bc9e7ca5d6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -10,49 +10,22 @@ #include <linux/errno.h> #include <linux/smp.h> +#include <xen/xen.h> + #include <asm/io_apic.h> #include <asm/apic.h> #include <asm/acpi.h> #include "local.h" -static int default_x86_32_early_logical_apicid(int cpu) +static u32 default_get_apic_id(u32 x) { - return 1 << cpu; -} - -static void setup_apic_flat_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk(KERN_INFO - "Enabling APIC mode: Flat. Using %d I/O APICs\n", - nr_ioapics); -#endif -} + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); -static int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -/* - * Set up the logical destination ID. Intel recommends to set DFR, LDR and - * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual" - * (Intel document number 292116). - */ -static void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -static int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; } /* should be called last. 
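default_get_apic_id() in the probe_32.c hunk above masks the ID field according to the APIC version: xAPIC-class parts (and CPUs advertising X86_FEATURE_EXTD_APICID) implement a full 8-bit ID in bits 31:24 of the ID register, while older parts implement only four of those bits. The extraction step on its own (illustrative helper):

/* The APIC ID lives in bits 31:24 of the APIC ID register */
static unsigned int demo_apic_id(unsigned int id_reg, bool eight_bit_id)
{
        return (id_reg >> 24) & (eight_bit_id ? 0xFF : 0x0F);
}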
*/ @@ -65,26 +38,16 @@ static struct apic apic_default __ro_after_init = { .name = "default", .probe = probe_default, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = default_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, .disable_esr = 0, - .check_apicid_used = default_check_apicid_used, .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_flat_routing, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = physid_set_mask_of_physid, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = default_phys_pkg_id, + .max_apic_id = 0xFE, .get_apic_id = default_get_apic_id, - .set_apic_id = NULL, .calc_dest_apicid = apic_flat_calc_apicid, @@ -95,17 +58,13 @@ static struct apic apic_default __ro_after_init = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .inquire_remote_apic = default_inquire_remote_apic, - .read = native_apic_mem_read, .write = native_apic_mem_write, - .eoi_write = native_apic_mem_write, + .eoi = native_apic_mem_eoi, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, + .wait_icr_idle = apic_mem_wait_icr_idle, + .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout, }; apic_driver(apic_default); @@ -123,7 +82,7 @@ static int __init parse_apic(char *arg) for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { if (!strcmp((*drv)->name, arg)) { - apic = *drv; + apic_install_driver(*drv); cmdline_apic = 1; return 0; } @@ -134,49 +93,14 @@ static int __init parse_apic(char *arg) } early_param("apic", parse_apic); -void __init default_setup_apic_routing(void) -{ - int version = boot_cpu_apic_version; - - if (num_possible_cpus() > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* P4 and above */ - fallthrough; - case X86_VENDOR_HYGON: - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } - -#ifdef CONFIG_X86_BIGSMP - /* - * This is used to switch to bigsmp mode when - * - There is no apic= option specified by the user - * - generic_apic_probe() has chosen apic_default as the sub_arch - * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support - */ - - if (!cmdline_apic && apic == &apic_default) - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); -} - -void __init generic_apic_probe(void) +void __init x86_32_probe_apic(void) { if (!cmdline_apic) { struct apic **drv; for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { if ((*drv)->probe()) { - apic = *drv; + apic_install_driver(*drv); break; } } @@ -184,26 +108,4 @@ void __init generic_apic_probe(void) if (drv == __apicdrivers_end) panic("Didn't find an APIC driver"); } - printk(KERN_INFO "Using APIC driver %s\n", apic->name); -} - -/* This function can switch the APIC even after the initial ->probe() */ -int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if (!(*drv)->acpi_madt_oem_check) - continue; - if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) - continue; - - if (!cmdline_apic) { - apic = *drv; - printk(KERN_INFO "Switched 
to APIC driver `%s'.\n", - apic->name); - } - return 1; - } - return 0; } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c46720f185c0..ecdf0c4121e1 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -13,10 +13,8 @@ #include "local.h" -/* - * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. - */ -void __init default_setup_apic_routing(void) +/* Select the appropriate APIC driver */ +void __init x86_64_probe_apic(void) { struct apic **drv; @@ -24,11 +22,7 @@ void __init default_setup_apic_routing(void) for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { if ((*drv)->probe && (*drv)->probe()) { - if (apic != *drv) { - apic = *drv; - pr_info("Switched APIC routing to %s.\n", - apic->name); - } + apic_install_driver(*drv); break; } } @@ -40,11 +34,7 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) { - if (apic != *drv) { - apic = *drv; - pr_info("Setting APIC routing to %s.\n", - apic->name); - } + apic_install_driver(*drv); return 1; } } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c1efebd27e6c..bddc54465399 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -44,7 +44,18 @@ static cpumask_var_t vector_searchmask; static struct irq_chip lapic_controller; static struct irq_matrix *vector_matrix; #ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct hlist_head, cleanup_list); + +static void vector_cleanup_callback(struct timer_list *tmr); + +struct vector_cleanup { + struct hlist_head head; + struct timer_list timer; +}; + +static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = { + .head = HLIST_HEAD_INIT, + .timer = __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED), +}; #endif void lock_vector_lock(void) @@ -123,13 +134,20 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, apicd->hw_irq_cfg.vector = vector; apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); + + apic_update_vector(cpu, vector, true); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); - trace_vector_config(irqd->irq, vector, cpu, - apicd->hw_irq_cfg.dest_apicid); + trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid); } -static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, - unsigned int newcpu) +static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed) +{ + apic_update_vector(cpu, vector, false); + irq_matrix_free(vector_matrix, cpu, vector, managed); +} + +static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu) { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); @@ -163,8 +181,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { - irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, - managed); + apic_free_vector(apicd->cpu, apicd->vector, managed); } setnew: @@ -172,6 +189,7 @@ setnew: apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + apic_update_irq_cfg(irqd, newvec, newcpu); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -249,8 +267,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) 
trace_vector_alloc(irqd->irq, vector, resvd, vector); if (vector < 0) return vector; - apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); + chip_data_update(irqd, vector, cpu); return 0; } @@ -326,8 +343,8 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) trace_vector_alloc_managed(irqd->irq, vector, vector); if (vector < 0) return vector; - apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); + chip_data_update(irqd, vector, cpu); + return 0; } @@ -346,7 +363,7 @@ static void clear_irq_vector(struct irq_data *irqd) apicd->prev_cpu); per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apic_free_vector(apicd->cpu, vector, managed); apicd->vector = 0; /* Clean up move in progress */ @@ -355,7 +372,7 @@ static void clear_irq_vector(struct irq_data *irqd) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apic_free_vector(apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); @@ -536,7 +553,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, struct irq_data *irqd; int i, err, node; - if (disable_apic) + if (apic_is_disabled) return -ENXIO; /* @@ -680,7 +697,7 @@ static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec, * if IRQ remapping is enabled. APIC IDs above 15 bits are * only permitted if IRQ remapping is enabled, so check that. */ - if (apic->apic_id_valid(32768)) + if (apic_id_valid(32768)) return 0; return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec); @@ -701,8 +718,8 @@ int __init arch_probe_nr_irqs(void) { int nr; - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; + if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids) + irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids); nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; #if defined(CONFIG_PCI_MSI) @@ -714,8 +731,8 @@ int __init arch_probe_nr_irqs(void) else nr += gsi_top * 16; #endif - if (nr < nr_irqs) - nr_irqs = nr; + if (nr < irq_get_nr_irqs()) + irq_set_nr_irqs(nr); /* * We don't know if PIC is present at this point so we need to do @@ -727,8 +744,8 @@ int __init arch_probe_nr_irqs(void) void lapic_assign_legacy_vector(unsigned int irq, bool replace) { /* - * Use assign system here so it wont get accounted as allocated - * and moveable in the cpu hotplug check and it prevents managed + * Use assign system here so it won't get accounted as allocated + * and movable in the cpu hotplug check and it prevents managed * irq reservation from touching it. 
*/ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); @@ -788,7 +805,7 @@ int __init arch_early_irq_init(void) x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, NULL); BUG_ON(x86_vector_domain == NULL); - irq_set_default_host(x86_vector_domain); + irq_set_default_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); @@ -841,10 +858,21 @@ void lapic_online(void) this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); } +static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr); + void lapic_offline(void) { + struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup); + lock_vector_lock(); + + /* In case the vector cleanup timer has not expired */ + __vector_cleanup(cl, false); + irq_matrix_offline(vector_matrix); + WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0); + WARN_ON_ONCE(!hlist_empty(&cl->head)); + unlock_vector_lock(); } @@ -866,50 +894,6 @@ static int apic_set_affinity(struct irq_data *irqd, return err ? err : IRQ_SET_MASK_OK; } -#else -# define apic_set_affinity NULL -#endif - -static int apic_retrigger_irq(struct irq_data *irqd) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); - unsigned long flags; - - raw_spin_lock_irqsave(&vector_lock, flags); - apic->send_IPI(apicd->cpu, apicd->vector); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} - -void apic_ack_irq(struct irq_data *irqd) -{ - irq_move_irq(irqd); - ack_APIC_irq(); -} - -void apic_ack_edge(struct irq_data *irqd) -{ - irq_complete_move(irqd_cfg(irqd)); - apic_ack_irq(irqd); -} - -static void x86_vector_msi_compose_msg(struct irq_data *data, - struct msi_msg *msg) -{ - __irq_msi_compose_msg(irqd_cfg(data), msg, false); -} - -static struct irq_chip lapic_controller = { - .name = "APIC", - .irq_ack = apic_ack_edge, - .irq_set_affinity = apic_set_affinity, - .irq_compose_msi_msg = x86_vector_msi_compose_msg, - .irq_retrigger = apic_retrigger_irq, -}; - -#ifdef CONFIG_SMP - static void free_moved_vector(struct apic_chip_data *apicd) { unsigned int vector = apicd->prev_vector; @@ -927,123 +911,34 @@ static void free_moved_vector(struct apic_chip_data *apicd) * affinity mask comes online. */ trace_vector_free_moved(apicd->irq, cpu, vector, managed); - irq_matrix_free(vector_matrix, cpu, vector, managed); + apic_free_vector(cpu, vector, managed); per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; hlist_del_init(&apicd->clist); apicd->prev_vector = 0; apicd->move_in_progress = 0; } -DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup) -{ - struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); - struct apic_chip_data *apicd; - struct hlist_node *tmp; - - ack_APIC_irq(); - /* Prevent vectors vanishing under us */ - raw_spin_lock(&vector_lock); - - hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { - unsigned int irr, vector = apicd->prev_vector; - - /* - * Paranoia: Check if the vector that needs to be cleaned - * up is registered at the APICs IRR. If so, then this is - * not the best time to clean it up. Clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest - * priority external vector, so on return from this - * interrupt the device interrupt will happen first. 
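The paranoia check described in this comment, like its is_vector_pending() successor, tests a single vector's bit in the local APIC IRR, which is laid out as eight 32-bit registers spaced 0x10 apart. The indexing on its own, using apic_read() exactly as the removed lines below do:

#include <asm/apic.h>

/* IRR: 256 bits spread over eight 32-bit registers, 0x10 apart */
static bool demo_vector_in_irr(unsigned int vector)
{
        u32 irr = apic_read(APIC_IRR + (vector / 32) * 0x10);

        return irr & (1U << (vector % 32));
}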
- */ - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1U << (vector % 32))) { - apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - continue; - } - free_moved_vector(apicd); - } - - raw_spin_unlock(&vector_lock); -} - -static void __send_cleanup_vector(struct apic_chip_data *apicd) -{ - unsigned int cpu; - - raw_spin_lock(&vector_lock); - apicd->move_in_progress = 0; - cpu = apicd->prev_cpu; - if (cpu_online(cpu)) { - hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); - apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); - } else { - apicd->prev_vector = 0; - } - raw_spin_unlock(&vector_lock); -} - -void send_cleanup_vector(struct irq_cfg *cfg) -{ - struct apic_chip_data *apicd; - - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); - if (apicd->move_in_progress) - __send_cleanup_vector(apicd); -} - -void irq_complete_move(struct irq_cfg *cfg) -{ - struct apic_chip_data *apicd; - - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); - if (likely(!apicd->move_in_progress)) - return; - - /* - * If the interrupt arrived on the new target CPU, cleanup the - * vector on the old target CPU. A vector check is not required - * because an interrupt can never move from one vector to another - * on the same CPU. - */ - if (apicd->cpu == smp_processor_id()) - __send_cleanup_vector(apicd); -} - /* * Called from fixup_irqs() with @desc->lock held and interrupts disabled. */ -void irq_force_complete_move(struct irq_desc *desc) +static void apic_force_complete_move(struct irq_data *irqd) { + unsigned int cpu = smp_processor_id(); struct apic_chip_data *apicd; - struct irq_data *irqd; unsigned int vector; - /* - * The function is called for all descriptors regardless of which - * irqdomain they belong to. For example if an IRQ is provided by - * an irq_chip as part of a GPIO driver, the chip data for that - * descriptor is specific to the irq_chip in question. - * - * Check first that the chip_data is what we expect - * (apic_chip_data) before touching it any further. - */ - irqd = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqd) - return; - - raw_spin_lock(&vector_lock); + guard(raw_spinlock)(&vector_lock); apicd = apic_chip_data(irqd); if (!apicd) - goto unlock; + return; /* - * If prev_vector is empty, no action required. + * If prev_vector is empty or the descriptor is neither currently + * nor previously on the outgoing CPU no action required. */ vector = apicd->prev_vector; - if (!vector) - goto unlock; + if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) + return; /* * This is tricky. 
If the cleanup of the old vector has not been @@ -1097,10 +992,166 @@ void irq_force_complete_move(struct irq_desc *desc) irqd->irq, vector); } free_moved_vector(apicd); -unlock: +} + +#else +# define apic_set_affinity NULL +# define apic_force_complete_move NULL +#endif + +static int apic_retrigger_irq(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + __apic_send_IPI(apicd->cpu, apicd->vector); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +} + +void apic_ack_irq(struct irq_data *irqd) +{ + irq_move_irq(irqd); + apic_eoi(); +} + +void apic_ack_edge(struct irq_data *irqd) +{ + irq_complete_move(irqd_cfg(irqd)); + apic_ack_irq(irqd); +} + +static void x86_vector_msi_compose_msg(struct irq_data *data, + struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg, false); +} + +static struct irq_chip lapic_controller = { + .name = "APIC", + .irq_ack = apic_ack_edge, + .irq_set_affinity = apic_set_affinity, + .irq_compose_msi_msg = x86_vector_msi_compose_msg, + .irq_force_complete_move = apic_force_complete_move, + .irq_retrigger = apic_retrigger_irq, +}; + +#ifdef CONFIG_SMP + +static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) +{ + struct apic_chip_data *apicd; + struct hlist_node *tmp; + bool rearm = false; + + lockdep_assert_held(&vector_lock); + + hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) { + unsigned int vector = apicd->prev_vector; + + /* + * Paranoia: Check if the vector that needs to be cleaned + * up is registered at the APICs IRR. That's clearly a + * hardware issue if the vector arrived on the old target + * _after_ interrupts were disabled above. Keep @apicd + * on the list and schedule the timer again to give the CPU + * a chance to handle the pending interrupt. + * + * Do not check IRR when called from lapic_offline(), because + * fixup_irqs() was just called to scan IRR for set bits and + * forward them to new destination CPUs via IPIs. + */ + if (check_irr && is_vector_pending(vector)) { + pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq); + rearm = true; + continue; + } + free_moved_vector(apicd); + } + + /* + * Must happen under vector_lock to make the timer_pending() check + * in __vector_schedule_cleanup() race free against the rearm here. + */ + if (rearm) + mod_timer(&cl->timer, jiffies + 1); +} + +static void vector_cleanup_callback(struct timer_list *tmr) +{ + struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer); + + /* Prevent vectors vanishing under us */ + raw_spin_lock_irq(&vector_lock); + __vector_cleanup(cl, true); + raw_spin_unlock_irq(&vector_lock); +} + +static void __vector_schedule_cleanup(struct apic_chip_data *apicd) +{ + unsigned int cpu = apicd->prev_cpu; + + raw_spin_lock(&vector_lock); + apicd->move_in_progress = 0; + if (cpu_online(cpu)) { + struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu); + + hlist_add_head(&apicd->clist, &cl->head); + + /* + * The lockless timer_pending() check is safe here. If it + * returns true, then the callback will observe this new + * apic data in the hlist as everything is serialized by + * vector lock. + * + * If it returns false then the timer is either not armed + * or the other CPU executes the callback, which again + * would be blocked on vector lock. Rearming it in the + * latter case makes it fire for nothing. 
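The timer_pending()/add_timer_on() sequence this comment describes reduces to a small arm-once helper: because every path runs under vector_lock, the unlocked timer_pending() check cannot race with the callback. A sketch of the pattern (hypothetical helper; the caller is assumed to hold the serializing lock):

#include <linux/jiffies.h>
#include <linux/timer.h>

/* Arm a pinned per-CPU timer for the next tick unless already queued */
static void demo_arm_cleanup_timer(struct timer_list *timer, unsigned int cpu)
{
        if (!timer_pending(timer)) {
                timer->expires = jiffies + 1;
                add_timer_on(timer, cpu);       /* fire on @cpu */
        }
}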
+ * + * This is also safe against the callback rearming the timer + * because that's serialized via vector lock too. + */ + if (!timer_pending(&cl->timer)) { + cl->timer.expires = jiffies + 1; + add_timer_on(&cl->timer, cpu); + } + } else { + pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); + free_moved_vector(apicd); + } raw_spin_unlock(&vector_lock); } +void vector_schedule_cleanup(struct irq_cfg *cfg) +{ + struct apic_chip_data *apicd; + + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (apicd->move_in_progress) + __vector_schedule_cleanup(apicd); +} + +void irq_complete_move(struct irq_cfg *cfg) +{ + struct apic_chip_data *apicd; + + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (likely(!apicd->move_in_progress)) + return; + + /* + * If the interrupt arrived on the new target CPU, cleanup the + * vector on the old target CPU. A vector check is not required + * because an interrupt can never move from one vector to another + * on the same CPU. + */ + if (apicd->cpu == smp_processor_id()) + __vector_schedule_cleanup(apicd); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Note, this is not accurate accounting, but at least good enough to @@ -1150,7 +1201,7 @@ static void __init print_local_APIC(void *dummy) u64 icr; pr_debug("printing local APIC contents on CPU#%d/%d:\n", - smp_processor_id(), hard_smp_processor_id()); + smp_processor_id(), read_apic_id()); v = apic_read(APIC_ID); pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e696e22d0531..7db83212effb 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,11 +9,7 @@ #include "local.h" -struct cluster_mask { - unsigned int clusterid; - int node; - struct cpumask mask; -}; +#define apic_cluster(apicid) ((apicid) >> 4) /* * __x2apic_send_IPI_mask() possibly needs to read @@ -23,8 +19,7 @@ struct cluster_mask { static u32 *x86_cpu_to_logical_apicid __read_mostly; static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); -static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; +static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, cpu); dest = 0; - for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + for_each_cpu_and(clustercpu, tmpmsk, cmsk) dest |= x86_cpu_to_logical_apicid[clustercpu]; if (!dest) @@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); /* Remove cluster CPUs from tmpmask */ - cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + cpumask_andnot(tmpmsk, tmpmsk, cmsk); } local_irq_restore(flags); @@ -88,84 +83,120 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } -static void x2apic_send_IPI_allbutself(int vector) +static u32 x2apic_calc_apicid(unsigned int cpu) { - __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); + return x86_cpu_to_logical_apicid[cpu]; } -static void 
x2apic_send_IPI_all(int vector) +static void init_x2apic_ldr(void) { - __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); -} + struct cpumask *cmsk = this_cpu_read(cluster_masks); -static u32 x2apic_calc_apicid(unsigned int cpu) -{ - return x86_cpu_to_logical_apicid[cpu]; + BUG_ON(!cmsk); + + cpumask_set_cpu(smp_processor_id(), cmsk); } -static void init_x2apic_ldr(void) +/* + * As an optimisation during boot, set the cluster_mask for all present + * CPUs at once, to prevent each of them having to iterate over the others + * to find the existing cluster_mask. + */ +static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster) { - struct cluster_mask *cmsk = this_cpu_read(cluster_masks); - u32 cluster, apicid = apic_read(APIC_LDR); - unsigned int cpu; + int cpu_i; - x86_cpu_to_logical_apicid[smp_processor_id()] = apicid; + for_each_present_cpu(cpu_i) { + struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i); + u32 apicid = apic->cpu_present_to_apicid(cpu_i); - if (cmsk) - goto update; - - cluster = apicid >> 16; - for_each_online_cpu(cpu) { - cmsk = per_cpu(cluster_masks, cpu); - /* Matching cluster found. Link and update it. */ - if (cmsk && cmsk->clusterid == cluster) - goto update; + if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster) + continue; + + if (WARN_ON_ONCE(*cpu_cmsk == cmsk)) + continue; + + BUG_ON(*cpu_cmsk); + *cpu_cmsk = cmsk; } - cmsk = cluster_hotplug_mask; - cmsk->clusterid = cluster; - cluster_hotplug_mask = NULL; -update: - this_cpu_write(cluster_masks, cmsk); - cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -static int alloc_clustermask(unsigned int cpu, int node) +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node) { + struct cpumask *cmsk = NULL; + unsigned int cpu_i; + + /* + * At boot time, the CPU present mask is stable. The cluster mask is + * allocated for the first CPU in the cluster and propagated to all + * present siblings in the cluster. If the cluster mask is already set + * on entry to this function for a given CPU, there is nothing to do. + */ if (per_cpu(cluster_masks, cpu)) return 0; + + if (system_state < SYSTEM_RUNNING) + goto alloc; + /* - * If a hotplug spare mask exists, check whether it's on the right - * node. If not, free it and allocate a new one. + * On post boot hotplug for a CPU which was not present at boot time, + * iterate over all possible CPUs (even those which are not present + * any more) to find any existing cluster mask. */ - if (cluster_hotplug_mask) { - if (cluster_hotplug_mask->node == node) - return 0; - kfree(cluster_hotplug_mask); + for_each_possible_cpu(cpu_i) { + u32 apicid = apic->cpu_present_to_apicid(cpu_i); + + if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) { + cmsk = per_cpu(cluster_masks, cpu_i); + /* + * If the cluster is already initialized, just store + * the mask and return. There's no need to propagate. + */ + if (cmsk) { + per_cpu(cluster_masks, cpu) = cmsk; + return 0; + } + } } - - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), - GFP_KERNEL, node); - if (!cluster_hotplug_mask) + /* + * No CPU in the cluster has ever been initialized, so fall through to + * the boot time code which will also populate the cluster mask for any + * other CPU in the cluster which is (now) present. 
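+ * (prefill_clustermask() then links every present sibling in the cluster to the mask allocated below, so those CPUs take the early return at the top of this function when they are brought up.)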
+ */ +alloc: + cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node); + if (!cmsk) return -ENOMEM; - cluster_hotplug_mask->node = node; + per_cpu(cluster_masks, cpu) = cmsk; + prefill_clustermask(cmsk, cpu, cluster); + return 0; } static int x2apic_prepare_cpu(unsigned int cpu) { - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + u32 phys_apicid = apic->cpu_present_to_apicid(cpu); + u32 cluster = apic_cluster(phys_apicid); + u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf)); + int node = cpu_to_node(cpu); + + x86_cpu_to_logical_apicid[cpu] = logical_apicid; + + if (alloc_clustermask(cpu, cluster, node) < 0) return -ENOMEM; - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + + if (!zalloc_cpumask_var_node(&per_cpu(ipi_mask, cpu), GFP_KERNEL, node)) return -ENOMEM; + return 0; } static int x2apic_dead_cpu(unsigned int dead_cpu) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu); if (cmsk) - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + cpumask_clear_cpu(dead_cpu, cmsk); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } @@ -198,25 +229,17 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", .probe = x2apic_cluster_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = x2apic_apic_id_valid, - .apic_id_registered = x2apic_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, .disable_esr = 0, - .check_apicid_used = NULL, .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = x2apic_phys_pkg_id, + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = x2apic_calc_apicid, @@ -226,16 +249,13 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - - .inquire_remote_apic = NULL, + .nmi_to_offline_cpu = true, .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_eoi_write, + .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; apic_driver(apic_x2apic_cluster); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..12d4c35547a6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -8,11 +8,13 @@ int x2apic_phys; static struct apic apic_x2apic_phys; -static u32 x2apic_max_apicid __ro_after_init; +u32 x2apic_max_apicid __ro_after_init = UINT_MAX; void __init x2apic_set_max_apicid(u32 apicid) { x2apic_max_apicid = apicid; + if (apic->x2apic_set_max_apicid) + apic->max_apic_id = apicid; } static int __init set_x2apic_phys_mode(char *arg) @@ -81,40 +83,28 @@ static void __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } -static void x2apic_send_IPI_allbutself(int vector) +static void __x2apic_send_IPI_shorthand(int vector, u32 which) { - __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); -} + unsigned long cfg = __prepare_ICR(which, vector, 0); -static void x2apic_send_IPI_all(int 
vector) -{ - __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); + native_x2apic_icr_write(cfg, 0); } -static void init_x2apic_ldr(void) +void x2apic_send_IPI_allbutself(int vector) { + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); } -static int x2apic_phys_probe(void) -{ - if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) - return 1; - - return apic == &apic_x2apic_phys; -} - -/* Common x2apic functions, also used by x2apic_cluster */ -int x2apic_apic_id_valid(u32 apicid) +void x2apic_send_IPI_all(int vector) { - if (x2apic_max_apicid && apicid > x2apic_max_apicid) - return 0; - - return 1; + __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); } -int x2apic_apic_id_registered(void) +void x2apic_send_IPI_self(int vector) { - return 1; + apic_write(APIC_SELF_IPI, vector); } void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) @@ -123,59 +113,37 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) native_x2apic_icr_write(cfg, apicid); } -void __x2apic_send_IPI_shorthand(int vector, u32 which) +static int x2apic_phys_probe(void) { - unsigned long cfg = __prepare_ICR(which, vector, 0); + if (!x2apic_mode) + return 0; - /* x2apic MSRs are special and need a special fence: */ - weak_wrmsr_fence(); - native_x2apic_icr_write(cfg, 0); -} + if (x2apic_phys || x2apic_fadt_phys()) + return 1; -unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; + return apic == &apic_x2apic_phys; } -u32 x2apic_set_apic_id(unsigned int id) +u32 x2apic_get_apic_id(u32 id) { return id; } -int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - -void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", .probe = x2apic_phys_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = x2apic_apic_id_valid, - .apic_id_registered = x2apic_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, .disable_esr = 0, - .check_apicid_used = NULL, - .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = x2apic_phys_pkg_id, + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, @@ -185,16 +153,13 @@ static struct apic apic_x2apic_phys __ro_after_init = { .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - - .inquire_remote_apic = NULL, + .nmi_to_offline_cpu = true, .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_eoi_write, + .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; apic_driver(apic_x2apic_phys); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c new file mode 100644 index 000000000000..dbc5678bc3b6 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0-only 
+/* + * AMD Secure AVIC Support (SEV-SNP Guests) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com> + */ + +#include <linux/cc_platform.h> +#include <linux/cpumask.h> +#include <linux/percpu-defs.h> +#include <linux/align.h> + +#include <asm/apic.h> +#include <asm/sev.h> + +#include "local.h" + +struct secure_avic_page { + u8 regs[PAGE_SIZE]; +} __aligned(PAGE_SIZE); + +static struct secure_avic_page __percpu *savic_page __ro_after_init; + +static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); +} + +static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset) +{ + return &per_cpu_ptr(savic_page, cpu)->regs[offset]; +} + +static inline void update_vector(unsigned int cpu, unsigned int offset, + unsigned int vector, bool set) +{ + void *bitmap = get_reg_bitmap(cpu, offset); + + if (set) + apic_set_vector(vector, bitmap); + else + apic_clear_vector(vector, bitmap); +} + +#define SAVIC_ALLOWED_IRR 0x204 + +/* + * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers + * result in #VC exception (for non-accelerated register accesses) + * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler + * can read/write the x2APIC register in the guest APIC backing page. + * + * Since doing this would increase the latency of accessing x2APIC + * registers, instead of doing RDMSR/WRMSR based accesses and + * handling the APIC register reads/writes in the #VC exception handler, + * the read() and write() callbacks directly read/write the APIC register + * from/to the vCPU's APIC backing page. + */ +static u32 savic_read(u32 reg) +{ + void *ap = this_cpu_ptr(savic_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + return savic_ghcb_msr_read(reg); + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return apic_get_reg(ap, reg); + case APIC_ICR: + return (u32)apic_get_reg64(ap, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + if (WARN_ONCE(!IS_ALIGNED(reg, 16), + "APIC register read offset 0x%x not aligned at 16 bytes", reg)) + return 0; + return apic_get_reg(ap, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from + * their respective base offset. APIC_IRRs are in the range + * + * (0x200, 0x210, ..., 0x270) + * + * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range + * + * (0x204, 0x214, ..., 0x274). + * + * Filter out everything else. + */ + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || + IS_ALIGNED(reg - 4, 16)), + "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)) + return 0; + return apic_get_reg(ap, reg); + default: + pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ 0x278 + +/* + * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware + * updates the APIC_IRR in the APIC backing page of the vCPU. In addition, + * hardware evaluates the new APIC_IRR update for interrupt injection to + * the vCPU. 
So, self IPIs are hardware-accelerated. + */ +static inline void self_ipi_reg_write(unsigned int vector) +{ + native_apic_msr_write(APIC_SELF_IPI, vector); +} + +static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi) +{ + if (nmi) + apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1); + else + update_vector(cpu, APIC_IRR, vector, true); +} + +static void send_ipi_allbut(unsigned int vector, bool nmi) +{ + unsigned int cpu, src_cpu; + + guard(irqsave)(); + + src_cpu = raw_smp_processor_id(); + + for_each_cpu(cpu, cpu_online_mask) { + if (cpu == src_cpu) + continue; + send_ipi_dest(cpu, vector, nmi); + } +} + +static inline void self_ipi(unsigned int vector, bool nmi) +{ + u32 icr_low = APIC_SELF_IPI | vector; + + if (nmi) + icr_low |= APIC_DM_NMI; + + native_x2apic_icr_write(icr_low, 0); +} + +static void savic_icr_write(u32 icr_low, u32 icr_high) +{ + unsigned int dsh, vector; + u64 icr_data; + bool nmi; + + dsh = icr_low & APIC_DEST_ALLBUT; + vector = icr_low & APIC_VECTOR_MASK; + nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI); + + switch (dsh) { + case APIC_DEST_SELF: + self_ipi(vector, nmi); + break; + case APIC_DEST_ALLINC: + self_ipi(vector, nmi); + fallthrough; + case APIC_DEST_ALLBUT: + send_ipi_allbut(vector, nmi); + break; + default: + send_ipi_dest(icr_high, vector, nmi); + break; + } + + icr_data = ((u64)icr_high) << 32 | icr_low; + if (dsh != APIC_DEST_SELF) + savic_ghcb_msr_write(APIC_ICR, icr_data); + apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data); +} + +static void savic_write(u32 reg, u32 data) +{ + void *ap = this_cpu_ptr(savic_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TDCR: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + savic_ghcb_msr_write(reg, data); + break; + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ: + case APIC_ESR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + apic_set_reg(ap, reg, data); + break; + case APIC_ICR: + savic_icr_write(data, 0); + break; + case APIC_SELF_IPI: + self_ipi_reg_write(data); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR ... 
SAVIC_ALLOWED_IRR + 0x70: + if (IS_ALIGNED(reg - 4, 16)) { + apic_set_reg(ap, reg, data); + break; + } + fallthrough; + default: + pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg); + } +} + +static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh) +{ + unsigned int icr_low; + + icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL); + savic_icr_write(icr_low, dest); +} + +static void savic_send_ipi(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + send_ipi(dest, vector, 0); +} + +static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self) +{ + unsigned int cpu, this_cpu; + + guard(irqsave)(); + + this_cpu = raw_smp_processor_id(); + + for_each_cpu(cpu, mask) { + if (excl_self && cpu == this_cpu) + continue; + send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0); + } +} + +static void savic_send_ipi_mask(const struct cpumask *mask, int vector) +{ + send_ipi_mask(mask, vector, false); +} + +static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + send_ipi_mask(mask, vector, true); +} + +static void savic_send_ipi_allbutself(int vector) +{ + send_ipi(0, vector, APIC_DEST_ALLBUT); +} + +static void savic_send_ipi_all(int vector) +{ + send_ipi(0, vector, APIC_DEST_ALLINC); +} + +static void savic_send_ipi_self(int vector) +{ + self_ipi_reg_write(vector); +} + +static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set); +} + +static void savic_eoi(void) +{ + unsigned int cpu; + int vec; + + cpu = raw_smp_processor_id(); + vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR)); + if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR")) + return; + + /* Is level-triggered interrupt? */ + if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) { + update_vector(cpu, APIC_ISR, vec, false); + /* + * Propagate the EOI write to the hypervisor for level-triggered + * interrupts. Return to the guest from GHCB protocol event takes + * care of re-evaluating interrupt state. + */ + savic_ghcb_msr_write(APIC_EOI, 0); + } else { + /* + * Hardware clears APIC_ISR and re-evaluates the interrupt state + * to determine if there is any pending interrupt which can be + * delivered to CPU. + */ + native_apic_msr_eoi(); + } +} + +static void savic_teardown(void) +{ + /* Disable Secure AVIC */ + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0); + savic_unregister_gpa(NULL); +} + +static void savic_setup(void) +{ + void *ap = this_cpu_ptr(savic_page); + enum es_result res; + unsigned long gpa; + + /* + * Before Secure AVIC is enabled, APIC MSR reads are intercepted. + * APIC_ID MSR read returns the value from the hypervisor. + */ + apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID)); + + gpa = __pa(ap); + + /* + * The NPT entry for a vCPU's APIC backing page must always be + * present when the vCPU is running in order for Secure AVIC to + * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot + * be resumed if the NPT entry for the APIC backing page is not + * present. Notify GPA of the vCPU's APIC backing page to the + * hypervisor by calling savic_register_gpa(). Before executing + * VMRUN, the hypervisor makes use of this information to make sure + * the APIC backing page is mapped in NPT. 
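+ * If that registration fails there is no safe fallback, so the code below terminates the guest via sev_es_terminate().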
+ */ + res = savic_register_gpa(gpa); + if (res != ES_OK) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, + gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI); +} + +static int savic_probe(void) +{ + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return 0; + + if (!x2apic_mode) { + pr_err("Secure AVIC enabled in non x2APIC mode\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + /* unreachable */ + } + + savic_page = alloc_percpu(struct secure_avic_page); + if (!savic_page) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + + return 1; +} + +static struct apic apic_x2apic_savic __ro_after_init = { + + .name = "secure avic x2apic", + .probe = savic_probe, + .acpi_madt_oem_check = savic_acpi_madt_oem_check, + .setup = savic_setup, + .teardown = savic_teardown, + + .dest_mode_logical = false, + + .disable_esr = 0, + + .cpu_present_to_apicid = default_cpu_present_to_apicid, + + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, + .get_apic_id = x2apic_get_apic_id, + + .calc_dest_apicid = apic_default_calc_apicid, + + .send_IPI = savic_send_ipi, + .send_IPI_mask = savic_send_ipi_mask, + .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself, + .send_IPI_allbutself = savic_send_ipi_allbutself, + .send_IPI_all = savic_send_ipi_all, + .send_IPI_self = savic_send_ipi_self, + + .nmi_to_offline_cpu = true, + + .read = savic_read, + .write = savic_write, + .eoi = savic_eoi, + .icr_read = native_x2apic_icr_read, + .icr_write = savic_icr_write, + + .update_vector = savic_update_vector, +}; + +apic_driver(apic_x2apic_savic); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 482855227964..15209f220e1f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -25,6 +25,8 @@ #include <asm/uv/uv.h> #include <asm/apic.h> +#include "local.h" + static enum uv_system_type uv_system_type; static int uv_hubbed_system; static int uv_hubless_system; @@ -108,7 +110,7 @@ static void __init early_get_pnodeid(void) } else if (UVH_RH_GAM_ADDR_MAP_CONFIG) { union uvh_rh_gam_addr_map_config_u m_n_config; - m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG); + m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG); uv_cpuid.n_skt = m_n_config.s.n_skt; if (is_uv(UV3)) uv_cpuid.m_skt = m_n_config.s3.m_skt; @@ -239,54 +241,20 @@ static void __init uv_tsc_check_sync(void) is_uv(UV3) ? 
sname.s3.field : \ undef) -/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ - -#define SMT_LEVEL 0 /* Leaf 0xb SMT level */ -#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */ -#define SMT_TYPE 1 -#define CORE_TYPE 2 -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) - -static void set_x2apic_bits(void) -{ - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int sid_shift; - - cpuid(0, &eax, &ebx, &ecx, &edx); - if (eax < 0xb) { - pr_info("UV: CPU does not have CPUID.11\n"); - return; - } - - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { - pr_info("UV: CPUID.11 not implemented\n"); - return; - } - - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - sub_index = 1; - do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - break; - } - sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - - uv_cpuid.apicid_shift = 0; - uv_cpuid.apicid_mask = (~(-1 << sid_shift)); - uv_cpuid.socketid_shift = sid_shift; -} - static void __init early_get_apic_socketid_shift(void) { + unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN); + if (is_uv2_hub() || is_uv3_hub()) uvh_apicid.v = uv_early_read_mmr(UVH_APICID); - set_x2apic_bits(); + if (sid_shift) { + uv_cpuid.apicid_shift = 0; + uv_cpuid.apicid_mask = (~(-1 << sid_shift)); + uv_cpuid.socketid_shift = sid_shift; + } else { + pr_info("UV: CPU does not have valid CPUID.11\n"); + } pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); @@ -294,8 +262,7 @@ static void __init early_get_apic_socketid_shift(void) static void __init uv_stringify(int len, char *to, char *from) { - /* Relies on 'to' being NULL chars so result will be NULL terminated */ - strncpy(to, from, len-1); + strscpy(to, from, len); /* Trim trailing spaces */ (void)strim(to); @@ -546,7 +513,6 @@ unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* The following values are used for the per node hub info struct */ -static __initdata unsigned short *_node_to_pnode; static __initdata unsigned short _min_socket, _max_socket; static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; static __initdata struct uv_gam_range_entry *uv_gre_table; @@ -554,6 +520,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table; static __initdata unsigned short *_socket_to_node; static __initdata unsigned short *_socket_to_pnode; static __initdata unsigned short *_pnode_to_socket; +static __initdata unsigned short *_node_to_socket; static __initdata struct uv_gam_range_s *_gr_table; @@ -617,7 +584,8 @@ static __init void build_uv_gr_table(void) bytes = _gr_table_len * sizeof(struct uv_gam_range_s); grt = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!grt); + if (WARN_ON_ONCE(!grt)) + return; _gr_table = grt; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { @@ -699,7 +667,7 @@ static __init void build_uv_gr_table(void) } } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu) { unsigned long val; int pnode; @@ -777,50 +745,6 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } -static int uv_apic_id_valid(u32 apicid) -{ - return 1; -} - -static 
int uv_apic_id_registered(void) -{ - return 1; -} - -static void uv_init_apic_ldr(void) -{ -} - -static u32 apic_uv_calc_apicid(unsigned int cpu) -{ - return apic_default_calc_apicid(cpu); -} - -static unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; -} - -static u32 set_apic_id(unsigned int id) -{ - return id; -} - -static unsigned int uv_read_apic_id(void) -{ - return x2apic_get_apic_id(apic_read(APIC_ID)); -} - -static int uv_phys_pkg_id(int initial_apicid, int index_msb) -{ - return uv_read_apic_id() >> index_msb; -} - -static void uv_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - static int uv_probe(void) { return apic == &apic_x2apic_uv_x; @@ -831,45 +755,32 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .name = "UV large system", .probe = uv_probe, .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .apic_id_valid = uv_apic_id_valid, - .apic_id_registered = uv_apic_id_registered, - .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, .disable_esr = 0, - .check_apicid_used = NULL, - .init_apic_ldr = uv_init_apic_ldr, - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .phys_pkg_id = uv_phys_pkg_id, + .max_apic_id = UINT_MAX, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = set_apic_id, - .calc_dest_apicid = apic_uv_calc_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_all = uv_send_IPI_all, - .send_IPI_self = uv_send_IPI_self, + .send_IPI_self = x2apic_send_IPI_self, .wakeup_secondary_cpu = uv_wakeup_secondary, - .inquire_remote_apic = NULL, .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_eoi_write, + .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3 @@ -1012,7 +923,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* One (UV2) mapping */ if (index == UV2_MMIOH) { - strncpy(id, "MMIOH", sizeof(id)); + strscpy(id, "MMIOH", sizeof(id)); max_io = max_pnode; mapped = 0; goto map_exit; @@ -1022,7 +933,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, switch (index) { case UVY_MMIOH0: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1030,7 +941,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVY_MMIOH1: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1038,7 +949,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH0: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; 
min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1046,7 +957,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH1: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1072,8 +983,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* Invalid NASID check */ if (nasid < min_nasid || max_nasid < nasid) { - pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n", - __func__, index, min_nasid, max_nasid); + /* Not an error: unused table entries get "poison" values */ + pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n", + __func__, index, nasid, min_nasid, max_nasid); nasid = -1; } @@ -1292,6 +1204,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi) hi->nasid_shift = uv_cpuid.nasid_shift; hi->min_pnode = _min_pnode; hi->min_socket = _min_socket; + hi->node_to_socket = _node_to_socket; hi->pnode_to_socket = _pnode_to_socket; hi->socket_to_node = _socket_to_node; hi->socket_to_pnode = _socket_to_pnode; @@ -1348,7 +1261,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; unsigned long lgre = 0, gend = 0; int index = 0; - int sock_min = 999999, pnode_min = 99999; + int sock_min = INT_MAX, pnode_min = INT_MAX; int sock_max = -1, pnode_max = -1; uv_gre_table = gre; @@ -1459,11 +1372,37 @@ static int __init decode_uv_systab(void) return 0; } +/* + * Given a bitmask 'bits' representing present blades, numbered + * starting at 'base', masking off unused high bits of blade number + * with 'mask', update the minimum and maximum blade numbers that we + * have found. (Masking with 'mask' is necessary because of the BIOS's + * treatment of system partitioning when creating the table we are + * interpreting.)
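+ * + * A worked example with illustrative values: bits = 0x12 (bits 1 and 4 set), base = 64 and mask = 0x7f give first = (64 + __ffs(0x12)) & 0x7f = 65 and last = (64 + __fls(0x12)) & 0x7f = 68, so *min and *max widen to cover blades 65..68.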
+ */ +static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max) +{ + int first, last; + + if (!bits) + return; + first = (base + __ffs(bits)) & mask; + last = (base + __fls(bits)) & mask; + + if (*min > first) + *min = first; + if (*max < last) + *max = last; +} + /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) { unsigned long np; int i, uv_pb = 0; + int sock_min = INT_MAX, sock_max = -1, s_mask; + + s_mask = (1 << uv_cpuid.n_skt) - 1; if (UVH_NODE_PRESENT_TABLE) { pr_info("UV: NODE_PRESENT_DEPTH = %d\n", @@ -1471,35 +1410,82 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); - uv_pb += hweight64(np); + blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max); } } if (UVH_NODE_PRESENT_0) { np = uv_read_local_mmr(UVH_NODE_PRESENT_0); pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max); } if (UVH_NODE_PRESENT_1) { np = uv_read_local_mmr(UVH_NODE_PRESENT_1); pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max); + } + + /* Only update if we actually found some bits indicating blades present */ + if (sock_max >= sock_min) { + _min_socket = sock_min; + _max_socket = sock_max; + uv_pb = sock_max - sock_min + 1; } if (uv_possible_blades != uv_pb) uv_possible_blades = uv_pb; - pr_info("UV: number nodes/possible blades %d\n", uv_pb); + pr_info("UV: number nodes/possible blades %d (%d - %d)\n", + uv_pb, sock_min, sock_max); } +static int __init alloc_conv_table(int num_elem, unsigned short **table) +{ + int i; + size_t bytes; + + bytes = num_elem * sizeof(*table[0]); + *table = kmalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!*table)) + return -ENOMEM; + for (i = 0; i < num_elem; i++) + ((unsigned short *)*table)[i] = SOCK_EMPTY; + return 0; +} + +/* Remove conversion table if it's 1:1 */ +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) + +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) +{ + int i; + unsigned short *table = *tp; + + if (table == NULL) + return; + if (max != max2) + return; + for (i = 0; i < max; i++) { + if (i != table[i]) + return; + } + kfree(table); + *tp = NULL; + pr_info("UV: %s is 1:1, conversion table removed\n", tname); +} + +/* + * Build Socket Tables + * If the number of nodes is >1 per socket, socket to node table will + * contain lowest node number on that socket. 
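+ * + * Illustrative example (hypothetical IDs): with _min_socket = 4 and sockets 4 and 5 holding nodes 0 and 1, _socket_to_node[] becomes { 0, 1 } (indexed by sockid - _min_socket) and _node_to_socket[] becomes { 4, 5 }; FREE_1_TO_1_TABLE() below then removes whichever tables turn out to be identity maps.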
+ */ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; - int num, nump; - int cpu, i, lnid; + int nums, numn, nump; + int i, lnid, apicid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; int maxpnode = _max_pnode; - size_t bytes; if (!gre) { if (is_uv2_hub() || is_uv3_hub()) { @@ -1507,39 +1493,36 @@ static void __init build_socket_tables(void) return; } pr_err("UV: Error: UVsystab address translations not available!\n"); - BUG(); + WARN_ON_ONCE(!gre); + return; } - /* Build socket id -> node id, pnode */ - num = maxsock - minsock + 1; - bytes = num * sizeof(_socket_to_node[0]); - _socket_to_node = kmalloc(bytes, GFP_KERNEL); - _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); - + numn = num_possible_nodes(); nump = maxpnode - minpnode + 1; - bytes = nump * sizeof(_pnode_to_socket[0]); - _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); - - for (i = 0; i < num; i++) - _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; - - for (i = 0; i < nump; i++) - _pnode_to_socket[i] = SOCK_EMPTY; + nums = maxsock - minsock + 1; + + /* Allocate and clear tables */ + if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_pnode) < 0) + || (alloc_conv_table(numn, &_node_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_node) < 0)) { + kfree(_pnode_to_socket); + kfree(_socket_to_pnode); + kfree(_node_to_socket); + return; + } /* Fill in pnode/node/addr conversion list values: */ - pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; i = gre->sockid - minsock; - /* Duplicate: */ - if (_socket_to_pnode[i] != SOCK_EMPTY) - continue; - _socket_to_pnode[i] = gre->pnode; + if (_socket_to_pnode[i] == SOCK_EMPTY) + _socket_to_pnode[i] = gre->pnode; i = gre->pnode - minpnode; - _pnode_to_socket[i] = gre->sockid; + if (_pnode_to_socket[i] == SOCK_EMPTY) + _pnode_to_socket[i] = gre->sockid; pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, @@ -1549,66 +1532,38 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_present_cpu(cpu) { - int nid = cpu_to_node(cpu); - int apicid, sockid; + for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) { + int nid = __apicid_to_node[apicid]; + int sockid; - if (lnid == nid) + if ((nid == NUMA_NO_NODE) || (lnid == nid)) continue; lnid = nid; - apicid = per_cpu(x86_cpu_to_apicid, cpu); + sockid = apicid >> uv_cpuid.socketid_shift; - _socket_to_node[sockid - minsock] = nid; - pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", - sockid, apicid, nid); - } - /* Set up physical blade to pnode translation from GAM Range Table: */ - bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); - _node_to_pnode = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_node_to_pnode); + if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) + _socket_to_node[sockid - minsock] = nid; - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { - unsigned short sockid; + if (_node_to_socket[nid] == SOCK_EMPTY) + _node_to_socket[nid] = sockid; - for (sockid = minsock; sockid <= maxsock; sockid++) { - if (lnid == _socket_to_node[sockid - minsock]) { - _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; - break; - } - } - if (sockid > maxsock) { - pr_err("UV: socket for node %d not found!\n", lnid); - 
BUG(); - } + pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", + sockid, + apicid, + _node_to_socket[nid], + nid, + _socket_to_node[sockid - minsock]); } /* - * If socket id == pnode or socket id == node for all nodes, + * If e.g. socket id == pnode for all pnodes, * system runs faster by removing corresponding conversion table. */ - pr_info("UV: Checking socket->node/pnode for identity maps\n"); - if (minsock == 0) { - for (i = 0; i < num; i++) - if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) - break; - if (i >= num) { - kfree(_socket_to_node); - _socket_to_node = NULL; - pr_info("UV: 1:1 socket_to_node table removed\n"); - } - } - if (minsock == minpnode) { - for (i = 0; i < num; i++) - if (_socket_to_pnode[i] != SOCK_EMPTY && - _socket_to_pnode[i] != i + minpnode) - break; - if (i >= num) { - kfree(_socket_to_pnode); - _socket_to_pnode = NULL; - pr_info("UV: 1:1 socket_to_pnode table removed\n"); - } - } + FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); + FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); } /* Check which reboot to use */ @@ -1692,12 +1647,13 @@ static __init int uv_system_init_hubless(void) static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; - int bytes, cpu, nodeid; - unsigned short min_pnode = 9999, max_pnode = 0; + int bytes, cpu, nodeid, bid; + unsigned short min_pnode = USHRT_MAX, max_pnode = 0; char *hub = is_uv5_hub() ? "UV500" : is_uv4_hub() ? "UV400" : is_uv3_hub() ? "UV300" : is_uv2_hub() ? "UV2000/3000" : NULL; + struct uv_hub_info_s **uv_hub_info_list_blade; if (!hub) { pr_err("UV: Unknown/unsupported UV hub\n"); @@ -1720,9 +1676,12 @@ static void __init uv_system_init_hub(void) build_uv_gr_table(); set_block_size(); uv_init_hub_info(&hub_info); - uv_possible_blades = num_possible_nodes(); - if (!_node_to_pnode) + /* If UV2 or UV3 may need to get # blades from HW */ + if (is_uv(UV2|UV3) && !uv_gre_table) boot_init_possible_blades(&hub_info); + else + /* min/max sockets set in decode_gam_rng_tbl */ + uv_possible_blades = (_max_socket - _min_socket) + 1; /* uv_num_possible_blades() is really the hub count: */ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); @@ -1731,79 +1690,98 @@ static void __init uv_system_init_hub(void) hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); + /* + * __uv_hub_info_list[] is indexed by node, but there is only + * one hub_info structure per blade. First, allocate one + * structure per blade. Further down we create a per-node + * table (__uv_hub_info_list[]) pointing to hub_info + * structures for the correct blade. + */ + bytes = sizeof(void *) * uv_num_possible_blades(); - __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!__uv_hub_info_list); + uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!uv_hub_info_list_blade)) + return; bytes = sizeof(struct uv_hub_info_s); - for_each_node(nodeid) { + for_each_possible_blade(bid) { struct uv_hub_info_s *new_hub; - if (__uv_hub_info_list[nodeid]) { - pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); - BUG(); + /* Allocate & fill new per hub info list */ + new_hub = (bid == 0) ? 
&uv_hub_info_node0 + : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (WARN_ON_ONCE(!new_hub)) { + /* do not kfree() bid 0, which is statically allocated */ + while (--bid > 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; } - /* Allocate new per hub info list */ - new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); - BUG_ON(!new_hub); - __uv_hub_info_list[nodeid] = new_hub; - new_hub = uv_hub_info_list(nodeid); - BUG_ON(!new_hub); + uv_hub_info_list_blade[bid] = new_hub; *new_hub = hub_info; /* Use information from GAM table if available: */ - if (_node_to_pnode) - new_hub->pnode = _node_to_pnode[nodeid]; + if (uv_gre_table) + new_hub->pnode = uv_blade_to_pnode(bid); else /* Or fill in during CPU loop: */ new_hub->pnode = 0xffff; - new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); + new_hub->numa_blade_id = bid; new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } + /* + * Now populate __uv_hub_info_list[] for each node with the + * pointer to the struct for the blade it resides on. + */ + + bytes = sizeof(void *) * num_possible_nodes(); + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!__uv_hub_info_list)) { + for_each_possible_blade(bid) + /* bid 0 is statically allocated */ + if (bid != 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; + } + + for_each_node(nodeid) + __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; + /* Initialize per CPU info: */ for_each_possible_cpu(cpu) { int apicid = per_cpu(x86_cpu_to_apicid, cpu); - int numa_node_id; + unsigned short bid; unsigned short pnode; - nodeid = cpu_to_node(cpu); - numa_node_id = numa_cpu_node(cpu); pnode = uv_apicid_to_pnode(apicid); + bid = uv_pnode_to_socket(pnode) - _min_socket; - uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid]; uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); - /* Init memoryless node: */ - if (nodeid != numa_node_id && - uv_hub_info_list(numa_node_id)->pnode == 0xffff) - uv_hub_info_list(numa_node_id)->pnode = pnode; - else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) + if (uv_cpu_hub_info(cpu)->pnode == 0xffff) uv_cpu_hub_info(cpu)->pnode = pnode; } - for_each_node(nodeid) { - unsigned short pnode = uv_hub_info_list(nodeid)->pnode; + for_each_possible_blade(bid) { + unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; - /* Add pnode info for pre-GAM list nodes without CPUs: */ - if (pnode == 0xffff) { - unsigned long paddr; + if (pnode == 0xffff) + continue; - paddr = node_start_pfn(nodeid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - uv_hub_info_list(nodeid)->pnode = pnode; - } min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); - pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", - nodeid, - uv_hub_info_list(nodeid)->pnode, - uv_hub_info_list(nodeid)->nr_possible_cpus); + pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", + bid, + uv_hub_info_list_blade[bid]->pnode, + uv_hub_info_list_blade[bid]->nr_possible_cpus); } pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); @@ -1811,6 +1789,9 @@ static void __init uv_system_init_hub(void) map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); + 
kfree(uv_hub_info_list_blade); + uv_hub_info_list_blade = NULL; + uv_nmi_setup(); uv_cpu_init(); uv_setup_proc_files(0);
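A standalone illustration of the cluster addressing math above may help: x2apic_prepare_cpu() builds each CPU's logical ID as (cluster << 16) | (1 << (phys_apicid & 0xf)), and __x2apic_send_IPI_mask() ORs those IDs so one IPI covers a whole cluster. The sketch below is hypothetical user-space code, not part of the patch, using example APIC IDs:

#include <stdio.h>

/* Mirrors apic_cluster() from x2apic_cluster.c: 16 CPUs per cluster. */
static unsigned int apic_cluster(unsigned int apicid)
{
	return apicid >> 4;
}

/* Same construction as x2apic_prepare_cpu(): cluster number in the high word, a one-hot intra-cluster bit in the low word. */
static unsigned int logical_apicid(unsigned int phys)
{
	return (apic_cluster(phys) << 16) | (1u << (phys & 0xf));
}

int main(void)
{
	/* Physical IDs 0x21 and 0x2e share cluster 2, so one IPI destination can cover both, as __x2apic_send_IPI_mask() does by ORing. */
	unsigned int a = logical_apicid(0x21);
	unsigned int b = logical_apicid(0x2e);

	printf("0x21 -> 0x%x, 0x2e -> 0x%x, combined dest 0x%x\n", a, b, a | b);
	return 0;
}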

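Likewise, the IRR/ALLOWED_IRR filtering in savic_read() depends on the two register banks interleaving at 16-byte strides, with SAVIC_ALLOWED_IRR offset by 4 from APIC_IRR. A minimal sketch of that validity check follows; is_aligned() is a stand-in for the kernel's IS_ALIGNED(), and the probe offsets are illustrative:

#include <stdio.h>
#include <stdbool.h>

#define APIC_IRR		0x200
#define SAVIC_ALLOWED_IRR	0x204

/* Stand-in for the kernel's IS_ALIGNED(). */
static bool is_aligned(unsigned int x, unsigned int a)
{
	return (x & (a - 1)) == 0;
}

/* Valid IRR registers sit at 0x200, 0x210, ..., 0x270 and valid ALLOWED_IRR registers 4 bytes later at 0x204, 0x214, ..., 0x274; everything else in the range is rejected, as in savic_read(). */
static bool valid_irr_offset(unsigned int reg)
{
	if (reg < APIC_IRR || reg > APIC_IRR + 0x74)
		return false;
	return is_aligned(reg, 16) || is_aligned(reg - 4, 16);
}

int main(void)
{
	unsigned int probes[] = { 0x200, 0x204, 0x208, 0x274 };

	for (int i = 0; i < 4; i++)
		printf("0x%x -> %s\n", probes[i], valid_irr_offset(probes[i]) ? "ok" : "rejected");
	return 0;
}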