summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/apic
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--arch/x86/kernel/apic/Makefile15
-rw-r--r--arch/x86/kernel/apic/apic.c2049
-rw-r--r--arch/x86/kernel/apic/apic_common.c43
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c320
-rw-r--r--arch/x86/kernel/apic/apic_noop.c158
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c260
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c238
-rw-r--r--arch/x86/kernel/apic/es7000_32.c746
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c70
-rw-r--r--arch/x86/kernel/apic/init.c110
-rw-r--r--arch/x86/kernel/apic/io_apic.c3916
-rw-r--r--arch/x86/kernel/apic/ipi.c332
-rw-r--r--arch/x86/kernel/apic/local.h68
-rw-r--r--arch/x86/kernel/apic/msi.c391
-rw-r--r--arch/x86/kernel/apic/numaq_32.c525
-rw-r--r--arch/x86/kernel/apic/probe_32.c198
-rw-r--r--arch/x86/kernel/apic/probe_64.c46
-rw-r--r--arch/x86/kernel/apic/summit_32.c552
-rw-r--r--arch/x86/kernel/apic/vector.c1387
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c347
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c113
-rw-r--r--arch/x86/kernel/apic/x2apic_savic.c428
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2115
23 files changed, 7134 insertions, 7293 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 0ae0323b1f9c..581db89477f9 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -1,27 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o
+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particular, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT := n
+
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o init.o
obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
+obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
obj-y += apic_flat_64.o
endif
-# APIC probe will depend on the listing order here
-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
-obj-$(CONFIG_X86_SUMMIT) += summit_32.o
-obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
-obj-$(CONFIG_X86_ES7000) += es7000_32.o
-
# For 32bit, probe_32 need to be listed last
obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 99663b59123a..d93f87f29d03 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Local APIC handling, local APIC timers
*
@@ -18,12 +19,13 @@
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
+#include <linux/bitmap.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
@@ -34,65 +36,68 @@
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/kvm_types.h>
+
+#include <xen/xen.h>
#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
+#include <asm/pc-conf-reg.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
-#include <asm/pgalloc.h>
#include <linux/atomic.h>
+#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
+#include <asm/traps.h>
#include <asm/apic.h>
+#include <asm/acpi.h>
#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
-#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/time.h>
#include <asm/smp.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/irq_regs.h>
+#include <asm/cpu.h>
-unsigned int num_processors;
-
-unsigned disabled_cpus __cpuinitdata;
+#include "local.h"
/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
+u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;
+EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
-/*
- * The highest APIC ID seen during enumeration.
- */
-unsigned int max_physical_apicid;
+u8 boot_cpu_apic_version __ro_after_init;
/*
- * Bitmask of physically existing CPUs:
+ * This variable controls which CPUs receive external NMIs. By default,
+ * external NMIs are delivered only to the BSP.
*/
-physid_mask_t phys_cpu_present_map;
+static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
/*
- * Map cpu index to physical APIC ID
+ * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
*/
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+static bool virt_ext_dest_id __ro_after_init;
-#ifdef CONFIG_X86_32
+/* For parallel bootup. */
+unsigned long apic_mmio_base __ro_after_init;
-/*
- * On x86_32, the mapping between cpu and logical apicid may vary
- * depending on apic in use. The following early percpu variable is
- * used for the mapping. This is where the behaviors of x86_64 and 32
- * actually diverge. Let's keep it ugly for now.
- */
-DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
+static inline bool apic_accessible(void)
+{
+ return x2apic_mode || apic_mmio_base;
+}
+#ifdef CONFIG_X86_32
/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
+static int enabled_via_apicbase __ro_after_init;
/*
* Handle interrupt mode configuration register (IMCR).
@@ -104,18 +109,14 @@ static int enabled_via_apicbase;
*/
static inline void imcr_pic_to_apic(void)
{
- /* select IMCR register */
- outb(0x70, 0x22);
/* NMI and 8259 INTR go through APIC */
- outb(0x01, 0x23);
+ pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
}
static inline void imcr_apic_to_pic(void)
{
- /* select IMCR register */
- outb(0x70, 0x22);
/* NMI and 8259 INTR go directly to BSP */
- outb(0x00, 0x23);
+ pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
}
#endif
@@ -125,12 +126,13 @@ static inline void imcr_apic_to_pic(void)
* +1=force-enable
*/
static int force_enable_local_apic __initdata;
+
/*
* APIC command line parameters
*/
static int __init parse_lapic(char *arg)
{
- if (config_enabled(CONFIG_X86_32) && !arg)
+ if (IS_ENABLED(CONFIG_X86_32) && !arg)
force_enable_local_apic = 1;
else if (arg && !strncmp(arg, "notscdeadline", 13))
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
@@ -144,70 +146,39 @@ static __init int setup_apicpmtimer(char *s)
{
apic_calibrate_pmtmr = 1;
notsc_setup(NULL);
- return 0;
+ return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif
-int x2apic_mode;
-#ifdef CONFIG_X86_X2APIC
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-static int x2apic_disabled;
-static int nox2apic;
-static __init int setup_nox2apic(char *str)
-{
- if (x2apic_enabled()) {
- int apicid = native_apic_msr_read(APIC_ID);
-
- if (apicid >= 255) {
- pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
- apicid);
- return 0;
- }
-
- pr_warning("x2apic already enabled. will disable it\n");
- } else
- setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-
- nox2apic = 1;
-
- return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
-unsigned long mp_lapic_addr;
-int disable_apic;
+static unsigned long mp_lapic_addr __ro_after_init;
+bool apic_is_disabled __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
+int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-int first_system_vector = 0xfe;
-
/*
* Debug level, exported for io_apic.c
*/
-unsigned int apic_verbosity;
+int apic_verbosity __ro_after_init;
-int pic_mode;
+int pic_mode __ro_after_init;
/* Have we found an MP table */
-int smp_found_config;
+int smp_found_config __ro_after_init;
static struct resource lapic_resource = {
.name = "Local APIC",
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
-unsigned int lapic_timer_frequency = 0;
+/* Measured in ticks per HZ. */
+unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
-static unsigned long apic_phys;
-
/*
* Get the LAPIC version
*/
@@ -221,11 +192,7 @@ static inline int lapic_get_version(void)
*/
static inline int lapic_is_integrated(void)
{
-#ifdef CONFIG_X86_64
- return 1;
-#else
return APIC_INTEGRATED(lapic_get_version());
-#endif
}
/*
@@ -237,6 +204,11 @@ static int modern_apic(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 >= 0xf)
return 1;
+
+ /* Hygon systems use modern APIC */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+ return 1;
+
return lapic_get_version() >= 0x14;
}
@@ -246,37 +218,17 @@ static int modern_apic(void)
*/
static void __init apic_disable(void)
{
- pr_info("APIC: switched to apic NOOP\n");
- apic = &apic_noop;
-}
-
-void native_apic_wait_icr_idle(void)
-{
- while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
-}
-
-u32 native_safe_apic_wait_icr_idle(void)
-{
- u32 send_status;
- int timeout;
-
- timeout = 0;
- do {
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- if (!send_status)
- break;
- inc_irq_stat(icr_read_retry_count);
- udelay(100);
- } while (timeout++ < 1000);
-
- return send_status;
+ apic_install_driver(&apic_noop);
}
void native_apic_icr_write(u32 low, u32 id)
{
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ unsigned long flags;
+
+ local_irq_save(flags);
+ apic_write(APIC_ICR2, SET_XAPIC_DEST_FIELD(id));
apic_write(APIC_ICR, low);
+ local_irq_restore(flags);
}
u64 native_apic_icr_read(void)
@@ -289,29 +241,16 @@ u64 native_apic_icr_read(void)
return icr1 | ((u64)icr2 << 32);
}
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
- return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
/**
* lapic_get_maxlvt - get the maximum number of local vector table entries
*/
int lapic_get_maxlvt(void)
{
- unsigned int v;
-
- v = apic_read(APIC_LVR);
/*
* - we always have APIC integrated on 64bit mode
* - 82489DXs do not report # of LVT entries
*/
- return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+ return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}
/*
@@ -320,7 +259,10 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
-#define TSC_DIVISOR 32
+#define TSC_DIVISOR 8
+
+/* i82489DX specific */
+#define I82489DX_BASE_DIVIDER (((0x2) << 18))
/*
* This function sets up the local APIC timer, with a timeout of
@@ -342,8 +284,14 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
+ /*
+ * The i82489DX APIC uses bit 18 and 19 for the base divider. This
+ * overlaps with bit 18 on integrated APICs, but is not documented
+ * in the SDM. No problem though. i82489DX equipped systems do not
+ * have TSC deadline timer.
+ */
if (!lapic_is_integrated())
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+ lvtt_value |= I82489DX_BASE_DIVIDER;
if (!irqen)
lvtt_value |= APIC_LVT_MASKED;
@@ -351,7 +299,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);
if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
+ /*
+ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+ * According to Intel, MFENCE can do the serialization here.
+ */
+ asm volatile("mfence" : : : "memory");
return;
}
@@ -409,10 +362,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
- rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
- } while (rsvd != new);
+ } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
- rsvd &= ~APIC_EILVT_MASKED;
+ rsvd = new & ~APIC_EILVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
@@ -472,45 +424,60 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;
- rdtscll(tsc);
- wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ /* This MSR is special and need a special fence: */
+ weak_wrmsr_fence();
+
+ tsc = rdtsc();
+ wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt)
+static int lapic_timer_shutdown(struct clock_event_device *evt)
{
- unsigned long flags;
unsigned int v;
/* Lapic used as dummy for broadcast ? */
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
- return;
+ return 0;
- local_irq_save(flags);
+ v = apic_read(APIC_LVTT);
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write(APIC_LVTT, v);
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- case CLOCK_EVT_MODE_ONESHOT:
- __setup_APIC_LVTT(lapic_timer_frequency,
- mode != CLOCK_EVT_MODE_PERIODIC, 1);
- break;
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- v = apic_read(APIC_LVTT);
- v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
- apic_write(APIC_LVTT, v);
+ /*
+ * Setting APIC_LVT_MASKED (above) should be enough to tell
+ * the hardware that this timer will never fire. But AMD
+ * erratum 411 and some Intel CPU behavior circa 2024 say
+ * otherwise. Time for belt and suspenders programming: mask
+ * the timer _and_ zero the counter registers:
+ */
+ if (v & APIC_LVT_TIMER_TSCDEADLINE)
+ wrmsrq(MSR_IA32_TSC_DEADLINE, 0);
+ else
apic_write(APIC_TMICT, 0);
- break;
- case CLOCK_EVT_MODE_RESUME:
- /* Nothing to do here */
- break;
- }
- local_irq_restore(flags);
+ return 0;
+}
+
+static inline int
+lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
+{
+ /* Lapic used as dummy for broadcast ? */
+ if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+ return 0;
+
+ __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
+ return 0;
+}
+
+static int lapic_timer_set_periodic(struct clock_event_device *evt)
+{
+ return lapic_timer_set_periodic_oneshot(evt, false);
+}
+
+static int lapic_timer_set_oneshot(struct clock_event_device *evt)
+{
+ return lapic_timer_set_periodic_oneshot(evt, true);
}
/*
@@ -519,7 +486,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
- apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+ __apic_send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
@@ -528,29 +495,89 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
* The local apic timer can be used for any function which is CPU local.
*/
static struct clock_event_device lapic_clockevent = {
- .name = "lapic",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
- | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
- .shift = 32,
- .set_mode = lapic_timer_setup,
- .set_next_event = lapic_next_event,
- .broadcast = lapic_timer_broadcast,
- .rating = 100,
- .irq = -1,
+ .name = "lapic",
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
+ | CLOCK_EVT_FEAT_DUMMY,
+ .shift = 32,
+ .set_state_shutdown = lapic_timer_shutdown,
+ .set_state_periodic = lapic_timer_set_periodic,
+ .set_state_oneshot = lapic_timer_set_oneshot,
+ .set_state_oneshot_stopped = lapic_timer_shutdown,
+ .set_next_event = lapic_next_event,
+ .broadcast = lapic_timer_broadcast,
+ .rating = 100,
+ .irq = -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+static const struct x86_cpu_id deadline_match[] __initconst = {
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x2, 0x2, 0x3a), /* EP */
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x4, 0x4, 0x0f), /* EX */
+
+ X86_MATCH_VFM(INTEL_BROADWELL_X, 0x0b000020),
+
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x2, 0x2, 0x00000011),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x3, 0x3, 0x0700000e),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x4, 0x4, 0x0f00000c),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x5, 0x5, 0x0e000003),
+
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x3, 0x3, 0x01000136),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x4, 0x4, 0x02000014),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x5, 0xf, 0),
+
+ X86_MATCH_VFM(INTEL_HASWELL, 0x22),
+ X86_MATCH_VFM(INTEL_HASWELL_L, 0x20),
+ X86_MATCH_VFM(INTEL_HASWELL_G, 0x17),
+
+ X86_MATCH_VFM(INTEL_BROADWELL, 0x25),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, 0x17),
+
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, 0xb2),
+ X86_MATCH_VFM(INTEL_SKYLAKE, 0xb2),
+
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, 0x52),
+ X86_MATCH_VFM(INTEL_KABYLAKE, 0x52),
+
+ {},
+};
+
+static __init bool apic_validate_deadline_timer(void)
+{
+ const struct x86_cpu_id *m;
+ u32 rev;
+
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ m = x86_match_cpu(deadline_match);
+ if (!m)
+ return true;
+
+ rev = (u32)m->driver_data;
+
+ if (boot_cpu_data.microcode >= rev)
+ return true;
+
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+ pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
+ "please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
+}
+
/*
* Setup the local APIC timer for this CPU. Copy the initialized values
* of the boot CPU and register the clock event in the framework.
*/
-static void __cpuinit setup_APIC_timer(void)
+static void setup_APIC_timer(void)
{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
- /* Make LAPIC timer preferrable over percpu HPET */
+ /* Make LAPIC timer preferable over percpu HPET */
lapic_clockevent.rating = 150;
}
@@ -558,21 +585,48 @@ static void __cpuinit setup_APIC_timer(void)
levt->cpumask = cpumask_of(smp_processor_id());
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+ levt->name = "lapic-deadline";
levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
- (tsc_khz / TSC_DIVISOR) * 1000,
+ tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
} else
clockevents_register_device(levt);
+
+ apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);
+}
+
+/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return;
+
+ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+ /*
+ * The clockevent device's ->mult and ->shift can both be
+ * changed. In order to avoid races, schedule the frequency
+ * update code on each CPU.
+ */
+ on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}
/*
* In this functions we calibrate APIC bus clocks to the external timer.
*
* We want to do the calibration only once since we want to have local timer
- * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
* frequency.
*
* This was previously done by reading the PIT/HPET and waiting for a wrap
@@ -594,20 +648,20 @@ static void __cpuinit setup_APIC_timer(void)
static __initdata int lapic_cal_loops = -1;
static __initdata long lapic_cal_t1, lapic_cal_t2;
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata u32 lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
/*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
*/
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
unsigned long long tsc = 0;
long tapic = apic_read(APIC_TMCCT);
- unsigned long pm = acpi_pm_read_early();
+ u32 pm = acpi_pm_read_early();
- if (cpu_has_tsc)
- rdtscll(tsc);
+ if (boot_cpu_has(X86_FEATURE_TSC))
+ tsc = rdtsc();
switch (lapic_cal_loops++) {
case 0:
@@ -629,7 +683,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
}
static int __init
-calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
+calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
{
const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
const long pm_thresh = pm_100ms / 100;
@@ -640,7 +694,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
return -1;
#endif
- apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
+ apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);
/* Check, if the PM timer is available */
if (!deltapm)
@@ -650,14 +704,14 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
if (deltapm > (pm_100ms - pm_thresh) &&
deltapm < (pm_100ms + pm_thresh)) {
- apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+ apic_pr_verbose("... PM-Timer result ok\n");
return 0;
}
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- pr_warning("APIC calibration not consistent "
- "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+ pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n",
+ (long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
@@ -667,76 +721,167 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
*delta = (long)res;
/* Correct the tsc counter value */
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
- apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
- "PM-Timer: %lu (%ld)\n",
- (unsigned long)res, *deltatsc);
+ apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n",
+ (unsigned long)res, *deltatsc);
*deltatsc = (long)res;
}
return 0;
}
+static int __init lapic_init_clockevent(void)
+{
+ if (!lapic_timer_period)
+ return -1;
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
+ TICK_NSEC, lapic_clockevent.shift);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
+ lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ticks = 0xF;
+
+ return 0;
+}
+
+bool __init apic_needs_pit(void)
+{
+ /*
+ * If the frequencies are not known, PIT is required for both TSC
+ * and apic timer calibration.
+ */
+ if (!tsc_khz || !cpu_khz)
+ return true;
+
+ /* Is there an APIC at all or is it disabled? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) || apic_is_disabled)
+ return true;
+
+ /*
+ * If interrupt delivery mode is legacy PIC or virtual wire without
+ * configuration, the local APIC timer won't be set up. Make sure
+ * that the PIT is initialized.
+ */
+ if (apic_intr_mode == APIC_PIC ||
+ apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
+ return true;
+
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
+ /* Deadline timer is based on TSC so no further PIT action required */
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+
+ /* APIC timer disabled? */
+ if (disable_apic_timer)
+ return true;
+ /*
+ * The APIC timer frequency is known already, no PIT calibration
+ * required. If unknown, let the PIT be initialized.
+ */
+ return lapic_timer_period == 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
- void (*real_handler)(struct clock_event_device *dev);
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+ u64 tsc_perj = 0, tsc_start = 0;
+ long delta_tsc_khz, bus_khz;
+ unsigned long jif_start;
unsigned long deltaj;
long delta, deltatsc;
int pm_referenced = 0;
- /**
- * check if lapic timer has already been calibrated by platform
- * specific routine, such as tsc calibration code. if so, we just fill
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return 0;
+
+ /*
+ * Check if lapic timer has already been calibrated by platform
+ * specific routine, such as tsc calibration code. If so just fill
* in the clockevent structure and return.
*/
-
- if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
- return 0;
- } else if (lapic_timer_frequency) {
- apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
- TICK_NSEC, lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
+ if (!lapic_init_clockevent()) {
+ apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);
+ /*
+ * Direct calibration methods must have an always running
+ * local APIC timer, no need for broadcast timer.
+ */
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
return 0;
}
- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
- "calibrating APIC timer ...\n");
+ apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");
+ /*
+ * There are platforms w/o global clockevent devices. Instead of
+ * making the calibration conditional on that, use a polling based
+ * approach everywhere.
+ */
local_irq_disable();
- /* Replace the global interrupt handler */
- real_handler = global_clock_event->event_handler;
- global_clock_event->event_handler = lapic_cal_handler;
-
/*
* Setup the APIC counter to maximum. There is no way the lapic
* can underflow in the 100ms detection time frame
*/
__setup_APIC_LVTT(0xffffffff, 0, 0);
- /* Let the interrupts run */
+ /*
+ * Methods to terminate the calibration loop:
+ * 1) Global clockevent if available (jiffies)
+ * 2) TSC if available and frequency is known
+ */
+ jif_start = READ_ONCE(jiffies);
+
+ if (tsc_khz) {
+ tsc_start = rdtsc();
+ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+ }
+
+ /*
+ * Enable interrupts so the tick can fire, if a global
+ * clockevent device is available
+ */
local_irq_enable();
- while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
- cpu_relax();
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+ /* Wait for a tick to elapse */
+ while (1) {
+ if (tsc_khz) {
+ u64 tsc_now = rdtsc();
+ if ((tsc_now - tsc_start) >= tsc_perj) {
+ tsc_start += tsc_perj;
+ break;
+ }
+ } else {
+ unsigned long jif_now = READ_ONCE(jiffies);
- local_irq_disable();
+ if (time_after(jif_now, jif_start)) {
+ jif_start = jif_now;
+ break;
+ }
+ }
+ cpu_relax();
+ }
- /* Restore the real event handler */
- global_clock_event->event_handler = real_handler;
+ /* Invoke the calibration routine */
+ local_irq_disable();
+ lapic_cal_handler(NULL);
+ local_irq_enable();
+ }
+
+ local_irq_disable();
/* Build delta t1-t2 as apic timer counts down */
delta = lapic_cal_t1 - lapic_cal_t2;
- apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+ apic_pr_verbose("... lapic delta = %ld\n", delta);
deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
@@ -744,56 +889,48 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- /* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
- lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
+ lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_init_clockevent();
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ apic_pr_verbose("..... delta %ld\n", delta);
+ apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult);
+ apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);
- apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
- apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
- apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
- lapic_timer_frequency);
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
+ delta_tsc_khz = (deltatsc * HZ) / (1000 * LAPIC_CAL_LOOPS);
- if (cpu_has_tsc) {
- apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
- "%ld.%04ld MHz.\n",
- (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
- (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+ apic_pr_verbose("..... CPU clock speed is %ld.%03ld MHz.\n",
+ delta_tsc_khz / 1000, delta_tsc_khz % 1000);
}
- apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
- "%u.%04u MHz.\n",
- lapic_timer_frequency / (1000000 / HZ),
- lapic_timer_frequency % (1000000 / HZ));
+ bus_khz = (long)lapic_timer_period * HZ / 1000;
+ apic_pr_verbose("..... host bus clock speed is %ld.%03ld MHz.\n",
+ bus_khz / 1000, bus_khz % 1000);
/*
* Do a sanity check on the APIC calibration result
*/
- if (lapic_timer_frequency < (1000000 / HZ)) {
+ if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
- pr_warning("APIC frequency too slow, disabling apic timer\n");
+ pr_warn("APIC frequency too slow, disabling apic timer\n");
return -1;
}
levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
/*
- * PM timer calibration failed or not turned on
- * so lets try APIC timer based calibration
+ * PM timer calibration failed or not turned on so lets try APIC
+ * timer based calibration, if a global clockevent device is
+ * available.
*/
- if (!pm_referenced) {
- apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+ if (!pm_referenced && global_clock_event) {
+ apic_pr_verbose("... verify APIC timer\n");
/*
* Setup the apic timer manually
*/
levt->event_handler = lapic_cal_handler;
- lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+ lapic_timer_set_periodic(levt);
lapic_cal_loops = -1;
/* Let the interrupts run */
@@ -803,23 +940,24 @@ static int __init calibrate_APIC_clock(void)
cpu_relax();
/* Stop the lapic timer */
- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+ local_irq_disable();
+ lapic_timer_shutdown(levt);
/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
- apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+ apic_pr_verbose("... jiffies delta = %lu\n", deltaj);
/* Check, if the jiffies result is consistent */
if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
- apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+ apic_pr_verbose("... jiffies result ok\n");
else
levt->features |= CLOCK_EVT_FEAT_DUMMY;
- } else
- local_irq_enable();
+ }
+ local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- pr_warning("APIC timer disabled due to verification failure\n");
- return -1;
+ pr_warn("APIC timer disabled due to verification failure\n");
+ return -1;
}
return 0;
@@ -864,11 +1002,13 @@ void __init setup_boot_APIC_clock(void)
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
+ amd_e400_c1e_apic_setup();
}
-void __cpuinit setup_secondary_APIC_clock(void)
+void setup_secondary_APIC_clock(void)
{
setup_APIC_timer();
+ amd_e400_c1e_apic_setup();
}
/*
@@ -876,8 +1016,7 @@ void __cpuinit setup_secondary_APIC_clock(void)
*/
static void local_apic_timer_interrupt(void)
{
- int cpu = smp_processor_id();
- struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+ struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
/*
* Normally we should not be here till LAPIC has been initialized but
@@ -891,9 +1030,10 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
+ smp_processor_id());
/* Switch it off */
- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+ lapic_timer_shutdown(evt);
return;
}
@@ -913,51 +1053,18 @@ static void local_apic_timer_interrupt(void)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- *
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- entering_ack_irq();
- local_apic_timer_interrupt();
- exiting_irq();
-
- set_irq_regs(old_regs);
-}
-
-void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- *
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- entering_ack_irq();
+ apic_eoi();
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
- exiting_irq();
set_irq_regs(old_regs);
}
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
/*
* Local APIC start and shutdown
*/
@@ -974,8 +1081,7 @@ void clear_local_APIC(void)
int maxlvt;
u32 v;
- /* APIC hasn't been mapped yet */
- if (!x2apic_mode && !apic_phys)
+ if (!apic_accessible())
return;
maxlvt = lapic_get_maxlvt();
@@ -1038,25 +1144,40 @@ void clear_local_APIC(void)
}
/**
- * disable_local_APIC - clear and disable the local APIC
+ * apic_soft_disable - Clears and software disables the local APIC on hotplug
+ *
+ * Contrary to disable_local_APIC() this does not touch the enable bit in
+ * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
+ * bus would require a hardware reset as the APIC would lose track of bus
+ * arbitration. On systems with FSB delivery APICBASE could be disabled,
+ * but it has to be guaranteed that no interrupt is sent to the APIC while
+ * in that state and it's not clear from the SDM whether it still responds
+ * to INIT/SIPI messages. Stay on the safe side and use software disable.
*/
-void disable_local_APIC(void)
+void apic_soft_disable(void)
{
- unsigned int value;
-
- /* APIC hasn't been mapped yet */
- if (!x2apic_mode && !apic_phys)
- return;
+ u32 value;
clear_local_APIC();
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
+ /* Soft disable APIC (implies clearing of registers for 82489DX!). */
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+ if (!apic_accessible())
+ return;
+
+ if (apic->teardown)
+ apic->teardown();
+
+ apic_soft_disable();
#ifdef CONFIG_X86_32
/*
@@ -1083,7 +1204,7 @@ void lapic_shutdown(void)
{
unsigned long flags;
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1099,67 +1220,6 @@ void lapic_shutdown(void)
local_irq_restore(flags);
}
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonably.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = lapic_get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
- reg1 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ apic->apic_id_mask))
- return 0;
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are anymore.
- */
- reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
/**
* sync_Arb_IDs - synchronize APIC bus arbitration IDs
*/
@@ -1177,9 +1237,71 @@ void __init sync_Arb_IDs(void)
*/
apic_wait_icr_idle();
- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write(APIC_ICR, APIC_DEST_ALLINC |
- APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_pr_debug("Synchronizing Arb IDs.\n");
+ apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
+}
+
+enum apic_intr_mode_id apic_intr_mode __ro_after_init;
+
+static int __init __apic_intr_mode_select(void)
+{
+ /* Check kernel option */
+ if (apic_is_disabled) {
+ pr_info("APIC disabled via kernel command line\n");
+ return APIC_PIC;
+ }
+
+ /* Check BIOS */
+#ifdef CONFIG_X86_64
+ /* On 64-bit, the APIC must be integrated, Check local APIC only */
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
+ apic_is_disabled = true;
+ pr_info("APIC disabled by BIOS\n");
+ return APIC_PIC;
+ }
+#else
+ /* On 32-bit, the APIC may be integrated APIC or 82489DX */
+
+ /* Neither 82489DX nor integrated APIC ? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
+ apic_is_disabled = true;
+ return APIC_PIC;
+ }
+
+ /* If the BIOS pretends there is an integrated APIC ? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
+ APIC_INTEGRATED(boot_cpu_apic_version)) {
+ apic_is_disabled = true;
+ pr_err(FW_BUG "Local APIC not detected, force emulation\n");
+ return APIC_PIC;
+ }
+#endif
+
+ /* Check MP table or ACPI MADT configuration */
+ if (!smp_found_config) {
+ disable_ioapic_support();
+ if (!acpi_lapic) {
+ pr_info("APIC: ACPI MADT or MP tables are not detected\n");
+ return APIC_VIRTUAL_WIRE_NO_CONFIG;
+ }
+ return APIC_VIRTUAL_WIRE;
+ }
+
+#ifdef CONFIG_SMP
+ /* If SMP should be disabled, then really disable it! */
+ if (!setup_max_cpus) {
+ pr_info("APIC: SMP mode deactivated\n");
+ return APIC_SYMMETRIC_IO_NO_ROUTING;
+ }
+#endif
+
+ return APIC_SYMMETRIC_IO;
+}
+
+/* Select the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_select(void)
+{
+ apic_intr_mode = __apic_intr_mode_select();
}
/*
@@ -1193,7 +1315,7 @@ void __init init_bsp_APIC(void)
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
- if (smp_found_config || !cpu_has_apic)
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
@@ -1226,10 +1348,46 @@ void __init init_bsp_APIC(void)
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
+ if (apic_extnmi == APIC_EXTNMI_NONE)
+ value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
}
-static void __cpuinit lapic_setup_esr(void)
+static void __init apic_bsp_setup(bool upmode);
+
+/* Init the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_init(void)
+{
+ bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
+
+ switch (apic_intr_mode) {
+ case APIC_PIC:
+ pr_info("APIC: Keep in PIC mode(8259)\n");
+ return;
+ case APIC_VIRTUAL_WIRE:
+ pr_info("APIC: Switch to virtual wire mode setup\n");
+ break;
+ case APIC_VIRTUAL_WIRE_NO_CONFIG:
+ pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
+ upmode = true;
+ break;
+ case APIC_SYMMETRIC_IO:
+ pr_info("APIC: Switch to symmetric I/O mode setup\n");
+ break;
+ case APIC_SYMMETRIC_IO_NO_ROUTING:
+ pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
+ break;
+ }
+
+ x86_64_probe_apic();
+
+ if (x86_platform.apic_post_init)
+ x86_platform.apic_post_init();
+
+ apic_bsp_setup(upmode);
+}
+
+static void lapic_setup_esr(void)
{
unsigned int oldvalue, value, maxlvt;
@@ -1264,34 +1422,105 @@ static void __cpuinit lapic_setup_esr(void)
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE, "ESR value before enabling "
- "vector: 0x%08x after: 0x%08x\n",
- oldvalue, value);
+ if (value != oldvalue) {
+ apic_pr_verbose("ESR value before enabling vector: 0x%08x after: 0x%08x\n",
+ oldvalue, value);
+ }
+}
+
+#define APIC_IR_REGS APIC_ISR_NR
+#define APIC_IR_BITS (APIC_IR_REGS * 32)
+#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG)
+
+union apic_ir {
+ unsigned long map[APIC_IR_MAPSIZE];
+ u32 regs[APIC_IR_REGS];
+};
+
+static bool apic_check_and_eoi_isr(union apic_ir *isr)
+{
+ int i, bit;
+
+ /* Read the ISRs */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+
+ /* If the ISR map empty, nothing to do here. */
+ if (bitmap_empty(isr->map, APIC_IR_BITS))
+ return true;
+
+ /*
+ * There can be multiple ISR bits set when a high priority
+ * interrupt preempted a lower priority one. Issue an EOI for each
+ * set bit. The priority traversal order does not matter as there
+ * can't be new ISR bits raised at this point. What matters is that
+ * an EOI is issued for each ISR bit.
+ */
+ for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+ apic_eoi();
+
+ /* Reread the ISRs, they should be empty now */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+
+ return bitmap_empty(isr->map, APIC_IR_BITS);
+}
+
+/*
+ * If a CPU services an interrupt and crashes before issuing EOI to the
+ * local APIC, the corresponding ISR bit is still set when the crashing CPU
+ * jumps into a crash kernel. Read the ISR and issue an EOI for each set
+ * bit to acknowledge it as otherwise these slots would be locked forever
+ * waiting for an EOI.
+ *
+ * If there are pending bits in the IRR, then they won't be converted into
+ * ISR bits as the CPU has interrupts disabled. They will be delivered once
+ * the CPU enables interrupts and there is nothing which can prevent that.
+ *
+ * In the worst case this results in spurious interrupt warnings.
+ */
+static void apic_clear_isr(void)
+{
+ union apic_ir ir;
+ unsigned int i;
+
+ if (!apic_check_and_eoi_isr(&ir))
+ pr_warn("APIC: Stale ISR: %256pb\n", ir.map);
+
+ for (i = 0; i < APIC_IR_REGS; i++)
+ ir.regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+ if (!bitmap_empty(ir.map, APIC_IR_BITS))
+ pr_warn("APIC: Stale IRR: %256pb\n", ir.map);
}
/**
* setup_local_APIC - setup the local APIC
*
- * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Used to setup local APIC while initializing BSP or bringing up APs.
* Always called with preemption disabled.
*/
-void __cpuinit setup_local_APIC(void)
+static void setup_local_APIC(void)
{
int cpu = smp_processor_id();
- unsigned int value, queued;
- int i, j, acked = 0;
- unsigned long long tsc = 0, ntsc;
- long long max_loops = cpu_khz;
-
- if (cpu_has_tsc)
- rdtscll(tsc);
+ unsigned int value;
- if (disable_apic) {
+ if (apic_is_disabled) {
disable_ioapic_support();
return;
}
+ if (apic->setup)
+ apic->setup();
+
+ /*
+ * If this comes from kexec/kcrash the APIC might be enabled in
+ * SPIV. Soft disable it before doing further initialization.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write(APIC_SPIV, value);
+
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && apic->disable_esr) {
@@ -1301,92 +1530,28 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
- perf_events_lapic_init();
-
- /*
- * Double-check whether this APIC is really registered.
- * This is meaningless in clustered apic mode, so we skip it.
- */
- BUG_ON(!apic->apic_id_registered());
-
/*
* Intel recommends to set DFR, LDR and TPR before enabling
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
- apic->init_apic_ldr();
-
-#ifdef CONFIG_X86_32
- /*
- * APIC LDR is initialized. If logical_apicid mapping was
- * initialized during get_smp_config(), make sure it matches the
- * actual value.
- */
- i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
- WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
- /* always use the value from LDR */
- early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
- logical_smp_processor_id();
-
- /*
- * Some NUMA implementations (NUMAQ) don't initialize apicid to
- * node mapping during NUMA init. Now that logical apicid is
- * guaranteed to be known, give it another chance. This is already
- * a bit too late - percpu allocation has already happened without
- * proper NUMA affinity.
+ * document number 292116).
+ *
+ * Except for APICs which operate in physical destination mode.
*/
- if (apic->x86_32_numa_cpu_node)
- set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
- apic->x86_32_numa_cpu_node(cpu));
-#endif
+ if (apic->init_apic_ldr)
+ apic->init_apic_ldr();
/*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
+ * Set Task Priority to 'accept all except vectors 0-31'. An APIC
+ * vector in the 16-31 range could be delivered if TPR == 0, but we
+ * would think it's an exception and terrible things will happen. We
+ * never change this later on.
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
+ value |= 0x10;
apic_write(APIC_TASKPRI, value);
- /*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
- *
- * Most probably by now CPU has serviced that pending interrupt and
- * it might not have done the ack_APIC_irq() because it thought,
- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
- * does not clear the ISR bit and cpu thinks it has already serivced
- * the interrupt. Hence a vector might get locked. It was noticed
- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
- */
- do {
- queued = 0;
- for (i = APIC_ISR_NR - 1; i >= 0; i--)
- queued |= apic_read(APIC_IRR + i*0x10);
-
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for (j = 31; j >= 0; j--) {
- if (value & (1<<j)) {
- ack_APIC_irq();
- acked++;
- }
- }
- }
- if (acked > 256) {
- printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
- acked);
- break;
- }
- if (queued) {
- if (cpu_has_tsc) {
- rdtscll(ntsc);
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
- } else
- max_loops--;
- }
- } while (queued && max_loops > 0);
- WARN_ON(max_loops <= 0);
+ apic_clear_isr();
/*
* Now that we are all set up, enable the APIC
@@ -1414,9 +1579,8 @@ void __cpuinit setup_local_APIC(void)
*/
/*
* Actually disabling the focus CPU check just makes the hang less
- * frequent as it makes the interrupt distributon model be more
+ * frequent as it makes the interrupt distribution model be more
* like LRU than MRU (the short-term load is more even across CPUs).
- * See also the comment in end_level_ioapic_irq(). --macro
*/
/*
@@ -1433,10 +1597,12 @@ void __cpuinit setup_local_APIC(void)
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
+ perf_events_lapic_init();
+
/*
* Set up LVT0, LVT1:
*
- * set up through-local-APIC on the BP's LINT0. This is not
+ * set up through-local-APIC on the boot CPU's LINT0. This is not
* strictly necessary in pure symmetric-IO mode, but sometimes
* we delegate interrupts to the 8259A.
*/
@@ -1444,23 +1610,27 @@ void __cpuinit setup_local_APIC(void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!cpu && (pic_mode || !value)) {
+ if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {
value = APIC_DM_EXTINT;
- apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
+ apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
+ apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);
}
apic_write(APIC_LVT0, value);
/*
- * only the BP should see the LINT1 NMI signal, obviously.
+ * Only the BSP sees the LINT1 NMI signal by default. This can be
+ * modified by apic_extnmi= boot option.
*/
- if (!cpu)
+ if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
+ apic_extnmi == APIC_EXTNMI_ALL)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!lapic_is_integrated()) /* 82489DX */
+
+ /* Is 82489DX ? */
+ if (!lapic_is_integrated())
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
@@ -1471,7 +1641,7 @@ void __cpuinit setup_local_APIC(void)
#endif
}
-void __cpuinit end_local_APIC_setup(void)
+static void end_local_APIC_setup(void)
{
lapic_setup_esr();
@@ -1488,116 +1658,256 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate();
}
-void __init bsp_end_local_APIC_setup(void)
+/*
+ * APIC setup function for application processors. Called from smpboot.c
+ */
+void apic_ap_setup(void)
{
+ setup_local_APIC();
end_local_APIC_setup();
+}
+static __init void apic_read_boot_cpu_id(bool x2apic)
+{
/*
- * Now that local APIC setup is completed for BP, configure the fault
- * handling for interrupt remapping.
+ * This can be invoked from check_x2apic() before the APIC has been
+ * selected. But that code knows for sure that the BIOS enabled
+ * X2APIC.
*/
- irq_remap_enable_fault_handling();
-
+ if (x2apic) {
+ boot_cpu_physical_apicid = native_apic_msr_read(APIC_ID);
+ boot_cpu_apic_version = GET_APIC_VERSION(native_apic_msr_read(APIC_LVR));
+ } else {
+ boot_cpu_physical_apicid = read_apic_id();
+ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+ topology_register_boot_apic(boot_cpu_physical_apicid);
}
#ifdef CONFIG_X86_X2APIC
-/*
- * Need to disable xapic and x2apic at the same time and then enable xapic mode
- */
-static inline void __disable_x2apic(u64 msr)
+int x2apic_mode;
+EXPORT_SYMBOL_GPL(x2apic_mode);
+
+enum {
+ X2APIC_OFF,
+ X2APIC_DISABLED,
+ /* All states below here have X2APIC enabled */
+ X2APIC_ON,
+ X2APIC_ON_LOCKED
+};
+static int x2apic_state;
+
+static bool x2apic_hw_locked(void)
{
- wrmsrl(MSR_IA32_APICBASE,
- msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
- wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+ u64 x86_arch_cap_msr;
+ u64 msr;
+
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
+ rdmsrq(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
+ return (msr & LEGACY_XAPIC_DISABLED);
+ }
+ return false;
}
-static __init void disable_x2apic(void)
+static void __x2apic_disable(void)
{
u64 msr;
- if (!cpu_has_x2apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return;
- rdmsrl(MSR_IA32_APICBASE, msr);
- if (msr & X2APIC_ENABLE) {
- u32 x2apic_id = read_apic_id();
-
- if (x2apic_id >= 255)
- panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+ rdmsrq(MSR_IA32_APICBASE, msr);
+ if (!(msr & X2APIC_ENABLE))
+ return;
+ /* Disable xapic and x2apic first and then reenable xapic mode */
+ wrmsrq(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+ wrmsrq(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+ printk_once(KERN_INFO "x2apic disabled\n");
+}
- pr_info("Disabling x2apic\n");
- __disable_x2apic(msr);
+static void __x2apic_enable(void)
+{
+ u64 msr;
- if (nox2apic) {
- clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
- setup_clear_cpu_cap(X86_FEATURE_X2APIC);
- }
+ rdmsrq(MSR_IA32_APICBASE, msr);
+ if (msr & X2APIC_ENABLE)
+ return;
+ wrmsrq(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+ printk_once(KERN_INFO "x2apic enabled\n");
+}
- x2apic_disabled = 1;
- x2apic_mode = 0;
+static int __init setup_nox2apic(char *str)
+{
+ if (x2apic_enabled()) {
+ u32 apicid = native_apic_msr_read(APIC_ID);
- register_lapic_address(mp_lapic_addr);
+ if (apicid >= 255) {
+ pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
+ apicid);
+ return 0;
+ }
+ if (x2apic_hw_locked()) {
+ pr_warn("APIC locked in x2apic mode, can't disable\n");
+ return 0;
+ }
+ pr_warn("x2apic already enabled.\n");
+ __x2apic_disable();
}
+ setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ x2apic_state = X2APIC_DISABLED;
+ x2apic_mode = 0;
+ return 0;
}
+early_param("nox2apic", setup_nox2apic);
-void check_x2apic(void)
+/* Called from cpu_init() to enable x2apic on (secondary) cpus */
+void x2apic_setup(void)
{
- if (x2apic_enabled()) {
- pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
- x2apic_preenabled = x2apic_mode = 1;
+ /*
+ * Try to make the AP's APIC state match that of the BSP, but if the
+ * BSP is unlocked and the AP is locked then there is a state mismatch.
+ * Warn about the mismatch in case a GP fault occurs due to a locked AP
+ * trying to be turned off.
+ */
+ if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked())
+ pr_warn("x2apic lock mismatch between BSP and AP.\n");
+ /*
+ * If x2apic is not in ON or LOCKED state, disable it if already enabled
+ * from BIOS.
+ */
+ if (x2apic_state < X2APIC_ON) {
+ __x2apic_disable();
+ return;
}
+ __x2apic_enable();
}
-void enable_x2apic(void)
+static __init void apic_set_fixmap(bool read_apic);
+
+static __init void x2apic_disable(void)
{
- u64 msr;
+ u32 x2apic_id;
+
+ if (x2apic_state < X2APIC_ON)
+ return;
+
+ x2apic_id = read_apic_id();
+ if (x2apic_id >= 255)
+ panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
- rdmsrl(MSR_IA32_APICBASE, msr);
- if (x2apic_disabled) {
- __disable_x2apic(msr);
+ if (x2apic_hw_locked()) {
+ pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id);
return;
}
- if (!x2apic_mode)
+ __x2apic_disable();
+
+ x2apic_mode = 0;
+ x2apic_state = X2APIC_DISABLED;
+
+ /*
+ * Don't reread the APIC ID as it was already done from
+ * check_x2apic() and the APIC driver still is a x2APIC variant,
+ * which fails to do the read after x2APIC was disabled.
+ */
+ apic_set_fixmap(false);
+}
+
+static __init void x2apic_enable(void)
+{
+ if (x2apic_state != X2APIC_OFF)
return;
- if (!(msr & X2APIC_ENABLE)) {
- printk_once(KERN_INFO "Enabling x2apic\n");
- wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
- }
+ x2apic_mode = 1;
+ x2apic_state = X2APIC_ON;
+ __x2apic_enable();
}
-#endif /* CONFIG_X86_X2APIC */
-int __init enable_IR(void)
+static __init void try_to_enable_x2apic(int remap_mode)
{
-#ifdef CONFIG_IRQ_REMAP
- if (!irq_remapping_supported()) {
- pr_debug("intr-remapping not supported\n");
- return -1;
+ if (x2apic_state == X2APIC_DISABLED)
+ return;
+
+ if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
+ u32 apic_limit = 255;
+
+ /*
+ * Using X2APIC without IR is not architecturally supported
+ * on bare metal but may be supported in guests.
+ */
+ if (!x86_init.hyper.x2apic_available()) {
+ pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
+ x2apic_disable();
+ return;
+ }
+
+ /*
+ * If the hypervisor supports extended destination ID in
+ * MSI, that increases the maximum APIC ID that can be
+ * used for non-remapped IRQ domains.
+ */
+ if (x86_init.hyper.msi_ext_dest_id()) {
+ virt_ext_dest_id = 1;
+ apic_limit = 32767;
+ }
+
+ /*
+ * Without IR, all CPUs can be addressed by IOAPIC/MSI only
+ * in physical mode, and CPUs with an APIC ID that cannot
+ * be addressed must not be brought online.
+ */
+ x2apic_set_max_apicid(apic_limit);
+ x2apic_phys = 1;
}
+ x2apic_enable();
+}
- if (!x2apic_preenabled && skip_ioapic_setup) {
- pr_info("Skipped enabling intr-remap because of skipping "
- "io-apic setup\n");
- return -1;
+void __init check_x2apic(void)
+{
+ if (x2apic_enabled()) {
+ pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
+ x2apic_mode = 1;
+ if (x2apic_hw_locked())
+ x2apic_state = X2APIC_ON_LOCKED;
+ else
+ x2apic_state = X2APIC_ON;
+ apic_read_boot_cpu_id(true);
+ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
+ x2apic_state = X2APIC_DISABLED;
}
+}
+#else /* CONFIG_X86_X2APIC */
+void __init check_x2apic(void)
+{
+ if (!apic_is_x2apic_enabled())
+ return;
+ /*
+ * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?
+ */
+ pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
+ pr_err("Disabling APIC, expect reduced performance and functionality.\n");
- return irq_remapping_enable();
-#endif
- return -1;
+ apic_is_disabled = true;
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
}
+static inline void try_to_enable_x2apic(int remap_mode) { }
+static inline void __x2apic_enable(void) { }
+#endif /* !CONFIG_X86_X2APIC */
+
void __init enable_IR_x2apic(void)
{
unsigned long flags;
- int ret, x2apic_enabled = 0;
- int hardware_init_ret;
+ int ret, ir_stat;
- /* Make sure irq_remap_ops are initialized */
- setup_irq_remapping_ops();
+ if (ioapic_is_disabled) {
+ pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
+ return;
+ }
- hardware_init_ret = irq_remapping_prepare();
- if (hardware_init_ret && !x2apic_supported())
+ ir_stat = irq_remapping_prepare();
+ if (ir_stat < 0 && !x2apic_supported())
return;
ret = save_ioapic_entries();
@@ -1610,49 +1920,13 @@ void __init enable_IR_x2apic(void)
legacy_pic->mask_all();
mask_ioapic_entries();
- if (x2apic_preenabled && nox2apic)
- disable_x2apic();
+ /* If irq_remapping_prepare() succeeded, try to enable it */
+ if (ir_stat >= 0)
+ ir_stat = irq_remapping_enable();
+ /* ir_stat contains the remap mode or an error code */
+ try_to_enable_x2apic(ir_stat);
- if (hardware_init_ret)
- ret = -1;
- else
- ret = enable_IR();
-
- if (!x2apic_supported())
- goto skip_x2apic;
-
- if (ret < 0) {
- /* IR is required if there is APIC ID > 255 even when running
- * under KVM
- */
- if (max_physical_apicid > 255 ||
- !hypervisor_x2apic_available()) {
- if (x2apic_preenabled)
- disable_x2apic();
- goto skip_x2apic;
- }
- /*
- * without IR all CPUs can be addressed by IOAPIC/MSI
- * only in physical mode
- */
- x2apic_force_phys();
- }
-
- if (ret == IRQ_REMAP_XAPIC_MODE) {
- pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
- goto skip_x2apic;
- }
-
- x2apic_enabled = 1;
-
- if (x2apic_supported() && !x2apic_mode) {
- x2apic_mode = 1;
- enable_x2apic();
- pr_info("Enabled x2apic\n");
- }
-
-skip_x2apic:
- if (ret < 0) /* IR enabling failed */
+ if (ir_stat < 0)
restore_ioapic_entries();
legacy_pic->restore_mask();
local_irq_restore(flags);
@@ -1665,19 +1939,19 @@ skip_x2apic:
* On AMD64 we trust the BIOS - if it says no APIC it is likely
* not correctly set up (usually the APIC timer won't work etc.)
*/
-static int __init detect_init_APIC(void)
+static bool __init detect_init_APIC(void)
{
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
pr_info("No local APIC present\n");
- return -1;
+ return false;
}
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- return 0;
+ register_lapic_address(APIC_DEFAULT_PHYS_BASE);
+ return true;
}
#else
-static int __init apic_verify(void)
+static bool __init apic_verify(unsigned long addr)
{
u32 features, h, l;
@@ -1687,29 +1961,29 @@ static int __init apic_verify(void)
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- pr_warning("Could not enable APIC!\n");
- return -1;
+ pr_warn("Could not enable APIC!\n");
+ return false;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
/* The BIOS may have set up the APIC at some other address */
if (boot_cpu_data.x86 >= 6) {
rdmsr(MSR_IA32_APICBASE, l, h);
if (l & MSR_IA32_APICBASE_ENABLE)
- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+ addr = l & MSR_IA32_APICBASE_BASE;
}
+ register_lapic_address(addr);
pr_info("Found and enabled local APIC!\n");
- return 0;
+ return true;
}
-int __init apic_force_enable(unsigned long addr)
+bool __init apic_force_enable(unsigned long addr)
{
u32 h, l;
- if (disable_apic)
- return -1;
+ if (apic_is_disabled)
+ return false;
/*
* Some BIOSes disable the local APIC in the APIC_BASE
@@ -1726,17 +2000,17 @@ int __init apic_force_enable(unsigned long addr)
enabled_via_apicbase = 1;
}
}
- return apic_verify();
+ return apic_verify(addr);
}
/*
* Detect and initialize APIC
*/
-static int __init detect_init_APIC(void)
+static bool __init detect_init_APIC(void)
{
/* Disabled by kernel option? */
- if (disable_apic)
- return -1;
+ if (apic_is_disabled)
+ return false;
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -1744,16 +2018,18 @@ static int __init detect_init_APIC(void)
(boot_cpu_data.x86 >= 15))
break;
goto no_apic;
+ case X86_VENDOR_HYGON:
+ break;
case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) ||
+ boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO)
break;
goto no_apic;
default:
goto no_apic;
}
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
/*
* Over-ride BIOS and try to enable the local APIC only if
* "lapic" specified.
@@ -1761,22 +2037,22 @@ static int __init detect_init_APIC(void)
if (!force_enable_local_apic) {
pr_info("Local APIC disabled by BIOS -- "
"you can enable it with \"lapic\"\n");
- return -1;
+ return false;
}
- if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
- return -1;
+ if (!apic_force_enable(APIC_DEFAULT_PHYS_BASE))
+ return false;
} else {
- if (apic_verify())
- return -1;
+ if (!apic_verify(APIC_DEFAULT_PHYS_BASE))
+ return false;
}
apic_pm_activate();
- return 0;
+ return true;
no_apic:
pr_info("No local APIC present or hardware disabled\n");
- return -1;
+ return false;
}
#endif
@@ -1785,190 +2061,106 @@ no_apic:
*/
void __init init_apic_mappings(void)
{
- unsigned int new_apicid;
+ if (apic_validate_deadline_timer())
+ pr_info("TSC deadline timer available\n");
- if (x2apic_mode) {
- boot_cpu_physical_apicid = read_apic_id();
+ if (x2apic_mode)
return;
- }
- /* If no local APIC can be found return early */
- if (!smp_found_config && detect_init_APIC()) {
- /* lets NOP'ify apic operations */
- pr_info("APIC: disable apic facility\n");
- apic_disable();
- } else {
- apic_phys = mp_lapic_addr;
-
- /*
- * acpi lapic path already maps that address in
- * acpi_register_lapic_address()
- */
- if (!acpi_lapic && !smp_found_config)
- register_lapic_address(apic_phys);
+ if (!smp_found_config) {
+ if (!detect_init_APIC()) {
+ pr_info("APIC: disable apic facility\n");
+ apic_disable();
+ }
}
+}
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- new_apicid = read_apic_id();
- if (boot_cpu_physical_apicid != new_apicid) {
- boot_cpu_physical_apicid = new_apicid;
- /*
- * yeah -- we lie about apic_version
- * in case if apic was disabled via boot option
- * but it's not a problem for SMP compiled kernel
- * since smp_sanity_check is prepared for such a case
- * and disable smp mode
- */
- apic_version[new_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
+static __init void apic_set_fixmap(bool read_apic)
+{
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+ apic_mmio_base = APIC_BASE;
+ apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr);
+ if (read_apic)
+ apic_read_boot_cpu_id(false);
}
void __init register_lapic_address(unsigned long address)
{
+ /* This should only happen once */
+ WARN_ON_ONCE(mp_lapic_addr);
mp_lapic_addr = address;
- if (!x2apic_mode) {
- set_fixmap_nocache(FIX_APIC_BASE, address);
- apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
- APIC_BASE, mp_lapic_addr);
- }
- if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
+ if (!x2apic_mode)
+ apic_set_fixmap(true);
}
/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
+ * Local APIC interrupts
*/
-int apic_version[MAX_LOCAL_APIC];
-int __init APIC_init_uniprocessor(void)
+/*
+ * Common handling code for spurious_interrupt and spurious_vector entry
+ * points below. No point in allowing the compiler to inline it twice.
+ */
+static noinline void handle_spurious_interrupt(u8 vector)
{
- if (disable_apic) {
- pr_info("Apic disabled\n");
- return -1;
- }
-#ifdef CONFIG_X86_64
- if (!cpu_has_apic) {
- disable_apic = 1;
- pr_info("Apic disabled by BIOS\n");
- return -1;
- }
-#else
- if (!smp_found_config && !cpu_has_apic)
- return -1;
-
- /*
- * Complain if the BIOS pretends there is one.
- */
- if (!cpu_has_apic &&
- APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
- boot_cpu_physical_apicid);
- return -1;
- }
-#endif
+ u32 v;
- default_setup_apic_routing();
+ trace_spurious_apic_entry(vector);
- verify_local_APIC();
- connect_bsp_APIC();
+ inc_irq_stat(irq_spurious_count);
-#ifdef CONFIG_X86_64
- apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
/*
- * Hack: In case of kdump, after a crash, kernel might be booting
- * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
- * might be zero if read from MP tables. Get it from LAPIC.
+ * If this is a spurious interrupt then do not acknowledge
*/
-# ifdef CONFIG_CRASH_DUMP
- boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
- physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
- setup_local_APIC();
+ if (vector == SPURIOUS_APIC_VECTOR) {
+ /* See SDM vol 3 */
+ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+ smp_processor_id());
+ goto out;
+ }
-#ifdef CONFIG_X86_IO_APIC
/*
- * Now enable IO-APICs, actually call clear_IO_APIC
- * We need clear_IO_APIC before enabling error vector
+ * If it is a vectored one, verify it's set in the ISR. If set,
+ * acknowledge it.
*/
- if (!skip_ioapic_setup && nr_ioapics)
- enable_IO_APIC();
-#endif
-
- bsp_end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else {
- nr_ioapics = 0;
+ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+ if (v & (1 << (vector & 0x1f))) {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ vector, smp_processor_id());
+ apic_eoi();
+ } else {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ vector, smp_processor_id());
}
-#endif
-
- x86_init.timers.setup_percpu_clockev();
- return 0;
+out:
+ trace_spurious_apic_exit(vector);
}
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
+/**
+ * spurious_interrupt - Catch all for interrupts raised on unused vectors
+ * @regs: Pointer to pt_regs on stack
+ * @vector: The vector number
+ *
+ * This is invoked from ASM entry code to catch all interrupts which
+ * trigger on an entry which is routed to the common_spurious idtentry
+ * point.
*/
-static inline void __smp_spurious_interrupt(void)
-{
- u32 v;
-
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- inc_irq_stat(irq_spurious_count);
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- pr_info("spurious APIC interrupt on CPU#%d, "
- "should never happen.\n", smp_processor_id());
-}
-
-void smp_spurious_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
- entering_irq();
- __smp_spurious_interrupt();
- exiting_irq();
+ handle_spurious_interrupt(vector);
}
-void smp_trace_spurious_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
- entering_irq();
- trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
- __smp_spurious_interrupt();
- trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
- exiting_irq();
+ handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
}
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
-static inline void __smp_error_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
{
- u32 v0, v1;
- u32 i = 0;
static const char * const error_interrupt_reason[] = {
"Send CS error", /* APIC Error Bit 0 */
"Receive CS error", /* APIC Error Bit 1 */
@@ -1979,49 +2171,36 @@ static inline void __smp_error_interrupt(struct pt_regs *regs)
"Received illegal vector", /* APIC Error Bit 6 */
"Illegal register address", /* APIC Error Bit 7 */
};
+ u32 v, i = 0;
+
+ trace_error_apic_entry(ERROR_APIC_VECTOR);
/* First tickle the hardware, only then report what went on. -- REW */
- v0 = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
+ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ apic_eoi();
atomic_inc(&irq_err_count);
- apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
- smp_processor_id(), v0 , v1);
+ apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
- v1 = v1 & 0xff;
- while (v1) {
- if (v1 & 0x1)
- apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+ v &= 0xff;
+ while (v) {
+ if (v & 0x1)
+ apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);
i++;
- v1 >>= 1;
+ v >>= 1;
}
- apic_printk(APIC_DEBUG, KERN_CONT "\n");
-
-}
-
-void smp_error_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- __smp_error_interrupt(regs);
- exiting_irq();
-}
+ apic_pr_debug_cont("\n");
-void smp_trace_error_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- trace_error_apic_entry(ERROR_APIC_VECTOR);
- __smp_error_interrupt(regs);
trace_error_apic_exit(ERROR_APIC_VECTOR);
- exiting_irq();
}
/**
* connect_bsp_APIC - attach the APIC to the interrupt system
*/
-void __init connect_bsp_APIC(void)
+static void __init connect_bsp_APIC(void)
{
#ifdef CONFIG_X86_32
if (pic_mode) {
@@ -2033,13 +2212,10 @@ void __init connect_bsp_APIC(void)
* PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
* local APIC to INT and NMI lines.
*/
- apic_printk(APIC_VERBOSE, "leaving PIC mode, "
- "enabling APIC mode.\n");
+ apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");
imcr_pic_to_apic();
}
#endif
- if (apic->enable_apic_mode)
- apic->enable_apic_mode();
}
/**
@@ -2061,8 +2237,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
* IPIs, won't work beyond this point! The only exception are
* INIT IPIs.
*/
- apic_printk(APIC_VERBOSE, "disabling APIC mode, "
- "entering PIC mode.\n");
+ apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");
imcr_apic_to_pic();
return;
}
@@ -2107,134 +2282,79 @@ void disconnect_bsp_APIC(int virt_wire_setup)
apic_write(APIC_LVT1, value);
}
-void __cpuinit generic_processor_info(int apicid, int version)
+void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
+ bool dmar)
{
- int cpu, max = nr_cpu_ids;
- bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
- phys_cpu_present_map);
-
- /*
- * If boot cpu has not been detected yet, then only allow upto
- * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
- */
- if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
- apicid != boot_cpu_physical_apicid) {
- int thiscpu = max + disabled_cpus - 1;
-
- pr_warning(
- "ACPI: NR_CPUS/possible_cpus limit of %i almost"
- " reached. Keeping one slot for boot cpu."
- " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
-
- disabled_cpus++;
- return;
- }
-
- if (num_processors >= nr_cpu_ids) {
- int thiscpu = max + disabled_cpus;
+ memset(msg, 0, sizeof(*msg));
- pr_warning(
- "ACPI: NR_CPUS/possible_cpus limit of %i reached."
- " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
+ msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;
- disabled_cpus++;
- return;
- }
-
- num_processors++;
- if (apicid == boot_cpu_physical_apicid) {
- /*
- * x86_bios_cpu_apicid is required to have processors listed
- * in same order as logical cpu numbers. Hence the first
- * entry is BSP, and so on.
- * boot_cpu_init() already hold bit 0 in cpu_present_mask
- * for BSP.
- */
- cpu = 0;
- } else
- cpu = cpumask_next_zero(-1, cpu_present_mask);
+ msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ msg->arch_data.vector = cfg->vector;
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
/*
- * Validate version
+ * Only the IOMMU itself can use the trick of putting destination
+ * APIC ID into the high bits of the address. Anything else would
+ * just be writing to memory if it tried that, and needs IR to
+ * address APICs which can't be addressed in the normal 32-bit
+ * address range at 0xFFExxxxx. That is typically just 8 bits, but
+ * some hypervisors allow the extended destination ID field in bits
+ * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
*/
- if (version == 0x0) {
- pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
- cpu, apicid);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
- if (version != apic_version[boot_cpu_physical_apicid]) {
- pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
- }
-
- physid_set(apicid, phys_cpu_present_map);
- if (apicid > max_physical_apicid)
- max_physical_apicid = apicid;
-
-#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
- early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
- early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-#endif
-#ifdef CONFIG_X86_32
- early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
- apic->x86_32_early_logical_apicid(cpu);
-#endif
- set_cpu_possible(cpu, true);
- set_cpu_present(cpu, true);
+ if (dmar)
+ msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
+ else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
+ msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
+ else
+ WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
}
-int hard_smp_processor_id(void)
+u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
{
- return read_apic_id();
+ u32 dest = msg->arch_addr_lo.destid_0_7;
+
+ if (extid)
+ dest |= msg->arch_addr_hi.destid_8_31 << 8;
+ return dest;
}
+EXPORT_SYMBOL_FOR_KVM(x86_msi_msg_get_destid);
-void default_init_apic_ldr(void)
+static void __init apic_bsp_up_setup(void)
{
- unsigned long val;
-
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
- apic_write(APIC_LDR, val);
+ reset_phys_cpu_present_map(boot_cpu_physical_apicid);
}
-int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
+/**
+ * apic_bsp_setup - Setup function for local apic and io-apic
+ * @upmode: Force UP mode (for APIC_init_uniprocessor)
+ */
+static void __init apic_bsp_setup(bool upmode)
{
- unsigned int cpu;
-
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
-
- if (likely(cpu < nr_cpu_ids)) {
- *apicid = per_cpu(x86_cpu_to_apicid, cpu);
- return 0;
- }
+ connect_bsp_APIC();
+ if (upmode)
+ apic_bsp_up_setup();
+ setup_local_APIC();
- return -EINVAL;
+ enable_IO_APIC();
+ end_local_APIC_setup();
+ irq_remap_enable_fault_handling();
+ setup_IO_APIC();
+ lapic_update_legacy_vectors();
}
-/*
- * Override the generic EOI implementation with an optimized version.
- * Only called during early boot when only one CPU is active and with
- * interrupts disabled, so we know this does not race with actual APIC driver
- * use.
- */
-void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
{
- struct apic **drv;
+ if (apic_intr_mode == APIC_PIC)
+ return;
- for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
- /* Should happen once for each apic */
- WARN_ON((*drv)->eoi_write == eoi_write);
- (*drv)->eoi_write = eoi_write;
- }
+ /* Setup local timer */
+ x86_init.timers.setup_percpu_clockev();
}
+#endif
/*
* Power management
@@ -2249,7 +2369,7 @@ static struct {
*/
int active;
/* r/w apic fields */
- unsigned int apic_id;
+ u32 apic_id;
unsigned int apic_taskpri;
unsigned int apic_ldr;
unsigned int apic_dfr;
@@ -2262,9 +2382,10 @@ static struct {
unsigned int apic_tmict;
unsigned int apic_tdcr;
unsigned int apic_thmr;
+ unsigned int apic_cmci;
} apic_pm_state;
-static int lapic_suspend(void)
+static int lapic_suspend(void *data)
{
unsigned long flags;
int maxlvt;
@@ -2291,8 +2412,19 @@ static int lapic_suspend(void)
if (maxlvt >= 5)
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
+#endif
local_irq_save(flags);
+
+ /*
+ * Mask IOAPIC before disabling the local APIC to prevent stale IRR
+ * entries on some implementations.
+ */
+ mask_ioapic_entries();
+
disable_local_APIC();
irq_remapping_disable();
@@ -2301,7 +2433,7 @@ static int lapic_suspend(void)
return 0;
}
-static void lapic_resume(void)
+static void lapic_resume(void *data)
{
unsigned int l, h;
unsigned long flags;
@@ -2321,9 +2453,9 @@ static void lapic_resume(void)
mask_ioapic_entries();
legacy_pic->mask_all();
- if (x2apic_mode)
- enable_x2apic();
- else {
+ if (x2apic_mode) {
+ __x2apic_enable();
+ } else {
/*
* Make sure the APICBASE points to the right address
*
@@ -2347,10 +2479,14 @@ static void lapic_resume(void)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
if (maxlvt >= 5)
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
+#endif
if (maxlvt >= 4)
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
@@ -2372,12 +2508,16 @@ static void lapic_resume(void)
* are needed on every CPU up until machine_halt/restart/poweroff.
*/
-static struct syscore_ops lapic_syscore_ops = {
+static const struct syscore_ops lapic_syscore_ops = {
.resume = lapic_resume,
.suspend = lapic_suspend,
};
-static void __cpuinit apic_pm_activate(void)
+static struct syscore lapic_syscore = {
+ .ops = &lapic_syscore_ops,
+};
+
+static void apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -2385,8 +2525,8 @@ static void __cpuinit apic_pm_activate(void)
static int __init init_lapic_sysfs(void)
{
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
- if (cpu_has_apic)
- register_syscore_ops(&lapic_syscore_ops);
+ if (boot_cpu_has(X86_FEATURE_APIC))
+ register_syscore(&lapic_syscore);
return 0;
}
@@ -2402,55 +2542,10 @@ static void apic_pm_activate(void) { }
#ifdef CONFIG_X86_64
-static int __cpuinit apic_cluster_num(void)
-{
- int i, clusters, zeros;
- unsigned id;
- u16 *bios_cpu_apicid;
- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
- bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
- bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- /* are we being called early in kernel startup? */
- if (bios_cpu_apicid) {
- id = bios_cpu_apicid[i];
- } else if (i < nr_cpu_ids) {
- if (cpu_present(i))
- id = per_cpu(x86_bios_cpu_apicid, i);
- else
- continue;
- } else
- break;
-
- if (id != BAD_APICID)
- __set_bit(APIC_CLUSTERID(id), clustermap);
- }
-
- /* Problem: Partially populated chassis may not have CPUs in some of
- * the APIC clusters they have been allocated. Only present CPUs have
- * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
- * Since clusters are allocated sequentially, count zeros only if
- * they are bounded by ones.
- */
- clusters = 0;
- zeros = 0;
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (test_bit(i, clustermap)) {
- clusters += 1 + zeros;
- zeros = 0;
- } else
- ++zeros;
- }
-
- return clusters;
-}
+static int multi_checked;
+static int multi;
-static int __cpuinitdata multi_checked;
-static int __cpuinitdata multi;
-
-static int __cpuinit set_multi(const struct dmi_system_id *d)
+static int set_multi(const struct dmi_system_id *d)
{
if (multi)
return 0;
@@ -2459,7 +2554,7 @@ static int __cpuinit set_multi(const struct dmi_system_id *d)
return 0;
}
-static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+static const struct dmi_system_id multi_dmi_table[] = {
{
.callback = set_multi,
.ident = "IBM System Summit2",
@@ -2471,7 +2566,7 @@ static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
{}
};
-static void __cpuinit dmi_check_multi(void)
+static void dmi_check_multi(void)
{
if (multi_checked)
return;
@@ -2488,42 +2583,22 @@ static void __cpuinit dmi_check_multi(void)
* multi-chassis.
* Use DMI to check them
*/
-__cpuinit int apic_is_clustered_box(void)
+int apic_is_clustered_box(void)
{
dmi_check_multi();
- if (multi)
- return 1;
-
- if (!is_vsmp_box())
- return 0;
-
- /*
- * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (apic_cluster_num() > 1)
- return 1;
-
- return 0;
+ return multi;
}
#endif
/*
* APIC command line parameters
*/
-static int __init setup_disableapic(char *arg)
+static int __init setup_nolapic(char *arg)
{
- disable_apic = 1;
+ apic_is_disabled = true;
setup_clear_cpu_cap(X86_FEATURE_APIC);
return 0;
}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
- return setup_disableapic(arg);
-}
early_param("nolapic", setup_nolapic);
static int __init parse_lapic_timer_c2_ok(char *arg)
@@ -2550,22 +2625,24 @@ early_param("nolapic_timer", parse_nolapic_timer);
static int __init apic_set_verbosity(char *arg)
{
if (!arg) {
-#ifdef CONFIG_X86_64
- skip_ioapic_setup = 0;
+ if (IS_ENABLED(CONFIG_X86_32))
+ return -EINVAL;
+
+ ioapic_is_disabled = false;
return 0;
-#endif
- return -EINVAL;
}
if (strcmp("debug", arg) == 0)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
else {
- pr_warning("APIC Verbosity level %s not recognised"
+ pr_warn("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
+#endif
return 0;
}
@@ -2573,11 +2650,11 @@ early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
- if (!apic_phys)
+ if (!apic_mmio_base)
return -1;
/* Put local APIC into the resource map. */
- lapic_resource.start = apic_phys;
+ lapic_resource.start = apic_mmio_base;
lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
insert_resource(&iomem_resource, &lapic_resource);
@@ -2585,7 +2662,27 @@ static int __init lapic_insert_resource(void)
}
/*
- * need call insert after e820_reserve_resources()
+ * need call insert after e820__reserve_resources()
* that is using request_resource
*/
late_initcall(lapic_insert_resource);
+
+static int __init apic_set_extnmi(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (!strncmp("all", arg, 3))
+ apic_extnmi = APIC_EXTNMI_ALL;
+ else if (!strncmp("none", arg, 4))
+ apic_extnmi = APIC_EXTNMI_NONE;
+ else if (!strncmp("bsp", arg, 3))
+ apic_extnmi = APIC_EXTNMI_BSP;
+ else {
+ pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+early_param("apic_extnmi", apic_set_extnmi);
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
new file mode 100644
index 000000000000..2ed3b5c88c7f
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -0,0 +1,43 @@
+/*
+ * Common functions shared between the various APIC flavours
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+#include <linux/irq.h>
+#include <linux/kvm_types.h>
+#include <asm/apic.h>
+
+#include "local.h"
+
+u32 apic_default_calc_apicid(unsigned int cpu)
+{
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+u32 apic_flat_calc_apicid(unsigned int cpu)
+{
+ return 1U << cpu;
+}
+
+u32 default_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+ return (int)per_cpu(x86_cpu_to_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid);
+
+/*
+ * Set up the logical destination ID when the APIC operates in logical
+ * destination mode.
+ */
+void default_init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+ apic_write(APIC_LDR, val);
+}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00c77cf78e9e..e0308d8c4e6c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Flat APIC subarch code.
*
@@ -8,329 +8,61 @@
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
*/
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/module.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
+#include <linux/export.h>
-static struct apic apic_physflat;
-static struct apic apic_flat;
-
-struct apic __read_mostly *apic = &apic_flat;
-EXPORT_SYMBOL_GPL(apic);
-
-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 1;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-void flat_init_apic_ldr(void)
-{
- unsigned long val;
- unsigned long num, id;
-
- num = smp_processor_id();
- id = 1UL << num;
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
- local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
-
- _flat_send_IPI_mask(mask, vector);
-}
-
-static void
-flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
- int cpu = smp_processor_id();
-
- if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
-
- _flat_send_IPI_mask(mask, vector);
-}
-
-static void flat_send_IPI_allbutself(int vector)
-{
- int cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
- int hotplug = 1;
-#else
- int hotplug = 0;
-#endif
- if (hotplug || vector == NMI_VECTOR) {
- if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
- unsigned long mask = cpumask_bits(cpu_online_mask)[0];
-
- if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
-
- _flat_send_IPI_mask(mask, vector);
- }
- } else if (num_online_cpus() > 1) {
- __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
- vector, apic->dest_logical);
- }
-}
-
-static void flat_send_IPI_all(int vector)
-{
- if (vector == NMI_VECTOR) {
- flat_send_IPI_mask(cpu_online_mask, vector);
- } else {
- __default_send_IPI_shortcut(APIC_DEST_ALLINC,
- vector, apic->dest_logical);
- }
-}
-
-static unsigned int flat_get_apic_id(unsigned long x)
-{
- unsigned int id;
-
- id = (((x)>>24) & 0xFFu);
-
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = ((id & 0xFFu)<<24);
- return x;
-}
-
-static unsigned int read_xapic_id(void)
-{
- unsigned int id;
-
- id = flat_get_apic_id(apic_read(APIC_ID));
- return id;
-}
+#include <asm/apic.h>
-static int flat_apic_id_registered(void)
-{
- return physid_isset(read_xapic_id(), phys_cpu_present_map);
-}
+#include "local.h"
-static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
+static u32 physflat_get_apic_id(u32 x)
{
- return initial_apic_id >> index_msb;
+ return (x >> 24) & 0xFF;
}
-static int flat_probe(void)
+static int physflat_probe(void)
{
return 1;
}
-static struct apic apic_flat = {
- .name = "flat",
- .probe = flat_probe,
- .acpi_madt_oem_check = flat_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = flat_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- .irq_dest_mode = 1, /* logical */
-
- .target_cpus = online_target_cpus,
- .disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = flat_init_apic_ldr,
-
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = flat_phys_pkg_id,
- .mps_oem_check = NULL,
-
- .get_apic_id = flat_get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFu << 24,
-
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = flat_send_IPI_mask,
- .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
- .send_IPI_allbutself = flat_send_IPI_allbutself,
- .send_IPI_all = flat_send_IPI_all,
- .send_IPI_self = apic_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
- printk(KERN_DEBUG "system APIC only can use physical flat");
- return 1;
- }
-
- if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
- printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
- return 1;
- }
-#endif
-
- return 0;
-}
-
-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- default_send_IPI_mask_sequence_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
- int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void physflat_send_IPI_all(int vector)
-{
- physflat_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int physflat_probe(void)
-{
- if (apic == &apic_physflat || num_possible_cpus() > 8)
- return 1;
-
- return 0;
+ return 1;
}
-static struct apic apic_physflat = {
+static struct apic apic_physflat __ro_after_init = {
.name = "physical flat",
.probe = physflat_probe,
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = flat_apic_id_registered,
- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .dest_mode_logical = false,
- .target_cpus = online_target_cpus,
.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
-
- .vector_allocation_domain = default_vector_allocation_domain,
- /* not needed, but shouldn't hurt: */
- .init_apic_ldr = flat_init_apic_ldr,
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = flat_phys_pkg_id,
- .mps_oem_check = NULL,
-
- .get_apic_id = flat_get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFu << 24,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .max_apic_id = 0xFE,
+ .get_apic_id = physflat_get_apic_id,
- .send_IPI_mask = physflat_send_IPI_mask,
- .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
- .send_IPI_allbutself = physflat_send_IPI_allbutself,
- .send_IPI_all = physflat_send_IPI_all,
- .send_IPI_self = apic_send_IPI_self,
+ .calc_dest_apicid = apic_default_calc_apicid,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
+ .send_IPI = default_send_IPI_single_phys,
+ .send_IPI_mask = default_send_IPI_mask_sequence_phys,
+ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys,
+ .send_IPI_allbutself = default_send_IPI_allbutself,
+ .send_IPI_all = default_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+ .nmi_to_offline_cpu = true,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
+ .eoi = native_apic_mem_eoi,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+ .wait_icr_idle = apic_mem_wait_icr_idle,
+ .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout,
};
+apic_driver(apic_physflat);
-/*
- * We need to check for physflat first, so this order is important.
- */
-apic_drivers(apic_physflat, apic_flat);
+struct apic *apic __ro_after_init = &apic_physflat;
+EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e145f28b4099..58abb941c45b 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NOOP APIC driver.
*
@@ -7,160 +8,62 @@
* Though in case if apic is disabled (for some reason) we try
* to not uglify the caller's code and allow to call (some) apic routines
* like self-ipi, etc...
+ *
+ * FIXME: Remove this gunk. The above argument which was intentionally left
+ * in place is silly to begin with because none of the callbacks except for
+ * APIC::read/write() have a WARN_ON_ONCE() in them. Sigh...
*/
-
-#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-#include <asm/apic.h>
-#include <asm/setup.h>
+#include <linux/thread_info.h>
-#include <linux/smp.h>
-#include <asm/ipi.h>
+#include <asm/apic.h>
-#include <linux/interrupt.h>
-#include <asm/acpi.h>
-#include <asm/e820.h>
+#include "local.h"
-static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI(int cpu, int vector) { }
static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_allbutself(int vector) { }
static void noop_send_IPI_all(int vector) { }
static void noop_send_IPI_self(int vector) { }
-static void noop_apic_wait_icr_idle(void) { }
static void noop_apic_icr_write(u32 low, u32 id) { }
-static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip,
+ unsigned int cpu)
{
return -1;
}
-static u32 noop_safe_apic_wait_icr_idle(void)
-{
- return 0;
-}
-
-static u64 noop_apic_icr_read(void)
-{
- return 0;
-}
-
-static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return 0;
-}
-
-static unsigned int noop_get_apic_id(unsigned long x)
-{
- return 0;
-}
-
-static int noop_probe(void)
-{
- /*
- * NOOP apic should not ever be
- * enabled via probe routine
- */
- return 0;
-}
-
-static int noop_apic_id_registered(void)
-{
- /*
- * if we would be really "pedantic"
- * we should pass read_apic_id() here
- * but since NOOP suppose APIC ID = 0
- * lets save a few cycles
- */
- return physid_isset(0, phys_cpu_present_map);
-}
-
-static const struct cpumask *noop_target_cpus(void)
-{
- /* only BSP here */
- return cpumask_of(0);
-}
-
-static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-static unsigned long noop_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- if (cpu != 0)
- pr_warning("APIC: Vector allocated for non-BSP cpu\n");
- cpumask_copy(retmask, cpumask_of(cpu));
-}
+static u64 noop_apic_icr_read(void) { return 0; }
+static u32 noop_get_apic_id(u32 apicid) { return 0; }
+static void noop_apic_eoi(void) { }
static u32 noop_apic_read(u32 reg)
{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !apic_is_disabled);
return 0;
}
-static void noop_apic_write(u32 reg, u32 v)
+static void noop_apic_write(u32 reg, u32 val)
{
- WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !apic_is_disabled);
}
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
.name = "noop",
- .probe = noop_probe,
- .acpi_madt_oem_check = NULL,
-
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = noop_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
+ .dest_mode_logical = true,
- .target_cpus = noop_target_cpus,
.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = noop_check_apicid_used,
- .check_apicid_present = noop_check_apicid_present,
-
- .vector_allocation_domain = noop_vector_allocation_domain,
- .init_apic_ldr = noop_init_apic_ldr,
-
- .ioapic_phys_id_map = default_ioapic_phys_id_map,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = physid_set_mask_of_physid,
-
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
-
- .phys_pkg_id = noop_phys_pkg_id,
-
- .mps_oem_check = NULL,
+ .max_apic_id = 0xFE,
.get_apic_id = noop_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = apic_flat_calc_apicid,
+ .send_IPI = noop_send_IPI,
.send_IPI_mask = noop_send_IPI_mask,
.send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
.send_IPI_allbutself = noop_send_IPI_allbutself,
@@ -169,24 +72,9 @@ struct apic apic_noop = {
.wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
- /* should be safe */
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
- .wait_for_init_deassert = NULL,
-
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
-
.read = noop_apic_read,
.write = noop_apic_write,
- .eoi_write = noop_apic_write,
+ .eoi = noop_apic_eoi,
.icr_read = noop_apic_icr_read,
.icr_write = noop_apic_icr_write,
- .wait_icr_idle = noop_apic_wait_icr_idle,
- .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
-
-#ifdef CONFIG_X86_32
- .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
-#endif
};
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9a9110918ca7..5c5be2d58242 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -10,101 +10,85 @@
* Send feedback to <support@numascale.com>
*
*/
-
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ctype.h>
+#include <linux/types.h>
#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/delay.h>
+#include <linux/pgtable.h>
+#include <asm/msr.h>
#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/apic_flat_64.h>
-#include <asm/pgtable.h>
-static int numachip_system __read_mostly;
-static const struct apic apic_numachip __read_mostly;
+#include "local.h"
+
+u8 numachip_system __read_mostly;
+static const struct apic apic_numachip1;
+static const struct apic apic_numachip2;
+static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly;
-static unsigned int get_apic_id(unsigned long x)
+static u32 numachip1_get_apic_id(u32 x)
{
unsigned long value;
- unsigned int id;
+ unsigned int id = (x >> 24) & 0xff;
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ rdmsrq(MSR_FAM10H_NODE_ID, value);
+ id |= (value << 2) & 0xff00;
+ }
return id;
}
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = ((id & 0xffU) << 24);
- return x;
-}
-
-static unsigned int read_xapic_id(void)
+static u32 numachip2_get_apic_id(u32 x)
{
- return get_apic_id(apic_read(APIC_ID));
-}
+ u64 mcfg;
-static int numachip_apic_id_valid(int apicid)
-{
- /* Trust what bootloader passes in MADT */
- return 1;
+ rdmsrq(MSR_FAM10H_MMIO_CONF_BASE, mcfg);
+ return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
}
-static int numachip_apic_id_registered(void)
+static void numachip1_apic_icr_write(int apicid, unsigned int val)
{
- return physid_isset(read_xapic_id(), phys_cpu_present_map);
+ write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val);
}
-static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+static void numachip2_apic_icr_write(int apicid, unsigned int val)
{
- return initial_apic_id >> index_msb;
+ numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val);
}
-static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu)
{
- union numachip_csr_g3_ext_irq_gen int_gen;
-
- int_gen.s._destination_apic_id = phys_apicid;
- int_gen.s._vector = 0;
- int_gen.s._msgtype = APIC_DM_INIT >> 8;
- int_gen.s._index = 0;
-
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
-
- int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
- int_gen.s._vector = start_rip >> 12;
-
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+ numachip_apic_icr_write(phys_apicid, APIC_DM_INIT);
+ numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP |
+ (start_rip >> 12));
- atomic_set(&init_deasserted, 1);
return 0;
}
static void numachip_send_IPI_one(int cpu, int vector)
{
- union numachip_csr_g3_ext_irq_gen int_gen;
- int apicid = per_cpu(x86_cpu_to_apicid, cpu);
-
- int_gen.s._destination_apic_id = apicid;
- int_gen.s._vector = vector;
- int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
- int_gen.s._index = 0;
+ int local_apicid, apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ unsigned int dmode;
+
+ preempt_disable();
+ local_apicid = __this_cpu_read(x86_cpu_to_apicid);
+
+ /* Send via local APIC where non-local part matches */
+ if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __default_send_IPI_dest_field(apicid, vector,
+ APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+ preempt_enable();
+ return;
+ }
+ preempt_enable();
- write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+ dmode = (vector == NMI_VECTOR) ? APIC_DM_NMI : APIC_DM_FIXED;
+ numachip_apic_icr_write(apicid, dmode | vector);
}
static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
@@ -145,100 +129,131 @@ static void numachip_send_IPI_all(int vector)
static void numachip_send_IPI_self(int vector)
{
- __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+ apic_write(APIC_SELF_IPI, vector);
}
-static int __init numachip_probe(void)
+static int __init numachip1_probe(void)
{
- return apic == &apic_numachip;
+ return apic == &apic_numachip1;
}
-static void __init map_csrs(void)
+static int __init numachip2_probe(void)
{
- printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
- NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
-
- printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
- NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
- init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+ return apic == &apic_numachip2;
}
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
+ u64 val;
+ u32 nodes = 1;
+
+ c->topo.llc_id = node;
- if (c->phys_proc_id != node) {
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
+ /* Account for nodes per socket in multi-core-module processors */
+ if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ rdmsrq(MSR_FAM10H_NODE_ID, val);
+ nodes = ((val >> 3) & 7) + 1;
}
+
+ c->topo.pkg_id = node / nodes;
}
static int __init numachip_system_init(void)
{
- unsigned int val;
-
- if (!numachip_system)
+ /* Map the LCSR area and set up the apic_icr_write function */
+ switch (numachip_system) {
+ case 1:
+ init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+ numachip_apic_icr_write = numachip1_apic_icr_write;
+ break;
+ case 2:
+ init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
+ numachip_apic_icr_write = numachip2_apic_icr_write;
+ break;
+ default:
return 0;
+ }
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
x86_init.pci.arch_init = pci_numachip_init;
- map_csrs();
-
- val = read_lcsr(CSR_G0_NODE_IDS);
- printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
-
return 0;
}
early_initcall(numachip_system_init);
-static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int numachip1_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- if (!strncmp(oem_id, "NUMASC", 6)) {
- numachip_system = 1;
- return 1;
- }
+ if ((strncmp(oem_id, "NUMASC", 6) != 0) ||
+ (strncmp(oem_table_id, "NCONNECT", 8) != 0))
+ return 0;
- return 0;
+ numachip_system = 1;
+
+ return 1;
}
-static const struct apic apic_numachip __refconst = {
+static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if ((strncmp(oem_id, "NUMASC", 6) != 0) ||
+ (strncmp(oem_table_id, "NCONECT2", 8) != 0))
+ return 0;
+ numachip_system = 2;
+
+ return 1;
+}
+
+static const struct apic apic_numachip1 __refconst = {
.name = "NumaConnect system",
- .probe = numachip_probe,
- .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
- .apic_id_valid = numachip_apic_id_valid,
- .apic_id_registered = numachip_apic_id_registered,
+ .probe = numachip1_probe,
+ .acpi_madt_oem_check = numachip1_acpi_madt_oem_check,
- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .dest_mode_logical = false,
- .target_cpus = online_target_cpus,
.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
- .init_apic_ldr = flat_init_apic_ldr,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+
+ .max_apic_id = UINT_MAX,
+ .get_apic_id = numachip1_get_apic_id,
+
+ .calc_dest_apicid = apic_default_calc_apicid,
+
+ .send_IPI = numachip_send_IPI_one,
+ .send_IPI_mask = numachip_send_IPI_mask,
+ .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = numachip_send_IPI_allbutself,
+ .send_IPI_all = numachip_send_IPI_all,
+ .send_IPI_self = numachip_send_IPI_self,
+
+ .wakeup_secondary_cpu = numachip_wakeup_secondary,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .eoi = native_apic_mem_eoi,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+};
+
+apic_driver(apic_numachip1);
+
+static const struct apic apic_numachip2 __refconst = {
+ .name = "NumaConnect2 system",
+ .probe = numachip2_probe,
+ .acpi_madt_oem_check = numachip2_acpi_madt_oem_check,
+
+ .dest_mode_logical = false,
+
+ .disable_esr = 0,
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = numachip_phys_pkg_id,
- .mps_oem_check = NULL,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = 0xffU << 24,
+ .max_apic_id = UINT_MAX,
+ .get_apic_id = numachip2_get_apic_id,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = apic_default_calc_apicid,
+ .send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
.send_IPI_allbutself = numachip_send_IPI_allbutself,
@@ -246,19 +261,12 @@ static const struct apic apic_numachip __refconst = {
.send_IPI_self = numachip_send_IPI_self,
.wakeup_secondary_cpu = numachip_wakeup_secondary,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL, /* REMRD not supported */
.read = native_apic_mem_read,
.write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
+ .eoi = native_apic_mem_eoi,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
-apic_driver(apic_numachip);
+apic_driver(apic_numachip2);
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
deleted file mode 100644
index d50e3640d5ae..000000000000
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
- *
- * Drives the local APIC in "clustered mode".
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-#include <linux/smp.h>
-
-#include <asm/apicdef.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-static unsigned bigsmp_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static int bigsmp_apic_id_registered(void)
-{
- return 1;
-}
-
-static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return 0;
-}
-
-static unsigned long bigsmp_check_apicid_present(int bit)
-{
- return 1;
-}
-
-static int bigsmp_early_logical_apicid(int cpu)
-{
- /* on bigsmp, logical apicid is the same as physical */
- return early_per_cpu(x86_cpu_to_apicid, cpu);
-}
-
-static inline unsigned long calculate_ldr(int cpu)
-{
- unsigned long val, id;
-
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- id = per_cpu(x86_bios_cpu_apicid, cpu);
- val |= SET_APIC_LOGICAL_ID(id);
-
- return val;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void bigsmp_init_apic_ldr(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static void bigsmp_setup_apic_routing(void)
-{
- printk(KERN_INFO
- "Enabling APIC mode: Physflat. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-static int bigsmp_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids)
- return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
-
- return BAD_APICID;
-}
-
-static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0xFFL, retmap);
-}
-
-static int bigsmp_check_phys_apicid_present(int phys_apicid)
-{
- return 1;
-}
-
-static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_phys(mask, vector);
-}
-
-static void bigsmp_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void bigsmp_send_IPI_all(int vector)
-{
- bigsmp_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int dmi_bigsmp; /* can be set by dmi scanners */
-
-static int hp_ht_bigsmp(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
- dmi_bigsmp = 1;
-
- return 0;
-}
-
-
-static const struct dmi_system_id bigsmp_dmi_table[] = {
- { hp_ht_bigsmp, "HP ProLiant DL760 G2",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
- }
- },
-
- { hp_ht_bigsmp, "HP ProLiant DL740",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
- }
- },
- { } /* NULL entry stops DMI scanning */
-};
-
-static int probe_bigsmp(void)
-{
- if (def_to_bigsmp)
- dmi_bigsmp = 1;
- else
- dmi_check_system(bigsmp_dmi_table);
-
- return dmi_bigsmp;
-}
-
-static struct apic apic_bigsmp = {
-
- .name = "bigsmp",
- .probe = probe_bigsmp,
- .acpi_madt_oem_check = NULL,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = bigsmp_apic_id_registered,
-
- .irq_delivery_mode = dest_Fixed,
- /* phys delivery to target CPU: */
- .irq_dest_mode = 0,
-
- .target_cpus = default_target_cpus,
- .disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = bigsmp_check_apicid_used,
- .check_apicid_present = bigsmp_check_apicid_present,
-
- .vector_allocation_domain = default_vector_allocation_domain,
- .init_apic_ldr = bigsmp_init_apic_ldr,
-
- .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
- .setup_apic_routing = bigsmp_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
- .apicid_to_cpu_present = physid_set_mask_of_physid,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = bigsmp_phys_pkg_id,
- .mps_oem_check = NULL,
-
- .get_apic_id = bigsmp_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = bigsmp_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
- .send_IPI_all = bigsmp_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
- .wait_for_init_deassert = default_wait_for_init_deassert,
-
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
-};
-
-void __init generic_bigsmp_probe(void)
-{
- unsigned int cpu;
-
- if (!probe_bigsmp())
- return;
-
- apic = &apic_bigsmp;
-
- for_each_possible_cpu(cpu) {
- if (early_per_cpu(x86_cpu_to_logical_apicid,
- cpu) == BAD_APICID)
- continue;
- early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
- bigsmp_early_logical_apicid(cpu);
- }
-
- pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
-}
-
-apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
deleted file mode 100644
index 0874799a98c6..000000000000
--- a/arch/x86/kernel/apic/es7000_32.c
+++ /dev/null
@@ -1,746 +0,0 @@
-/*
- * Written by: Garry Forsgren, Unisys Corporation
- * Natalie Protasevich, Unisys Corporation
- *
- * This file contains the code to configure and interface
- * with Unisys ES7000 series hardware system manager.
- *
- * Copyright (c) 2003 Unisys Corporation.
- * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
- *
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Unisys Corporation, Township Line & Union Meeting
- * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
- *
- * http://www.unisys.com
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/notifier.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/gfp.h>
-#include <linux/nmi.h>
-#include <linux/smp.h>
-#include <linux/io.h>
-
-#include <asm/apicdef.h>
-#include <linux/atomic.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-/*
- * ES7000 chipsets
- */
-
-#define NON_UNISYS 0
-#define ES7000_CLASSIC 1
-#define ES7000_ZORRO 2
-
-#define MIP_REG 1
-#define MIP_PSAI_REG 4
-
-#define MIP_BUSY 1
-#define MIP_SPIN 0xf0000
-#define MIP_VALID 0x0100000000000000ULL
-#define MIP_SW_APIC 0x1020b
-
-#define MIP_PORT(val) ((val >> 32) & 0xffff)
-
-#define MIP_RD_LO(val) (val & 0xffffffff)
-
-struct mip_reg {
- unsigned long long off_0x00;
- unsigned long long off_0x08;
- unsigned long long off_0x10;
- unsigned long long off_0x18;
- unsigned long long off_0x20;
- unsigned long long off_0x28;
- unsigned long long off_0x30;
- unsigned long long off_0x38;
-};
-
-struct mip_reg_info {
- unsigned long long mip_info;
- unsigned long long delivery_info;
- unsigned long long host_reg;
- unsigned long long mip_reg;
-};
-
-struct psai {
- unsigned long long entry_type;
- unsigned long long addr;
- unsigned long long bep_addr;
-};
-
-#ifdef CONFIG_ACPI
-
-struct es7000_oem_table {
- struct acpi_table_header Header;
- u32 OEMTableAddr;
- u32 OEMTableSize;
-};
-
-static unsigned long oem_addrX;
-static unsigned long oem_size;
-
-#endif
-
-/*
- * ES7000 Globals
- */
-
-static volatile unsigned long *psai;
-static struct mip_reg *mip_reg;
-static struct mip_reg *host_reg;
-static int mip_port;
-static unsigned long mip_addr;
-static unsigned long host_addr;
-
-int es7000_plat;
-
-/*
- * GSI override for ES7000 platforms.
- */
-
-
-static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-}
-
-static int es7000_apic_is_cluster(void)
-{
- /* MPENTIUMIII */
- if (boot_cpu_data.x86 == 6 &&
- (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
- return 1;
-
- return 0;
-}
-
-static void setup_unisys(void)
-{
- /*
- * Determine the generation of the ES7000 currently running.
- *
- * es7000_plat = 1 if the machine is a 5xx ES7000 box
- * es7000_plat = 2 if the machine is a x86_64 ES7000 box
- *
- */
- if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
- es7000_plat = ES7000_ZORRO;
- else
- es7000_plat = ES7000_CLASSIC;
-}
-
-/*
- * Parse the OEM Table:
- */
-static int parse_unisys_oem(char *oemptr)
-{
- int i;
- int success = 0;
- unsigned char type, size;
- unsigned long val;
- char *tp = NULL;
- struct psai *psaip = NULL;
- struct mip_reg_info *mi;
- struct mip_reg *host, *mip;
-
- tp = oemptr;
-
- tp += 8;
-
- for (i = 0; i <= 6; i++) {
- type = *tp++;
- size = *tp++;
- tp -= 2;
- switch (type) {
- case MIP_REG:
- mi = (struct mip_reg_info *)tp;
- val = MIP_RD_LO(mi->host_reg);
- host_addr = val;
- host = (struct mip_reg *)val;
- host_reg = __va(host);
- val = MIP_RD_LO(mi->mip_reg);
- mip_port = MIP_PORT(mi->mip_info);
- mip_addr = val;
- mip = (struct mip_reg *)val;
- mip_reg = __va(mip);
- pr_debug("host_reg = 0x%lx\n",
- (unsigned long)host_reg);
- pr_debug("mip_reg = 0x%lx\n",
- (unsigned long)mip_reg);
- success++;
- break;
- case MIP_PSAI_REG:
- psaip = (struct psai *)tp;
- if (tp != NULL) {
- if (psaip->addr)
- psai = __va(psaip->addr);
- else
- psai = NULL;
- success++;
- }
- break;
- default:
- break;
- }
- tp += size;
- }
-
- if (success < 2)
- es7000_plat = NON_UNISYS;
- else
- setup_unisys();
-
- return es7000_plat;
-}
-
-#ifdef CONFIG_ACPI
-static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
-{
- struct acpi_table_header *header = NULL;
- struct es7000_oem_table *table;
- acpi_size tbl_size;
- acpi_status ret;
- int i = 0;
-
- for (;;) {
- ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size);
- if (!ACPI_SUCCESS(ret))
- return -1;
-
- if (!memcmp((char *) &header->oem_id, "UNISYS", 6))
- break;
-
- early_acpi_os_unmap_memory(header, tbl_size);
- }
-
- table = (void *)header;
-
- oem_addrX = table->OEMTableAddr;
- oem_size = table->OEMTableSize;
-
- early_acpi_os_unmap_memory(header, tbl_size);
-
- *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);
-
- return 0;
-}
-
-static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
-{
- if (!oem_addr)
- return;
-
- __acpi_unmap_table((char *)oem_addr, oem_size);
-}
-
-static int es7000_check_dsdt(void)
-{
- struct acpi_table_header header;
-
- if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
- !strncmp(header.oem_id, "UNISYS", 6))
- return 1;
- return 0;
-}
-
-static int es7000_acpi_ret;
-
-/* Hook from generic ACPI tables.c */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- unsigned long oem_addr = 0;
- int check_dsdt;
- int ret = 0;
-
- /* check dsdt at first to avoid clear fix_map for oem_addr */
- check_dsdt = es7000_check_dsdt();
-
- if (!find_unisys_acpi_oem_table(&oem_addr)) {
- if (check_dsdt) {
- ret = parse_unisys_oem((char *)oem_addr);
- } else {
- setup_unisys();
- ret = 1;
- }
- /*
- * we need to unmap it
- */
- unmap_unisys_acpi_oem_table(oem_addr);
- }
-
- es7000_acpi_ret = ret;
-
- return ret && !es7000_apic_is_cluster();
-}
-
-static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
-{
- int ret = es7000_acpi_ret;
-
- return ret && es7000_apic_is_cluster();
-}
-
-#else /* !CONFIG_ACPI: */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-
-static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-#endif /* !CONFIG_ACPI */
-
-static void es7000_spin(int n)
-{
- int i = 0;
-
- while (i++ < n)
- rep_nop();
-}
-
-static int es7000_mip_write(struct mip_reg *mip_reg)
-{
- int status = 0;
- int spin;
-
- spin = MIP_SPIN;
- while ((host_reg->off_0x38 & MIP_VALID) != 0) {
- if (--spin <= 0) {
- WARN(1, "Timeout waiting for Host Valid Flag\n");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
- outb(1, mip_port);
-
- spin = MIP_SPIN;
-
- while ((mip_reg->off_0x38 & MIP_VALID) == 0) {
- if (--spin <= 0) {
- WARN(1, "Timeout waiting for MIP Valid Flag\n");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
- mip_reg->off_0x38 &= ~MIP_VALID;
-
- return status;
-}
-
-static void es7000_enable_apic_mode(void)
-{
- struct mip_reg es7000_mip_reg;
- int mip_status;
-
- if (!es7000_plat)
- return;
-
- pr_info("Enabling APIC mode.\n");
- memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
- es7000_mip_reg.off_0x00 = MIP_SW_APIC;
- es7000_mip_reg.off_0x38 = MIP_VALID;
-
- while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
- WARN(1, "Command failed, status = %x\n", mip_status);
-}
-
-static void es7000_wait_for_init_deassert(atomic_t *deassert)
-{
- while (!atomic_read(deassert))
- cpu_relax();
-}
-
-static unsigned int es7000_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_phys(mask, vector);
-}
-
-static void es7000_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void es7000_send_IPI_all(int vector)
-{
- es7000_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int es7000_apic_id_registered(void)
-{
- return 1;
-}
-
-static const struct cpumask *target_cpus_cluster(void)
-{
- return cpu_all_mask;
-}
-
-static const struct cpumask *es7000_target_cpus(void)
-{
- return cpumask_of(smp_processor_id());
-}
-
-static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return 0;
-}
-
-static unsigned long es7000_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static int es7000_early_logical_apicid(int cpu)
-{
- /* on es7000, logical apicid is the same as physical */
- return early_per_cpu(x86_bios_cpu_apicid, cpu);
-}
-
-static unsigned long calculate_ldr(int cpu)
-{
- unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
-
- return SET_APIC_LOGICAL_ID(id);
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LdR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void es7000_init_apic_ldr_cluster(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_CLUSTER);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static void es7000_init_apic_ldr(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static void es7000_setup_apic_routing(void)
-{
- int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
-
- pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
- (apic_version[apic] == 0x14) ?
- "Physical Cluster" : "Logical Cluster",
- nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
-}
-
-static int es7000_cpu_present_to_apicid(int mps_cpu)
-{
- if (!mps_cpu)
- return boot_cpu_physical_apicid;
- else if (mps_cpu < nr_cpu_ids)
- return per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static int cpu_id;
-
-static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
-{
- physid_set_mask_of_physid(cpu_id, retmap);
- ++cpu_id;
-}
-
-static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0xFFL, retmap);
-}
-
-static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
-{
- boot_cpu_physical_apicid = read_apic_id();
- return 1;
-}
-
-static inline int
-es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
-{
- unsigned int round = 0;
- unsigned int cpu, uninitialized_var(apicid);
-
- /*
- * The cpus in the mask must all be on the apic cluster.
- */
- for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
- int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- WARN(1, "Not a valid mask!");
-
- return -EINVAL;
- }
- apicid |= new_apicid;
- round++;
- }
- if (!round)
- return -EINVAL;
- *dest_id = apicid;
- return 0;
-}
-
-static int
-es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- cpumask_var_t cpumask;
- *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return 0;
-
- cpumask_and(cpumask, inmask, andmask);
- es7000_cpu_mask_to_apicid(cpumask, apicid);
-
- free_cpumask_var(cpumask);
-
- return 0;
-}
-
-static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static int probe_es7000(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-static int es7000_mps_ret;
-static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- int ret = 0;
-
- if (mpc->oemptr) {
- struct mpc_oemtable *oem_table =
- (struct mpc_oemtable *)mpc->oemptr;
-
- if (!strncmp(oem, "UNISYS", 6))
- ret = parse_unisys_oem((char *)oem_table);
- }
-
- es7000_mps_ret = ret;
-
- return ret && !es7000_apic_is_cluster();
-}
-
-static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- int ret = es7000_mps_ret;
-
- return ret && es7000_apic_is_cluster();
-}
-
-/* We've been warned by a false positive warning.Use __refdata to keep calm. */
-static struct apic __refdata apic_es7000_cluster = {
-
- .name = "es7000",
- .probe = probe_es7000,
- .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = es7000_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all procs: */
- .irq_dest_mode = 1,
-
- .target_cpus = target_cpus_cluster,
- .disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = es7000_check_apicid_used,
- .check_apicid_present = es7000_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = es7000_init_apic_ldr_cluster,
-
- .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
- .setup_apic_routing = es7000_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
- .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = es7000_check_phys_apicid_present,
- .enable_apic_mode = es7000_enable_apic_mode,
- .phys_pkg_id = es7000_phys_pkg_id,
- .mps_oem_check = es7000_mps_oem_check_cluster,
-
- .get_apic_id = es7000_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = es7000_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = es7000_send_IPI_allbutself,
- .send_IPI_all = es7000_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
-
- .trampoline_phys_low = 0x467,
- .trampoline_phys_high = 0x469,
-
- .wait_for_init_deassert = NULL,
-
- /* Nothing to do for most platforms, since cleared by the INIT cycle: */
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = es7000_early_logical_apicid,
-};
-
-static struct apic __refdata apic_es7000 = {
-
- .name = "es7000",
- .probe = probe_es7000,
- .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = es7000_apic_id_registered,
-
- .irq_delivery_mode = dest_Fixed,
- /* phys delivery to target CPUs: */
- .irq_dest_mode = 0,
-
- .target_cpus = es7000_target_cpus,
- .disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = es7000_check_apicid_used,
- .check_apicid_present = es7000_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = es7000_init_apic_ldr,
-
- .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
- .setup_apic_routing = es7000_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
- .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = es7000_check_phys_apicid_present,
- .enable_apic_mode = es7000_enable_apic_mode,
- .phys_pkg_id = es7000_phys_pkg_id,
- .mps_oem_check = es7000_mps_oem_check,
-
- .get_apic_id = es7000_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = es7000_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = es7000_send_IPI_allbutself,
- .send_IPI_all = es7000_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .trampoline_phys_low = 0x467,
- .trampoline_phys_high = 0x469,
-
- .wait_for_init_deassert = es7000_wait_for_init_deassert,
-
- /* Nothing to do for most platforms, since cleared by the INIT cycle: */
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = es7000_early_logical_apicid,
-};
-
-/*
- * Need to check for es7000 followed by es7000_cluster, so this order
- * in apic_drivers is important.
- */
-apic_drivers(apic_es7000, apic_es7000_cluster);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index a698d7165c96..45af535c44a0 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* HW NMI watchdog support
*
@@ -8,6 +9,7 @@
* Bits copied from original nmi.c file
*
*/
+#include <linux/thread_info.h>
#include <asm/apic.h>
#include <asm/nmi.h>
@@ -16,76 +18,44 @@
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/delay.h>
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#include "local.h"
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
return (u64)(cpu_khz) * 1000 * watchdog_thresh;
}
#endif
-#ifdef arch_trigger_all_cpu_backtrace
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* "in progress" flag of arch_trigger_all_cpu_backtrace */
-static unsigned long backtrace_flag;
-
-void arch_trigger_all_cpu_backtrace(void)
+#ifdef arch_trigger_cpumask_backtrace
+static void nmi_raise_cpu_backtrace(cpumask_t *mask)
{
- int i;
-
- if (test_and_set_bit(0, &backtrace_flag))
- /*
- * If there is already a trigger_all_cpu_backtrace() in progress
- * (backtrace_flag == 1), don't output double cpu dump infos.
- */
- return;
-
- cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
- printk(KERN_INFO "sending NMI to all CPUs:\n");
- apic->send_IPI_all(NMI_VECTOR);
-
- /* Wait for up to 10 seconds for all CPUs to do the backtrace */
- for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(to_cpumask(backtrace_mask)))
- break;
- mdelay(1);
- }
-
- clear_bit(0, &backtrace_flag);
- smp_mb__after_clear_bit();
+ __apic_send_IPI_mask(mask, NMI_VECTOR);
}
-static int __kprobes
-arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
- int cpu;
-
- cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
+ nmi_raise_cpu_backtrace);
+}
- arch_spin_lock(&lock);
- printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
- show_regs(regs);
- arch_spin_unlock(&lock);
- cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+static int nmi_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ if (nmi_cpu_backtrace(regs))
return NMI_HANDLED;
- }
return NMI_DONE;
}
+NOKPROBE_SYMBOL(nmi_cpu_backtrace_handler);
-static int __init register_trigger_all_cpu_backtrace(void)
+static int __init register_nmi_cpu_backtrace_handler(void)
{
- register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+ register_nmi_handler(NMI_LOCAL, nmi_cpu_backtrace_handler,
0, "arch_bt");
return 0;
}
-early_initcall(register_trigger_all_cpu_backtrace);
+early_initcall(register_nmi_cpu_backtrace_handler);
#endif
diff --git a/arch/x86/kernel/apic/init.c b/arch/x86/kernel/apic/init.c
new file mode 100644
index 000000000000..821e2e536f19
--- /dev/null
+++ b/arch/x86/kernel/apic/init.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) "APIC: " fmt
+
+#include <asm/apic.h>
+
+#include "local.h"
+
+/*
+ * Use DEFINE_STATIC_CALL_NULL() to avoid having to provide stub functions
+ * for each callback. The callbacks are setup during boot and all except
+ * wait_icr_idle() must be initialized before usage. The IPI wrappers
+ * use static_call() and not static_call_cond() to catch any fails.
+ */
+#define DEFINE_APIC_CALL(__cb) \
+ DEFINE_STATIC_CALL_NULL(apic_call_##__cb, *apic->__cb)
+
+DEFINE_APIC_CALL(eoi);
+DEFINE_APIC_CALL(native_eoi);
+DEFINE_APIC_CALL(icr_read);
+DEFINE_APIC_CALL(icr_write);
+DEFINE_APIC_CALL(read);
+DEFINE_APIC_CALL(send_IPI);
+DEFINE_APIC_CALL(send_IPI_mask);
+DEFINE_APIC_CALL(send_IPI_mask_allbutself);
+DEFINE_APIC_CALL(send_IPI_allbutself);
+DEFINE_APIC_CALL(send_IPI_all);
+DEFINE_APIC_CALL(send_IPI_self);
+DEFINE_APIC_CALL(wait_icr_idle);
+DEFINE_APIC_CALL(wakeup_secondary_cpu);
+DEFINE_APIC_CALL(wakeup_secondary_cpu_64);
+DEFINE_APIC_CALL(write);
+
+EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_mask);
+EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_self);
+
+/* The container for function call overrides */
+struct apic_override __x86_apic_override __initdata;
+
+#define apply_override(__cb) \
+ if (__x86_apic_override.__cb) \
+ apic->__cb = __x86_apic_override.__cb
+
+static __init void restore_override_callbacks(void)
+{
+ apply_override(eoi);
+ apply_override(native_eoi);
+ apply_override(write);
+ apply_override(read);
+ apply_override(send_IPI);
+ apply_override(send_IPI_mask);
+ apply_override(send_IPI_mask_allbutself);
+ apply_override(send_IPI_allbutself);
+ apply_override(send_IPI_all);
+ apply_override(send_IPI_self);
+ apply_override(icr_read);
+ apply_override(icr_write);
+ apply_override(wakeup_secondary_cpu);
+ apply_override(wakeup_secondary_cpu_64);
+}
+
+#define update_call(__cb) \
+ static_call_update(apic_call_##__cb, *apic->__cb)
+
+static __init void update_static_calls(void)
+{
+ update_call(eoi);
+ update_call(native_eoi);
+ update_call(write);
+ update_call(read);
+ update_call(send_IPI);
+ update_call(send_IPI_mask);
+ update_call(send_IPI_mask_allbutself);
+ update_call(send_IPI_allbutself);
+ update_call(send_IPI_all);
+ update_call(send_IPI_self);
+ update_call(icr_read);
+ update_call(icr_write);
+ update_call(wait_icr_idle);
+ update_call(wakeup_secondary_cpu);
+ update_call(wakeup_secondary_cpu_64);
+}
+
+void __init apic_setup_apic_calls(void)
+{
+ /* Ensure that the default APIC has native_eoi populated */
+ apic->native_eoi = apic->eoi;
+ update_static_calls();
+ pr_info("Static calls initialized\n");
+}
+
+void __init apic_install_driver(struct apic *driver)
+{
+ if (apic == driver)
+ return;
+
+ apic = driver;
+
+ if (IS_ENABLED(CONFIG_X86_X2APIC) && apic->x2apic_set_max_apicid)
+ apic->max_apic_id = x2apic_max_apicid;
+
+ /* Copy the original eoi() callback as KVM/HyperV might overwrite it */
+ if (!apic->native_eoi)
+ apic->native_eoi = apic->eoi;
+
+ /* Apply any already installed callback overrides */
+ restore_override_callbacks();
+ update_static_calls();
+
+ pr_info("Switched APIC routing to: %s\n", driver->name);
+}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9ed796ccc32c..28f934f05a85 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel IO-APIC support for multi-Pentium hosts.
*
@@ -18,10 +19,21 @@
* and Rolf G. Tews
* for testing these extensively
* Paul Diefenbaugh : Added full ACPI support
+ *
+ * Historical information which is worth to be preserved:
+ *
+ * - SiS APIC rmw bug:
+ *
+ * We used to have a workaround for a bug in SiS chips which
+ * required to rewrite the index register for a read-modify-write
+ * operation as the chip lost the index information which was
+ * setup for the read already. We cache the data now, so that
+ * workaround has been removed.
*/
#include <linux/mm.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
@@ -29,22 +41,16 @@
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscore_ops.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h> /* time_after() */
#include <linux/slab.h>
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
+#include <linux/memblock.h>
+#include <linux/msi.h>
-#include <asm/idle.h>
+#include <asm/irqdomain.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/cpu.h>
@@ -53,44 +59,63 @@
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
+#include <asm/time.h>
#include <asm/i8259.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
-#include <asm/hpet.h>
#include <asm/hw_irq.h>
-
#include <asm/apic.h>
+#include <asm/pgtable.h>
+#include <asm/x86_init.h>
+
+#define for_each_ioapic(idx) \
+ for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
+#define for_each_ioapic_reverse(idx) \
+ for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
+#define for_each_pin(idx, pin) \
+ for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
+#define for_each_ioapic_pin(idx, pin) \
+ for_each_ioapic((idx)) \
+ for_each_pin((idx), (pin))
+#define for_each_irq_pin(entry, head) \
+ list_for_each_entry(entry, &head, list)
-#define __apicdebuginit(type) static type __init
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_MUTEX(ioapic_mutex);
+static unsigned int ioapic_dynirq_base;
+static int ioapic_initialized;
-#define for_each_irq_pin(entry, head) \
- for (entry = head; entry; entry = entry->next)
+struct irq_pin_list {
+ struct list_head list;
+ int apic, pin;
+};
-/*
- * Is the SiS APIC rmw bug present ?
- * -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
+struct mp_chip_data {
+ struct list_head irq_2_pin;
+ struct IO_APIC_route_entry entry;
+ bool is_level;
+ bool active_low;
+ bool isa_irq;
+ u32 count;
+};
-static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
+struct mp_ioapic_gsi {
+ u32 gsi_base;
+ u32 gsi_end;
+};
static struct ioapic {
- /*
- * # of IRQ routing registers
- */
- int nr_registers;
- /*
- * Saved state during suspend/resume, or while enabling intr-remap.
- */
- struct IO_APIC_route_entry *saved_registers;
+ /* # of IRQ routing registers */
+ int nr_registers;
+ /* Saved state during suspend/resume, or while enabling intr-remap. */
+ struct IO_APIC_route_entry *saved_registers;
/* I/O APIC config */
- struct mpc_ioapic mp_config;
+ struct mpc_ioapic mp_config;
/* IO APIC gsi routing info */
- struct mp_ioapic_gsi gsi_config;
- DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+ struct mp_ioapic_gsi gsi_config;
+ struct ioapic_domain_cfg irqdomain_cfg;
+ struct irq_domain *irqdomain;
+ struct resource *iomem_res;
} ioapics[MAX_IO_APICS];
#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver
@@ -105,11 +130,33 @@ unsigned int mpc_ioapic_addr(int ioapic_idx)
return ioapics[ioapic_idx].mp_config.apicaddr;
}
-struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
+static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
{
return &ioapics[ioapic_idx].gsi_config;
}
+static inline int mp_ioapic_pin_count(int ioapic)
+{
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+ return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+}
+
+static inline u32 mp_pin_to_gsi(int ioapic, int pin)
+{
+ return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
+}
+
+static inline bool mp_is_legacy_irq(int irq)
+{
+ return irq >= 0 && irq < nr_legacy_irqs();
+}
+
+static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
+{
+ return ioapics[ioapic].irqdomain;
+}
+
int nr_ioapics;
/* The one past the highest gsi number used */
@@ -121,16 +168,13 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
-/* GSI interrupts */
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
-
#ifdef CONFIG_EISA
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int skip_ioapic_setup;
+bool ioapic_is_disabled __ro_after_init;
/**
* disable_ioapic_support() - disables ioapic support at runtime
@@ -141,7 +185,7 @@ void disable_ioapic_support(void)
noioapicquirk = 1;
noioapicreroute = -1;
#endif
- skip_ioapic_setup = 1;
+ ioapic_is_disabled = true;
}
static int __init parse_noapic(char *str)
@@ -152,18 +196,14 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
-static int io_apic_setup_irq_pin(unsigned int irq, int node,
- struct io_apic_irq_attr *attr);
-
-/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+/* Will be called in mpparse/ACPI codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
int i;
- apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
- m->srcbusirq, m->dstapic, m->dstirq);
+ apic_pr_verbose("Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+ m->srcbusirq, m->dstapic, m->dstirq);
for (i = 0; i < mp_irq_entries; i++) {
if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
@@ -175,127 +215,38 @@ void mp_save_irq(struct mpc_intsrc *m)
panic("Max # of irq sources exceeded!!\n");
}
-struct irq_pin_list {
- int apic, pin;
- struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *alloc_irq_pin_list(int node)
-{
- return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
-}
-
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-
-int __init arch_early_irq_init(void)
-{
- struct irq_cfg *cfg;
- int count, node, i;
-
- if (!legacy_pic->nr_legacy_irqs)
- io_apic_irqs = ~0UL;
-
- for (i = 0; i < nr_ioapics; i++) {
- ioapics[i].saved_registers =
- kzalloc(sizeof(struct IO_APIC_route_entry) *
- ioapics[i].nr_registers, GFP_KERNEL);
- if (!ioapics[i].saved_registers)
- pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
- }
-
- cfg = irq_cfgx;
- count = ARRAY_SIZE(irq_cfgx);
- node = cpu_to_node(0);
-
- /* Make sure the legacy interrupts are marked in the bitmap */
- irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
-
- for (i = 0; i < count; i++) {
- irq_set_chip_data(i, &cfg[i]);
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
- /*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
- */
- if (i < legacy_pic->nr_legacy_irqs) {
- cfg[i].vector = IRQ0_VECTOR + i;
- cpumask_setall(cfg[i].domain);
- }
- }
-
- return 0;
-}
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
+static void alloc_ioapic_saved_registers(int idx)
{
- return irq_get_chip_data(irq);
-}
+ size_t size;
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
- struct irq_cfg *cfg;
+ if (ioapics[idx].saved_registers)
+ return;
- cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
- if (!cfg)
- return NULL;
- if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
- goto out_cfg;
- if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
- goto out_domain;
- return cfg;
-out_domain:
- free_cpumask_var(cfg->domain);
-out_cfg:
- kfree(cfg);
- return NULL;
+ size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
+ ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
+ if (!ioapics[idx].saved_registers)
+ pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
}
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+static void free_ioapic_saved_registers(int idx)
{
- if (!cfg)
- return;
- irq_set_chip_data(at, NULL);
- free_cpumask_var(cfg->domain);
- free_cpumask_var(cfg->old_domain);
- kfree(cfg);
+ kfree(ioapics[idx].saved_registers);
+ ioapics[idx].saved_registers = NULL;
}
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+int __init arch_early_ioapic_init(void)
{
- int res = irq_alloc_desc_at(at, node);
- struct irq_cfg *cfg;
-
- if (res < 0) {
- if (res != -EEXIST)
- return NULL;
- cfg = irq_get_chip_data(at);
- if (cfg)
- return cfg;
- }
+ int i;
- cfg = alloc_irq_cfg(at, node);
- if (cfg)
- irq_set_chip_data(at, cfg);
- else
- irq_free_desc(at);
- return cfg;
-}
+ if (!nr_legacy_irqs())
+ io_apic_irqs = ~0UL;
-static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
-{
- return irq_alloc_descs_from(from, count, node);
-}
+ for_each_ioapic(i)
+ alloc_ioapic_saved_registers(i);
-static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
-{
- free_irq_cfg(at, cfg);
- irq_free_desc(at);
+ return 0;
}
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -310,20 +261,23 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}
-void io_apic_eoi(unsigned int apic, unsigned int vector)
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
+
writel(vector, &io_apic->eoi);
}
unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
+
writel(reg, &io_apic->index);
return readl(&io_apic->data);
}
-void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+static void io_apic_write(unsigned int apic, unsigned int reg,
+ unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -331,46 +285,20 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu
writel(value, &io_apic->data);
}
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
-
- if (sis_apic_bug)
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
-}
-
-union entry_union {
- struct { u32 w1, w2; };
- struct IO_APIC_route_entry entry;
-};
-
static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
{
- union entry_union eu;
+ struct IO_APIC_route_entry entry;
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ entry.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ entry.w2 = io_apic_read(apic, 0x11 + 2 * pin);
- return eu.entry;
+ return entry;
}
static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
- union entry_union eu;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- eu.entry = __ioapic_read_entry(apic, pin);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return eu.entry;
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ return __ioapic_read_entry(apic, pin);
}
/*
@@ -381,20 +309,14 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
*/
static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- union entry_union eu = {{0, 0}};
-
- eu.entry = e;
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, e.w2);
+ io_apic_write(apic, 0x10 + 2*pin, e.w1);
}
static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
__ioapic_write_entry(apic, pin, e);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
/*
@@ -404,13 +326,11 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
*/
static void ioapic_mask_entry(int apic, int pin)
{
- unsigned long flags;
- union entry_union eu = { .entry.mask = 1 };
+ struct IO_APIC_route_entry e = { .masked = true };
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ io_apic_write(apic, 0x10 + 2*pin, e.w1);
+ io_apic_write(apic, 0x11 + 2*pin, e.w2);
}
/*
@@ -418,127 +338,86 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
{
- struct irq_pin_list **last, *entry;
+ struct irq_pin_list *entry;
- /* don't allow duplicates */
- last = &cfg->irq_2_pin;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
+ /* Don't allow duplicates */
+ for_each_irq_pin(entry, data->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
- return 0;
- last = &entry->next;
+ return true;
}
- entry = alloc_irq_pin_list(node);
+ entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
if (!entry) {
- pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
- node, apic, pin);
- return -ENOMEM;
+ pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin);
+ return false;
}
+
entry->apic = apic;
entry->pin = pin;
-
- *last = entry;
- return 0;
-}
-
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
-{
- if (__add_pin_to_irq_node(cfg, node, apic, pin))
- panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
+ list_add_tail(&entry->list, &data->irq_2_pin);
+ return true;
}
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
- int oldapic, int oldpin,
- int newapic, int newpin)
+static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
{
- struct irq_pin_list *entry;
+ struct irq_pin_list *tmp, *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- /* every one is different, right? */
+ list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) {
+ if (entry->apic == apic && entry->pin == pin) {
+ list_del(&entry->list);
+ kfree(entry);
return;
}
}
-
- /* old apic/pin didn't exist, so just add new ones */
- add_pin_to_irq_node(cfg, node, newapic, newpin);
-}
-
-static void __io_apic_modify_irq(struct irq_pin_list *entry,
- int mask_and, int mask_or,
- void (*final)(struct irq_pin_list *entry))
-{
- unsigned int reg, pin;
-
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
- reg &= mask_and;
- reg |= mask_or;
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
- if (final)
- final(entry);
}
-static void io_apic_modify_irq(struct irq_cfg *cfg,
- int mask_and, int mask_or,
+static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
void (*final)(struct irq_pin_list *entry))
{
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin)
- __io_apic_modify_irq(entry, mask_and, mask_or, final);
+ data->entry.masked = masked;
+
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ io_apic_write(entry->apic, 0x10 + 2 * entry->pin, data->entry.w1);
+ if (final)
+ final(entry);
+ }
}
+/*
+ * Synchronize the IO-APIC and the CPU by doing a dummy read from the
+ * IO-APIC
+ */
static void io_apic_sync(struct irq_pin_list *entry)
{
- /*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
struct io_apic __iomem *io_apic;
io_apic = io_apic_base(entry->apic);
readl(&io_apic->data);
}
-static void mask_ioapic(struct irq_cfg *cfg)
+static void mask_ioapic_irq(struct irq_data *irq_data)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
+ struct mp_chip_data *data = irq_data->chip_data;
-static void mask_ioapic_irq(struct irq_data *data)
-{
- mask_ioapic(data->chip_data);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ io_apic_modify_irq(data, true, &io_apic_sync);
}
-static void __unmask_ioapic(struct irq_cfg *cfg)
+static void __unmask_ioapic(struct mp_chip_data *data)
{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ io_apic_modify_irq(data, false, NULL);
}
-static void unmask_ioapic(struct irq_cfg *cfg)
+static void unmask_ioapic_irq(struct irq_data *irq_data)
{
- unsigned long flags;
+ struct mp_chip_data *data = irq_data->chip_data;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_ioapic(cfg);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_ioapic_irq(struct irq_data *data)
-{
- unmask_ioapic(data->chip_data);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ __unmask_ioapic(data);
}
/*
@@ -557,7 +436,7 @@ static void unmask_ioapic_irq(struct irq_data *data)
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-void native_eoi_ioapic_pin(int apic, int pin, int vector)
+static void __eoi_ioapic_pin(int apic, int pin, int vector)
{
if (mpc_ioapic_ver(apic) >= 0x20) {
io_apic_eoi(apic, vector);
@@ -566,31 +445,24 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector)
entry = entry1 = __ioapic_read_entry(apic, pin);
- /*
- * Mask the entry and change the trigger mode to edge.
- */
- entry1.mask = 1;
- entry1.trigger = IOAPIC_EDGE;
+ /* Mask the entry and change the trigger mode to edge. */
+ entry1.masked = true;
+ entry1.is_level = false;
__ioapic_write_entry(apic, pin, entry1);
- /*
- * Restore the previous level triggered entry.
- */
+ /* Restore the previous level triggered entry. */
__ioapic_write_entry(apic, pin, entry);
}
}
-void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
{
struct irq_pin_list *entry;
- unsigned long flags;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin)
- x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
- cfg->vector);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ for_each_irq_pin(entry, data->irq_2_pin)
+ __eoi_ioapic_pin(entry->apic, entry->pin, vector);
}
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -599,35 +471,31 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
/* Check delivery_mode to be sure we're not clearing an SMI pin */
entry = ioapic_read_entry(apic, pin);
- if (entry.delivery_mode == dest_SMI)
+ if (entry.delivery_mode == APIC_DELIVERY_MODE_SMI)
return;
/*
* Make sure the entry is masked and re-read the contents to check
* if it is a level triggered pin and if the remote-IRR is set.
*/
- if (!entry.mask) {
- entry.mask = 1;
+ if (!entry.masked) {
+ entry.masked = true;
ioapic_write_entry(apic, pin, entry);
entry = ioapic_read_entry(apic, pin);
}
if (entry.irr) {
- unsigned long flags;
-
/*
* Make sure the trigger mode is set to level. Explicit EOI
* doesn't clear the remote-IRR if the trigger mode is not
* set to level.
*/
- if (!entry.trigger) {
- entry.trigger = IOAPIC_LEVEL;
+ if (!entry.is_level) {
+ entry.is_level = true;
ioapic_write_entry(apic, pin, entry);
}
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ __eoi_ioapic_pin(apic, pin, entry.vector);
}
/*
@@ -641,13 +509,12 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
mpc_ioapic_id(apic), pin);
}
-static void clear_IO_APIC (void)
+void clear_IO_APIC (void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
- clear_IO_APIC_pin(apic, pin);
+ for_each_ioapic_pin(apic, pin)
+ clear_IO_APIC_pin(apic, pin);
}
#ifdef CONFIG_X86_32
@@ -663,28 +530,23 @@ static int pirq_entries[MAX_PIRQS] = {
static int __init ioapic_pirq_setup(char *str)
{
- int i, max;
- int ints[MAX_PIRQS+1];
+ int i, max, ints[MAX_PIRQS+1];
get_options(str, ARRAY_SIZE(ints), ints);
- apic_printk(APIC_VERBOSE, KERN_INFO
- "PIRQ redirection, working around broken MP-BIOS.\n");
+ apic_pr_verbose("PIRQ redirection, working around broken MP-BIOS.\n");
+
max = MAX_PIRQS;
if (ints[0] < MAX_PIRQS)
max = ints[0];
for (i = 0; i < max; i++) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
+ apic_pr_verbose("... PIRQ%d -> IRQ %d\n", i, ints[i + 1]);
+ /* PIRQs are mapped upside down, usually */
pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
}
return 1;
}
-
__setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */
@@ -696,15 +558,14 @@ int save_ioapic_entries(void)
int apic, pin;
int err = 0;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers) {
err = -ENOMEM;
continue;
}
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
- ioapics[apic].saved_registers[pin] =
- ioapic_read_entry(apic, pin);
+ for_each_pin(apic, pin)
+ ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin);
}
return err;
@@ -717,16 +578,16 @@ void mask_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ for_each_pin(apic, pin) {
struct IO_APIC_route_entry entry;
entry = ioapics[apic].saved_registers[pin];
- if (!entry.mask) {
- entry.mask = 1;
+ if (!entry.masked) {
+ entry.masked = true;
ioapic_write_entry(apic, pin, entry);
}
}
@@ -740,13 +601,12 @@ int restore_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
- ioapic_write_entry(apic, pin,
- ioapics[apic].saved_registers[pin]);
+ for_each_pin(apic, pin)
+ ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]);
}
return 0;
}
@@ -758,12 +618,13 @@ static int find_irq_entry(int ioapic_idx, int pin, int type)
{
int i;
- for (i = 0; i < mp_irq_entries; i++)
+ for (i = 0; i < mp_irq_entries; i++) {
if (mp_irqs[i].irqtype == type &&
(mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
mp_irqs[i].dstapic == MP_APIC_ALL) &&
mp_irqs[i].dstirq == pin)
return i;
+ }
return -1;
}
@@ -778,10 +639,8 @@ static int __init find_isa_irq_pin(int irq, int type)
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].srcbus;
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].irqtype == type) &&
+ if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&
(mp_irqs[i].srcbusirq == irq))
-
return mp_irqs[i].dstirq;
}
return -1;
@@ -794,8 +653,7 @@ static int __init find_isa_irq_apic(int irq, int type)
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].srcbus;
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].irqtype == type) &&
+ if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&
(mp_irqs[i].srcbusirq == irq))
break;
}
@@ -803,476 +661,193 @@ static int __init find_isa_irq_apic(int irq, int type)
if (i < mp_irq_entries) {
int ioapic_idx;
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx) {
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
return ioapic_idx;
+ }
}
return -1;
}
-#ifdef CONFIG_EISA
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < legacy_pic->nr_legacy_irqs) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, KERN_INFO
- "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
-#define default_EISA_polarity(idx) default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-static int irq_polarity(int idx)
+static bool irq_active_low(int idx)
{
int bus = mp_irqs[idx].srcbus;
- int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
- switch (mp_irqs[idx].irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- if (test_bit(bus, mp_bus_not_pci))
- polarity = default_ISA_polarity(idx);
- else
- polarity = default_PCI_polarity(idx);
- break;
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- pr_warn("broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- pr_warn("broken BIOS!!\n");
- polarity = 1;
- break;
- }
+ switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
+ case MP_IRQPOL_DEFAULT:
+ /*
+ * Conforms to spec, ie. bus-type dependent polarity. PCI
+ * defaults to low active. [E]ISA defaults to high active.
+ */
+ return !test_bit(bus, mp_bus_not_pci);
+ case MP_IRQPOL_ACTIVE_HIGH:
+ return false;
+ case MP_IRQPOL_RESERVED:
+ pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
+ fallthrough;
+ case MP_IRQPOL_ACTIVE_LOW:
+ default: /* Pointless default required due to do gcc stupidity */
+ return true;
}
- return polarity;
}
-static int irq_trigger(int idx)
-{
- int bus = mp_irqs[idx].srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- if (test_bit(bus, mp_bus_not_pci))
- trigger = default_ISA_trigger(idx);
- else
- trigger = default_PCI_trigger(idx);
#ifdef CONFIG_EISA
- switch (mp_bus_id_to_type[bus]) {
- case MP_BUS_ISA: /* ISA pin */
- {
- /* set before the switch */
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /* set before the switch */
- break;
- }
- default:
- {
- pr_warn("broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
-#endif
- break;
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- pr_warn("broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- pr_warn("broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static bool EISA_ELCR(unsigned int irq)
{
- int irq;
- int bus = mp_irqs[idx].srcbus;
- struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].dstirq != pin)
- pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
-
- if (test_bit(bus, mp_bus_not_pci)) {
- irq = mp_irqs[idx].srcbusirq;
- } else {
- u32 gsi = gsi_cfg->gsi_base + pin;
-
- if (gsi >= NR_IRQS_LEGACY)
- irq = gsi;
- else
- irq = gsi_top + gsi;
- }
-
-#ifdef CONFIG_X86_32
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
+ if (irq < nr_legacy_irqs()) {
+ unsigned int port = PIC_ELCR1 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
}
-#endif
-
- return irq;
+ apic_pr_verbose("Broken MPtable reports ISA irq %d\n", irq);
+ return false;
}
/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
+ * EISA interrupts are always active high and can be edge or level
+ * triggered depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must be
+ * read in from the ELCR.
*/
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
- struct io_apic_irq_attr *irq_attr)
+static bool eisa_irq_is_level(int idx, int bus, bool level)
{
- int ioapic_idx, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG,
- "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (test_bit(bus, mp_bus_not_pci)) {
- apic_printk(APIC_VERBOSE,
- "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].srcbus;
-
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
- if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
-
- if (!(ioapic_idx || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].srcbusirq & 3)) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- return irq;
- }
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- best_guess = irq;
- }
- }
+ switch (mp_bus_id_to_type[bus]) {
+ case MP_BUS_PCI:
+ case MP_BUS_ISA:
+ return level;
+ case MP_BUS_EISA:
+ return EISA_ELCR(mp_irqs[idx].srcbusirq);
}
- return best_guess;
+ pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus);
+ return true;
}
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-void lock_vector_lock(void)
+#else
+static inline int eisa_irq_is_level(int idx, int bus, bool level)
{
- /* Used to the online set of cpus does not change
- * during assign_irq_vector.
- */
- raw_spin_lock(&vector_lock);
+ return level;
}
+#endif
-void unlock_vector_lock(void)
+static bool irq_is_level(int idx)
{
- raw_spin_unlock(&vector_lock);
-}
+ int bus = mp_irqs[idx].srcbus;
+ bool level;
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
/*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ * Determine IRQ trigger mode (edge or level sensitive):
*/
- static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
- static int current_offset = VECTOR_OFFSET_START % 16;
- int cpu, err;
- cpumask_var_t tmp_mask;
-
- if (cfg->move_in_progress)
- return -EBUSY;
-
- if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
- return -ENOMEM;
-
- /* Only try and allocate irqs on cpus that are present */
- err = -ENOSPC;
- cpumask_clear(cfg->old_domain);
- cpu = cpumask_first_and(mask, cpu_online_mask);
- while (cpu < nr_cpu_ids) {
- int new_cpu, vector, offset;
-
- apic->vector_allocation_domain(cpu, tmp_mask, mask);
-
- if (cpumask_subset(tmp_mask, cfg->domain)) {
- err = 0;
- if (cpumask_equal(tmp_mask, cfg->domain))
- break;
- /*
- * New cpumask using the vector is a proper subset of
- * the current in use mask. So cleanup the vector
- * allocation for the members that are not used anymore.
- */
- cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
- cfg->move_in_progress =
- cpumask_intersects(cfg->old_domain, cpu_online_mask);
- cpumask_and(cfg->domain, cfg->domain, tmp_mask);
- break;
- }
-
- vector = current_vector;
- offset = current_offset;
-next:
- vector += 16;
- if (vector >= first_system_vector) {
- offset = (offset + 1) % 16;
- vector = FIRST_EXTERNAL_VECTOR + offset;
- }
-
- if (unlikely(current_vector == vector)) {
- cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
- cpumask_andnot(tmp_mask, mask, cfg->old_domain);
- cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
- continue;
- }
-
- if (test_bit(vector, used_vectors))
- goto next;
-
- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
- if (per_cpu(vector_irq, new_cpu)[vector] != -1)
- goto next;
- /* Found one! */
- current_vector = vector;
- current_offset = offset;
- if (cfg->vector) {
- cpumask_copy(cfg->old_domain, cfg->domain);
- cfg->move_in_progress =
- cpumask_intersects(cfg->old_domain, cpu_online_mask);
- }
- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
- per_cpu(vector_irq, new_cpu)[vector] = irq;
- cfg->vector = vector;
- cpumask_copy(cfg->domain, tmp_mask);
- err = 0;
- break;
+ switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
+ case MP_IRQTRIG_DEFAULT:
+ /*
+ * Conforms to spec, ie. bus-type dependent trigger
+ * mode. PCI defaults to level, ISA to edge.
+ */
+ level = !test_bit(bus, mp_bus_not_pci);
+ /* Take EISA into account */
+ return eisa_irq_is_level(idx, bus, level);
+ case MP_IRQTRIG_EDGE:
+ return false;
+ case MP_IRQTRIG_RESERVED:
+ pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
+ fallthrough;
+ case MP_IRQTRIG_LEVEL:
+ default: /* Pointless default required due to do gcc stupidity */
+ return true;
}
- free_cpumask_var(tmp_mask);
- return err;
}
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+static int __acpi_get_override_irq(u32 gsi, bool *trigger, bool *polarity)
{
- int err;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, cfg, mask);
- raw_spin_unlock_irqrestore(&vector_lock, flags);
- return err;
-}
+ int ioapic, pin, idx;
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
- int cpu, vector;
+ if (ioapic_is_disabled)
+ return -1;
- BUG_ON(!cfg->vector);
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -1;
- vector = cfg->vector;
- for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
- per_cpu(vector_irq, cpu)[vector] = -1;
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ if (pin < 0)
+ return -1;
- cfg->vector = 0;
- cpumask_clear(cfg->domain);
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0)
+ return -1;
- if (likely(!cfg->move_in_progress))
- return;
- for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
- vector++) {
- if (per_cpu(vector_irq, cpu)[vector] != irq)
- continue;
- per_cpu(vector_irq, cpu)[vector] = -1;
- break;
- }
- }
- cfg->move_in_progress = 0;
+ *trigger = irq_is_level(idx);
+ *polarity = irq_active_low(idx);
+ return 0;
}
-void __setup_vector_irq(int cpu)
+#ifdef CONFIG_ACPI
+int acpi_get_override_irq(u32 gsi, int *is_level, int *active_low)
{
- /* Initialize vector_irq on a new cpu */
- int irq, vector;
- struct irq_cfg *cfg;
-
- /*
- * vector_lock will make sure that we don't run into irq vector
- * assignments that might be happening on another cpu in parallel,
- * while we setup our initial vector to irq mappings.
- */
- raw_spin_lock(&vector_lock);
- /* Mark the inuse vectors */
- for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
- if (!cfg)
- continue;
-
- if (!cpumask_test_cpu(cpu, cfg->domain))
- continue;
- vector = cfg->vector;
- per_cpu(vector_irq, cpu)[vector] = irq;
- }
- /* Mark the free vectors */
- for (vector = 0; vector < NR_VECTORS; ++vector) {
- irq = per_cpu(vector_irq, cpu)[vector];
- if (irq < 0)
- continue;
-
- cfg = irq_cfg(irq);
- if (!cpumask_test_cpu(cpu, cfg->domain))
- per_cpu(vector_irq, cpu)[vector] = -1;
- }
- raw_spin_unlock(&vector_lock);
+ *is_level = *active_low = 0;
+ return __acpi_get_override_irq(gsi, (bool *)is_level,
+ (bool *)active_low);
}
+#endif
-static struct irq_chip ioapic_chip;
+void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node,
+ int trigger, int polarity)
+{
+ init_irq_alloc_info(info, NULL);
+ info->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
+ info->ioapic.node = node;
+ info->ioapic.is_level = trigger;
+ info->ioapic.active_low = polarity;
+ info->ioapic.valid = 1;
+}
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
+static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
+ struct irq_alloc_info *src,
+ u32 gsi, int ioapic_idx, int pin)
{
- int apic, idx, pin;
+ bool level, pol_low;
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
- return irq_trigger(idx);
+ copy_irq_alloc_info(dst, src);
+ dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
+ dst->devid = mpc_ioapic_id(ioapic_idx);
+ dst->ioapic.pin = pin;
+ dst->ioapic.valid = 1;
+ if (src && src->ioapic.valid) {
+ dst->ioapic.node = src->ioapic.node;
+ dst->ioapic.is_level = src->ioapic.is_level;
+ dst->ioapic.active_low = src->ioapic.active_low;
+ } else {
+ dst->ioapic.node = NUMA_NO_NODE;
+ if (__acpi_get_override_irq(gsi, &level, &pol_low) >= 0) {
+ dst->ioapic.is_level = level;
+ dst->ioapic.active_low = pol_low;
+ } else {
+ /*
+ * PCI interrupts are always active low level
+ * triggered.
+ */
+ dst->ioapic.is_level = true;
+ dst->ioapic.active_low = true;
}
}
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
+
+static int ioapic_alloc_attr_node(struct irq_alloc_info *info)
{
- return 1;
+ return (info && info->ioapic.valid) ? info->ioapic.node : NUMA_NO_NODE;
}
-#endif
-static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
- unsigned long trigger)
+static void mp_register_handler(unsigned int irq, bool level)
{
- struct irq_chip *chip = &ioapic_chip;
irq_flow_handler_t hdl;
bool fasteoi;
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL) {
+ if (level) {
irq_set_status_flags(irq, IRQ_LEVEL);
fasteoi = true;
} else {
@@ -1280,290 +855,347 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi = false;
}
- if (setup_remapped_irq(irq, cfg, chip))
- fasteoi = trigger != 0;
-
hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
- irq_set_chip_and_handler_name(irq, chip, hdl,
- fasteoi ? "fasteoi" : "edge");
+ __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge");
}
-int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
{
- memset(entry, 0, sizeof(*entry));
-
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->dest = destination;
- entry->vector = vector;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
+ struct mp_chip_data *data = irq_get_chip_data(irq);
/*
- * Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger
+ * and polarity attributes. So allow the first user to reprogram the
+ * pin with real trigger and polarity attributes.
*/
- if (attr->trigger)
- entry->mask = 1;
+ if (irq < nr_legacy_irqs() && data->count == 1) {
+ if (info->ioapic.is_level != data->is_level)
+ mp_register_handler(irq, info->ioapic.is_level);
+ data->entry.is_level = data->is_level = info->ioapic.is_level;
+ data->entry.active_low = data->active_low = info->ioapic.active_low;
+ }
- return 0;
+ return data->is_level == info->ioapic.is_level &&
+ data->active_low == info->ioapic.active_low;
}
-static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
- struct io_apic_irq_attr *attr)
+static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
+ struct irq_alloc_info *info)
{
- struct IO_APIC_route_entry entry;
- unsigned int dest;
-
- if (!IO_APIC_IRQ(irq))
- return;
-
- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
- return;
-
- if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
- &dest)) {
- pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
- mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
- __clear_irq_vector(irq, cfg);
-
- return;
- }
+ int type = ioapics[ioapic].irqdomain_cfg.type;
+ bool legacy = false;
+ int irq = -1;
- apic_printk(APIC_VERBOSE,KERN_DEBUG
- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i Dest:%d)\n",
- attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
- cfg->vector, irq, attr->trigger, attr->polarity, dest);
-
- if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
- mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
- __clear_irq_vector(irq, cfg);
-
- return;
+ switch (type) {
+ case IOAPIC_DOMAIN_LEGACY:
+ /*
+ * Dynamically allocate IRQ number for non-ISA IRQs in the first
+ * 16 GSIs on some weird platforms.
+ */
+ if (!ioapic_initialized || gsi >= nr_legacy_irqs())
+ irq = gsi;
+ legacy = mp_is_legacy_irq(irq);
+ break;
+ case IOAPIC_DOMAIN_STRICT:
+ irq = gsi;
+ break;
+ case IOAPIC_DOMAIN_DYNAMIC:
+ break;
+ default:
+ WARN(1, "ioapic: unknown irqdomain type %d\n", type);
+ return -1;
}
- ioapic_register_intr(irq, cfg, attr->trigger);
- if (irq < legacy_pic->nr_legacy_irqs)
- legacy_pic->mask(irq);
-
- ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
+ return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info),
+ info, legacy, NULL);
}
-static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
+/*
+ * Need special handling for ISA IRQs because there may be multiple IOAPIC pins
+ * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping
+ * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are
+ * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and
+ * some BIOSes may use MP Interrupt Source records to override IRQ numbers for
+ * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be
+ * multiple pins sharing the same legacy IRQ number when ACPI is disabled.
+ */
+static int alloc_isa_irq_from_domain(struct irq_domain *domain, int irq, int ioapic, int pin,
+ struct irq_alloc_info *info)
{
- if (idx != -1)
- return false;
+ struct irq_data *irq_data = irq_get_irq_data(irq);
+ int node = ioapic_alloc_attr_node(info);
+ struct mp_chip_data *data;
- apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return true;
+ /*
+ * Legacy ISA IRQ has already been allocated, just add pin to
+ * the pin list associated with this IRQ and program the IOAPIC
+ * entry.
+ */
+ if (irq_data && irq_data->parent_data) {
+ if (!mp_check_pin_attr(irq, info))
+ return -EBUSY;
+ if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
+ return -ENOMEM;
+ } else {
+ info->flags |= X86_IRQ_ALLOC_LEGACY;
+ irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL);
+ if (irq >= 0) {
+ irq_data = irq_domain_get_irq_data(domain, irq);
+ data = irq_data->chip_data;
+ data->isa_irq = true;
+ }
+ }
+
+ return irq;
}
-static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
+static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
+ unsigned int flags, struct irq_alloc_info *info)
{
- int idx, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
- unsigned int pin, irq;
-
- for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
- continue;
-
- irq = pin_2_irq(idx, ioapic_idx, pin);
+ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+ struct irq_alloc_info tmp;
+ struct mp_chip_data *data;
+ bool legacy = false;
+ int irq;
- if ((ioapic_idx > 0) && (irq > 16))
- continue;
+ if (!domain)
+ return -ENOSYS;
+ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
+ irq = mp_irqs[idx].srcbusirq;
+ legacy = mp_is_legacy_irq(irq);
/*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
+ * IRQ2 is unusable for historical reasons on systems which
+ * have a legacy PIC. See the comment vs. IRQ2 further down.
+ *
+ * If this gets removed at some point then the related code
+ * in lapic_assign_system_vectors() needs to be adjusted as
+ * well.
*/
- if (apic->multi_timer_check &&
- apic->multi_timer_check(ioapic_idx, irq))
- continue;
-
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
+ if (legacy && irq == PIC_CASCADE_IR)
+ return -EINVAL;
+ }
- io_apic_setup_irq_pin(irq, node, &attr);
+ guard(mutex)(&ioapic_mutex);
+ if (!(flags & IOAPIC_MAP_ALLOC)) {
+ if (!legacy) {
+ irq = irq_find_mapping(domain, pin);
+ if (irq == 0)
+ irq = -ENOENT;
+ }
+ } else {
+ ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin);
+ if (legacy)
+ irq = alloc_isa_irq_from_domain(domain, irq,
+ ioapic, pin, &tmp);
+ else if ((irq = irq_find_mapping(domain, pin)) == 0)
+ irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp);
+ else if (!mp_check_pin_attr(irq, &tmp))
+ irq = -EBUSY;
+ if (irq >= 0) {
+ data = irq_get_chip_data(irq);
+ data->count++;
+ }
}
+ return irq;
}
-static void __init setup_IO_APIC_irqs(void)
+static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
{
- unsigned int ioapic_idx;
+ u32 gsi = mp_pin_to_gsi(ioapic, pin);
+
+ /* Debugging check, we are in big trouble if this message pops up! */
+ if (mp_irqs[idx].dstirq != pin)
+ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
+
+#ifdef CONFIG_X86_32
+ /* PCI IRQ command line redirection. Yes, limits are hardcoded. */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin - 16] != -1) {
+ if (!pirq_entries[pin - 16]) {
+ apic_pr_verbose("Disabling PIRQ%d\n", pin - 16);
+ } else {
+ int irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_pr_verbose("Using PIRQ%d -> IRQ %d\n", pin - 16, irq);
+ return irq;
+ }
+ }
+ }
+#endif
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
- __io_apic_setup_irqs(ioapic_idx);
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL);
}
-/*
- * for the gsit that is not in first ioapic
- * but could not use acpi_register_gsi()
- * like some special sci in IBM x3330
- */
-void setup_IO_APIC_irq_extra(u32 gsi)
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info)
{
- int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
+ int ioapic, pin, idx;
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic_idx = mp_find_ioapic(gsi);
- if (ioapic_idx < 0)
- return;
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -ENODEV;
- pin = mp_find_ioapic_pin(ioapic_idx, gsi);
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (idx == -1)
- return;
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
+ return -ENODEV;
- irq = pin_2_irq(idx, ioapic_idx, pin);
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info);
+}
- /* Only handle the non legacy irqs on secondary ioapics */
- if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
+void mp_unmap_irq(int irq)
+{
+ struct irq_data *irq_data = irq_get_irq_data(irq);
+ struct mp_chip_data *data;
+
+ if (!irq_data || !irq_data->domain)
return;
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
+ data = irq_data->chip_data;
+ if (!data || data->isa_irq)
+ return;
- io_apic_setup_irq_pin_once(irq, node, &attr);
+ guard(mutex)(&ioapic_mutex);
+ if (--data->count == 0)
+ irq_domain_free_irqs(irq, 1);
}
/*
- * Set up the timer pin, possibly with the 8259A-master behind.
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
*/
-static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
- unsigned int pin, int vector)
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
{
- struct IO_APIC_route_entry entry;
- unsigned int dest;
+ int irq, i, best_ioapic = -1, best_idx = -1;
- memset(&entry, 0, sizeof(entry));
+ apic_pr_debug("Querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_pr_verbose("PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
- apic->target_cpus(), &dest)))
- dest = BAD_APICID;
-
- entry.dest_mode = apic->irq_dest_mode;
- entry.mask = 0; /* don't mask IRQ for edge */
- entry.dest = dest;
- entry.delivery_mode = apic->irq_delivery_mode;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+ int ioapic_idx, found = 0;
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we may have a 8259A-master in AEOI mode ...
- */
- irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
- "edge");
+ if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
+ slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
+ continue;
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- ioapic_write_entry(ioapic_idx, pin, entry);
-}
+ for_each_ioapic(ioapic_idx)
+ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL) {
+ found = 1;
+ break;
+ }
+ if (!found)
+ continue;
-void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
-{
- int i;
+ /* Skip ISA IRQs */
+ irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
+ if (irq > 0 && !IO_APIC_IRQ(irq))
+ continue;
- pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
+ goto out;
+ }
- for (i = 0; i <= nr_entries; i++) {
- struct IO_APIC_route_entry entry;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_idx < 0) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
+ }
+ }
+ if (best_idx < 0)
+ return -1;
- entry = ioapic_read_entry(apic, i);
+out:
+ return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, IOAPIC_MAP_ALLOC);
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+static struct irq_chip ioapic_chip, ioapic_ir_chip;
+
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int ioapic, pin;
+ int idx;
- pr_debug(" %02x %02X ", i, entry.dest);
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector);
+ apic_pr_verbose("Init IO_APIC IRQs\n");
+
+ for_each_ioapic_pin(ioapic, pin) {
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0) {
+ apic_pr_verbose("apic %d pin %d not connected\n",
+ mpc_ioapic_id(ioapic), pin);
+ } else {
+ pin_2_irq(idx, ioapic, pin, ioapic ? 0 : IOAPIC_MAP_ALLOC);
+ }
}
}
-void intel_ir_io_apic_print_entries(unsigned int apic,
- unsigned int nr_entries)
+void ioapic_zap_locks(void)
{
- int i;
+ raw_spin_lock_init(&ioapic_lock);
+}
- pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
+static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
+{
+ struct IO_APIC_route_entry entry;
+ char buf[256];
+ int i;
+ apic_dbg("IOAPIC %d:\n", apic);
for (i = 0; i <= nr_entries; i++) {
- struct IR_IO_APIC_route_entry *ir_entry;
- struct IO_APIC_route_entry entry;
-
entry = ioapic_read_entry(apic, i);
-
- ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
-
- pr_debug(" %02x %04X ", i, ir_entry->index);
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %X %02X\n",
- ir_entry->format,
- ir_entry->mask,
- ir_entry->trigger,
- ir_entry->irr,
- ir_entry->polarity,
- ir_entry->delivery_status,
- ir_entry->index2,
- ir_entry->zero,
- ir_entry->vector);
+ snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
+ i, entry.masked ? "disabled" : "enabled ",
+ entry.is_level ? "level" : "edge ",
+ entry.active_low ? "low " : "high",
+ entry.vector, entry.irr, entry.delivery_status);
+ if (entry.ir_format) {
+ apic_dbg("%s, remapped, I(%04X), Z(%X)\n", buf,
+ (entry.ir_index_15 << 15) | entry.ir_index_0_14, entry.ir_zero);
+ } else {
+ apic_dbg("%s, %s, D(%02X%02X), M(%1d)\n", buf,
+ entry.dest_mode_logical ? "logical " : "physical",
+ entry.virt_destid_8_14, entry.destid_0_7, entry.delivery_mode);
+ }
}
}
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+static void __init print_IO_APIC(int ioapic_idx)
{
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
union IO_APIC_reg_02 reg_02;
union IO_APIC_reg_03 reg_03;
- unsigned long flags;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic_idx, 0);
- reg_01.raw = io_apic_read(ioapic_idx, 1);
- if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(ioapic_idx, 2);
- if (reg_01.bits.version >= 0x20)
- reg_03.raw = io_apic_read(ioapic_idx, 3);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
-
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
- printk(KERN_DEBUG "....... : max redirection entries: %02X\n",
- reg_01.bits.entries);
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %02X\n",
- reg_01.bits.version);
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ reg_01.raw = io_apic_read(ioapic_idx, 1);
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(ioapic_idx, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(ioapic_idx, 3);
+ }
+
+ apic_dbg("IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
+ apic_dbg(".... register #00: %08X\n", reg_00.raw);
+ apic_dbg("....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ apic_dbg("....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ apic_dbg("....... : LTS : %X\n", reg_00.bits.LTS);
+ apic_dbg(".... register #01: %08X\n", *(int *)&reg_01);
+ apic_dbg("....... : max redirection entries: %02X\n", reg_01.bits.entries);
+ apic_dbg("....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ apic_dbg("....... : IO APIC version: %02X\n", reg_01.bits.version);
/*
* Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1571,8 +1203,8 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
* value, so ignore it if reg_02 == reg_01.
*/
if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+ apic_dbg(".... register #02: %08X\n", reg_02.raw);
+ apic_dbg("....... : arbitration: %02X\n", reg_02.bits.arbitration);
}
/*
@@ -1582,27 +1214,24 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
*/
if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
reg_03.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+ apic_dbg(".... register #03: %08X\n", reg_03.raw);
+ apic_dbg("....... : Boot DT : %X\n", reg_03.bits.boot_DT);
}
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
+ apic_dbg(".... IRQ redirection table:\n");
+ io_apic_print_entries(ioapic_idx, reg_01.bits.entries);
}
-__apicdebuginit(void) print_IO_APICs(void)
+void __init print_IO_APICs(void)
{
int ioapic_idx;
- struct irq_cfg *cfg;
unsigned int irq;
- struct irq_chip *chip;
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mpc_ioapic_id(ioapic_idx),
- ioapics[ioapic_idx].nr_registers);
+ apic_dbg("number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for_each_ioapic(ioapic_idx) {
+ apic_dbg("number of IO-APIC #%d registers: %d.\n",
+ mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers);
+ }
/*
* We are a bit conservative about what we expect. We have to
@@ -1610,25 +1239,26 @@ __apicdebuginit(void) print_IO_APICs(void)
*/
printk(KERN_INFO "testing the IO APIC.......................\n");
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
print_IO_APIC(ioapic_idx);
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ apic_dbg("IRQ to pin mappings:\n");
for_each_active_irq(irq) {
struct irq_pin_list *entry;
+ struct irq_chip *chip;
+ struct mp_chip_data *data;
chip = irq_get_chip(irq);
- if (chip != &ioapic_chip)
+ if (chip != &ioapic_chip && chip != &ioapic_ir_chip)
continue;
-
- cfg = irq_get_chip_data(irq);
- if (!cfg)
+ data = irq_get_chip_data(irq);
+ if (!data)
continue;
- entry = cfg->irq_2_pin;
- if (!entry)
+ if (list_empty(&data->irq_2_pin))
continue;
- printk(KERN_DEBUG "IRQ%d ", irq);
- for_each_irq_pin(entry, cfg->irq_2_pin)
+
+ apic_dbg("IRQ%d ", irq);
+ for_each_irq_pin(entry, data->irq_2_pin)
pr_cont("-> %d:%d", entry->apic, entry->pin);
pr_cont("\n");
}
@@ -1636,236 +1266,38 @@ __apicdebuginit(void) print_IO_APICs(void)
printk(KERN_INFO ".................................... done.\n");
}
-__apicdebuginit(void) print_APIC_field(int base)
-{
- int i;
-
- printk(KERN_DEBUG);
-
- for (i = 0; i < 8; i++)
- pr_cont("%08x", apic_read(base + i*0x10));
-
- pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
- unsigned int i, v, ver, maxlvt;
- u64 icr;
-
- printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = lapic_get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (!APIC_XAPIC(ver)) {
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- }
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
-
- /*
- * Remote read supported only in the 82489DX and local APIC for
- * Pentium processors.
- */
- if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- }
-
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- if (!x2apic_enabled()) {
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- }
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_field(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_field(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_field(APIC_IRR);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
-
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
-
- icr = apic_icr_read();
- printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-
- if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
- v = apic_read(APIC_EFEAT);
- maxlvt = (v >> 16) & 0xff;
- printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
- v = apic_read(APIC_ECTRL);
- printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
- for (i = 0; i < maxlvt; i++) {
- v = apic_read(APIC_EILVTn(i));
- printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
- }
- }
- pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APICs(int maxcpu)
-{
- int cpu;
-
- if (!maxcpu)
- return;
-
- preempt_disable();
- for_each_online_cpu(cpu) {
- if (cpu >= maxcpu)
- break;
- smp_call_function_single(cpu, print_local_APIC, NULL, 1);
- }
- preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
- unsigned int v;
- unsigned long flags;
-
- if (!legacy_pic->nr_legacy_irqs)
- return;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- raw_spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-static int __initdata show_lapic = 1;
-static __init int setup_show_lapic(char *arg)
-{
- int num = -1;
-
- if (strcmp(arg, "all") == 0) {
- show_lapic = CONFIG_NR_CPUS;
- } else {
- get_option(&arg, &num);
- if (num >= 0)
- show_lapic = num;
- }
-
- return 1;
-}
-__setup("show_lapic=", setup_show_lapic);
-
-__apicdebuginit(int) print_ICs(void)
-{
- if (apic_verbosity == APIC_QUIET)
- return 0;
-
- print_PIC();
-
- /* don't print out if apic is not there */
- if (!cpu_has_apic && !apic_from_smp_config())
- return 0;
-
- print_local_APICs(show_lapic);
- print_IO_APICs();
-
- return 0;
-}
-
-late_initcall(print_ICs);
-
-
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
void __init enable_IO_APIC(void)
{
- int i8259_apic, i8259_pin;
- int apic;
+ int i8259_apic, i8259_pin, apic, pin;
- if (!legacy_pic->nr_legacy_irqs)
+ if (ioapic_is_disabled)
+ nr_ioapics = 0;
+
+ if (!nr_legacy_irqs() || !nr_ioapics)
return;
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
+ for_each_ioapic_pin(apic, pin) {
/* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- struct IO_APIC_route_entry entry;
- entry = ioapic_read_entry(apic, pin);
+ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
+ /*
+ * If the interrupt line is enabled and in ExtInt mode I
+ * have found the pin where the i8259 is connected.
+ */
+ if (!entry.masked && entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ break;
}
}
- found_i8259:
- /* Look to see what if the MP table has reported the ExtINT */
- /* If we could not find the appropriate pin by looking at the ioapic
+
+ /*
+ * Look to see what if the MP table has reported the ExtINT
+ *
+ * If we could not find the appropriate pin by looking at the ioapic
* the i8259 probably is not connected the ioapic but give the
* mptable a chance anyway.
*/
@@ -1873,69 +1305,52 @@ void __init enable_IO_APIC(void)
i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
/* Trust the MP table if nothing is setup in the hardware */
if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+ pr_warn("ExtINT not setup in hardware but reported by MP table\n");
ioapic_i8259.pin = i8259_pin;
ioapic_i8259.apic = i8259_apic;
}
/* Complain if the MP table and the hardware disagree */
if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
- {
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
- }
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+ pr_warn("ExtINT in hardware and MP table differ\n");
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
+ /* Do not trust the IO-APIC being empty at bootup */
clear_IO_APIC();
}
-void native_disable_io_apic(void)
+void native_restore_boot_irq_mode(void)
{
/*
- * If the i8259 is routed through an IOAPIC
- * Put that IOAPIC in virtual wire mode
- * so legacy interrupts can be delivered.
+ * If the i8259 is routed through an IOAPIC Put that IOAPIC in
+ * virtual wire mode so legacy interrupts can be delivered.
*/
if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
+ u32 apic_id = read_apic_id();
memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest = read_apic_id();
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
+ entry.masked = false;
+ entry.is_level = false;
+ entry.active_low = false;
+ entry.dest_mode_logical = false;
+ entry.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
+ entry.destid_0_7 = apic_id & 0xFF;
+ entry.virt_destid_8_14 = apic_id >> 8;
+
+ /* Add it to the IO-APIC irq-routing table */
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-
}
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void restore_boot_irq_mode(void)
{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
- x86_io_apic_ops.disable();
+ x86_apic_ops.restore();
}
#ifdef CONFIG_X86_32
@@ -1945,37 +1360,34 @@ void disable_IO_APIC(void)
*
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
-void __init setup_ioapic_ids_from_mpc_nocheck(void)
+static void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
+ DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC);
+ const u32 broadcast_id = 0xF;
union IO_APIC_reg_00 reg_00;
- physid_mask_t phys_id_present_map;
- int ioapic_idx;
- int i;
unsigned char old_id;
- unsigned long flags;
+ int ioapic_idx, i;
/*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
+ copy_phys_cpu_present_map(phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
*/
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ for_each_ioapic(ioapic_idx) {
/* Read the register 0 value */
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic_idx, 0);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
old_id = mpc_ioapic_id(ioapic_idx);
- if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- ioapic_idx, mpc_ioapic_id(ioapic_idx));
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.bits.ID);
+ if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) {
+ pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID);
ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
}
@@ -1984,65 +1396,54 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(&phys_id_present_map,
- mpc_ioapic_id(ioapic_idx))) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- ioapic_idx, mpc_ioapic_id(ioapic_idx));
- for (i = 0; i < get_physical_broadcast(); i++)
- if (!physid_isset(i, phys_id_present_map))
+ if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) {
+ pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n",
+ ioapic_idx, mpc_ioapic_id(ioapic_idx));
+ for (i = 0; i < broadcast_id; i++)
+ if (!test_bit(i, phys_id_present_map))
break;
- if (i >= get_physical_broadcast())
+ if (i >= broadcast_id)
panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- physid_set(i, phys_id_present_map);
+ pr_err("... fixing up to %d. (tell your hw vendor)\n", i);
+ set_bit(i, phys_id_present_map);
ioapics[ioapic_idx].mp_config.apicid = i;
} else {
- physid_mask_t tmp;
- apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
- &tmp);
- apic_printk(APIC_VERBOSE, "Setting %d in the "
- "phys_id_present_map\n",
+ apic_pr_verbose("Setting %d in the phys_id_present_map\n",
mpc_ioapic_id(ioapic_idx));
- physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map);
}
/*
- * We need to adjust the IRQ routing table
- * if the ID changed.
+ * We need to adjust the IRQ routing table if the ID
+ * changed.
*/
- if (old_id != mpc_ioapic_id(ioapic_idx))
- for (i = 0; i < mp_irq_entries; i++)
+ if (old_id != mpc_ioapic_id(ioapic_idx)) {
+ for (i = 0; i < mp_irq_entries; i++) {
if (mp_irqs[i].dstapic == old_id)
- mp_irqs[i].dstapic
- = mpc_ioapic_id(ioapic_idx);
+ mp_irqs[i].dstapic = mpc_ioapic_id(ioapic_idx);
+ }
+ }
/*
- * Update the ID register according to the right value
- * from the MPC table if they are different.
+ * Update the ID register according to the right value from
+ * the MPC table if they are different.
*/
if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
continue;
- apic_printk(APIC_VERBOSE, KERN_INFO
- "...changing IO-APIC physical APIC ID to %d ...",
- mpc_ioapic_id(ioapic_idx));
+ apic_pr_verbose("...changing IO-APIC physical APIC ID to %d ...",
+ mpc_ioapic_id(ioapic_idx));
reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_idx, 0, reg_00.raw);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic_idx, 0);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+ io_apic_write(ioapic_idx, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ }
+ /* Sanity check */
if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
pr_cont("could not set ID!\n");
else
- apic_printk(APIC_VERBOSE, " ok.\n");
+ apic_pr_verbose(" ok.\n");
}
}
@@ -2056,7 +1457,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* no meaning without the serial APIC bus.
*/
if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ || APIC_XAPIC(boot_cpu_apic_version))
return;
setup_ioapic_ids_from_mpc_nocheck();
}
@@ -2071,6 +1472,42 @@ static int __init notimercheck(char *s)
}
__setup("no_timer_check", notimercheck);
+static void __init delay_with_tsc(void)
+{
+ unsigned long long start, now;
+ unsigned long end = jiffies + 4;
+
+ start = rdtsc();
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 40000000000/HZ TSC cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ */
+ do {
+ native_pause();
+ now = rdtsc();
+ } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+ unsigned long end = jiffies + 4;
+ int band = 1;
+
+ /*
+ * We don't know any frequency yet, but waiting for
+ * 40940000000/HZ cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+ */
+ do {
+ __delay(((1U << band++) * 10000000UL) / HZ);
+ } while (band < 12 && time_before_eq(jiffies, end));
+}
+
/*
* There is a nasty bug in some older SMP boards, their mptable lies
* about the timer IRQ. We do the following to work around the situation:
@@ -2082,16 +1519,15 @@ __setup("no_timer_check", notimercheck);
static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- unsigned long flags;
if (no_timer_check)
return 1;
- local_save_flags(flags);
local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
- local_irq_restore(flags);
+ if (boot_cpu_has(X86_FEATURE_TSC))
+ delay_with_tsc();
+ else
+ delay_without_tsc();
/*
* Expect a few ticks at least, to be sure some possible
@@ -2101,10 +1537,10 @@ static int __init timer_irq_works(void)
* least one tick may be lost due to delays.
*/
- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
+ local_irq_disable();
+
+ /* Did jiffies advance? */
+ return time_after(jiffies, t1 + 4);
}
/*
@@ -2113,296 +1549,69 @@ static int __init timer_irq_works(void)
* so we 'resend' these IRQs via IPIs, to the same CPU. It's much
* better to do it this way as thus we do not have to be aware of
* 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
*
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
+ *
+ * Edge triggered needs to resend any interrupt that was delayed but this
+ * is now handled in the device independent code.
+ *
+ * Starting up a edge-triggered IO-APIC interrupt is nasty - we need to
+ * make sure that we get the edge. If it is already asserted for some
+ * reason, we need return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake an edge even if it
+ * isn't on the 8259A...
*/
-
static unsigned int startup_ioapic_irq(struct irq_data *data)
{
int was_pending = 0, irq = data->irq;
- unsigned long flags;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < legacy_pic->nr_legacy_irqs) {
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ if (irq < nr_legacy_irqs()) {
legacy_pic->mask(irq);
if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
__unmask_ioapic(data->chip_data);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
return was_pending;
}
-static int ioapic_retrigger_irq(struct irq_data *data)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned long flags;
- int cpu;
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
- apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
- raw_spin_unlock_irqrestore(&vector_lock, flags);
-
- return 1;
-}
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
- cfg->move_in_progress = 0;
-}
-
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
- unsigned vector, me;
-
- ack_APIC_irq();
- irq_enter();
- exit_idle();
-
- me = smp_processor_id();
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- unsigned int irq;
- unsigned int irr;
- struct irq_desc *desc;
- struct irq_cfg *cfg;
- irq = __this_cpu_read(vector_irq[vector]);
-
- if (irq == -1)
- continue;
-
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
-
- cfg = irq_cfg(irq);
- if (!cfg)
- continue;
-
- raw_spin_lock(&desc->lock);
-
- /*
- * Check if the irq migration is in progress. If so, we
- * haven't received the cleanup request yet for this irq.
- */
- if (cfg->move_in_progress)
- goto unlock;
-
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
- goto unlock;
-
- irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
- /*
- * Check if the vector that needs to be cleanedup is
- * registered at the cpu's IRR. If so, then this is not
- * the best time to clean it up. Lets clean it up in the
- * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
- * to myself.
- */
- if (irr & (1 << (vector % 32))) {
- apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
- goto unlock;
- }
- __this_cpu_write(vector_irq[vector], -1);
-unlock:
- raw_spin_unlock(&desc->lock);
- }
-
- irq_exit();
-}
-
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
-{
- unsigned me;
-
- if (likely(!cfg->move_in_progress))
- return;
-
- me = smp_processor_id();
-
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
- send_cleanup_vector(cfg);
-}
-
-static void irq_complete_move(struct irq_cfg *cfg)
-{
- __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
-void irq_force_complete_move(int irq)
-{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
-
- if (!cfg)
- return;
-
- __irq_complete_move(cfg, cfg->vector);
-}
-#else
-static inline void irq_complete_move(struct irq_cfg *cfg) { }
-#endif
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
-
- apic = entry->apic;
- pin = entry->pin;
-
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- }
-}
-
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- unsigned int *dest_id)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int irq = data->irq;
- int err;
-
- if (!config_enabled(CONFIG_SMP))
- return -1;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
-
- err = assign_irq_vector(irq, cfg, mask);
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
- if (err) {
- if (assign_irq_vector(irq, cfg, data->affinity))
- pr_err("Failed to recover vector for irq %d\n", irq);
- return err;
- }
-
- cpumask_copy(data->affinity, mask);
-
- return 0;
-}
-
-
-int native_ioapic_set_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force)
-{
- unsigned int dest, irq = data->irq;
- unsigned long flags;
- int ret;
-
- if (!config_enabled(CONFIG_SMP))
- return -1;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = __ioapic_set_affinity(data, mask, &dest);
- if (!ret) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, data->chip_data);
- ret = IRQ_SET_MASK_OK_NOCOPY;
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- return ret;
-}
-
-static void ack_apic_edge(struct irq_data *data)
-{
- irq_complete_move(data->chip_data);
- irq_move_irq(data);
- ack_APIC_irq();
-}
-
atomic_t irq_mis_count;
#ifdef CONFIG_GENERIC_PENDING_IRQ
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+static bool io_apic_level_ack_pending(struct mp_chip_data *data)
{
struct irq_pin_list *entry;
- unsigned long flags;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ struct IO_APIC_route_entry e;
int pin;
pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ e.w1 = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
- if (reg & IO_APIC_REDIR_REMOTE_IRR) {
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (e.irr)
return true;
- }
}
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
return false;
}
-static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
- /* If we are moving the irq we need to mask it */
+ /* If we are moving the IRQ we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
- mask_ioapic(cfg);
+ if (!irqd_irq_masked(data))
+ mask_ioapic_irq(data);
return true;
}
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data,
- struct irq_cfg *cfg, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
- if (unlikely(masked)) {
- /* Only migrate the irq if the ack has been received.
+ if (unlikely(moveit)) {
+ /*
+ * Only migrate the irq if the ack has been received.
*
* On rare occasions the broadcast level triggered ack gets
* delayed going to ioapics, and if we reprogram the
@@ -2421,38 +1630,39 @@ static inline void ioapic_irqd_unmask(struct irq_data *data,
* with masking the ioapic entry and then polling until
* Remote IRR was clear before reprogramming the
* ioapic I don't trust the Remote IRR bit to be
- * completey accurate.
+ * completely accurate.
*
* However there appears to be no other way to plug
* this race, so if the Remote IRR bit is not
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- if (!io_apic_level_ack_pending(cfg))
+ if (!io_apic_level_ack_pending(data->chip_data))
irq_move_masked_irq(data);
- unmask_ioapic(cfg);
+ /* If the IRQ is masked in the core, leave it: */
+ if (!irqd_irq_masked(data))
+ unmask_ioapic_irq(data);
}
}
#else
-static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data,
- struct irq_cfg *cfg, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
}
#endif
-static void ack_apic_level(struct irq_data *data)
+static void ioapic_ack_level(struct irq_data *irq_data)
{
- struct irq_cfg *cfg = data->chip_data;
- int i, irq = data->irq;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
unsigned long v;
- bool masked;
+ bool moveit;
+ int i;
irq_complete_move(cfg);
- masked = ioapic_irqd_mask(data, cfg);
+ moveit = ioapic_prepare_move(irq_data);
/*
* It appears there is an erratum which affects at least version 0x11
@@ -2493,22 +1703,152 @@ static void ack_apic_level(struct irq_data *data)
* We must acknowledge the irq before we move it or the acknowledge will
* not propagate properly.
*/
- ack_APIC_irq();
+ apic_eoi();
/*
* Tail end of clearing remote IRR bit (either by delivering the EOI
* message via io-apic EOI register write or simulating it using
- * mask+edge followed by unnask+level logic) manually when the
+ * mask+edge followed by unmask+level logic) manually when the
* level triggered interrupt is seen as the edge triggered interrupt
* at the cpu.
*/
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
-
- eoi_ioapic_irq(irq, cfg);
+ eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
}
- ioapic_irqd_unmask(data, cfg, masked);
+ ioapic_finish_move(irq_data, moveit);
+}
+
+static void ioapic_ir_ack_level(struct irq_data *irq_data)
+{
+ struct mp_chip_data *data = irq_data->chip_data;
+
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ apic_ack_irq(irq_data);
+ eoi_ioapic_pin(data->entry.vector, data);
+}
+
+/*
+ * The I/OAPIC is just a device for generating MSI messages from legacy
+ * interrupt pins. Various fields of the RTE translate into bits of the
+ * resulting MSI which had a historical meaning.
+ *
+ * With interrupt remapping, many of those bits have different meanings
+ * in the underlying MSI, but the way that the I/OAPIC transforms them
+ * from its RTE to the MSI message is the same. This function allows
+ * the parent IRQ domain to compose the MSI message, then takes the
+ * relevant bits to put them in the appropriate places in the RTE in
+ * order to generate that message when the IRQ happens.
+ *
+ * The setup here relies on a preconfigured route entry (is_level,
+ * active_low, masked) because the parent domain is merely composing the
+ * generic message routing information which is used for the MSI.
+ */
+static void ioapic_setup_msg_from_msi(struct irq_data *irq_data,
+ struct IO_APIC_route_entry *entry)
+{
+ struct msi_msg msg;
+
+ /* Let the parent domain compose the MSI message */
+ irq_chip_compose_msi_msg(irq_data, &msg);
+
+ /*
+ * - Real vector
+ * - DMAR/IR: 8bit subhandle (ioapic.pin)
+ * - AMD/IR: 8bit IRTE index
+ */
+ entry->vector = msg.arch_data.vector;
+ /* Delivery mode (for DMAR/IR all 0) */
+ entry->delivery_mode = msg.arch_data.delivery_mode;
+ /* Destination mode or DMAR/IR index bit 15 */
+ entry->dest_mode_logical = msg.arch_addr_lo.dest_mode_logical;
+ /* DMAR/IR: 1, 0 for all other modes */
+ entry->ir_format = msg.arch_addr_lo.dmar_format;
+ /*
+ * - DMAR/IR: index bit 0-14.
+ *
+ * - Virt: If the host supports x2apic without a virtualized IR
+ * unit then bit 0-6 of dmar_index_0_14 are providing bit
+ * 8-14 of the destination id.
+ *
+ * All other modes have bit 0-6 of dmar_index_0_14 cleared and the
+ * topmost 8 bits are destination id bit 0-7 (entry::destid_0_7).
+ */
+ entry->ir_index_0_14 = msg.arch_addr_lo.dmar_index_0_14;
+}
+
+static void ioapic_configure_entry(struct irq_data *irqd)
+{
+ struct mp_chip_data *mpd = irqd->chip_data;
+ struct irq_pin_list *entry;
+
+ ioapic_setup_msg_from_msi(irqd, &mpd->entry);
+
+ for_each_irq_pin(entry, mpd->irq_2_pin)
+ __ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
+}
+
+static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force)
+{
+ struct irq_data *parent = irq_data->parent_data;
+ int ret;
+
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
+ ioapic_configure_entry(irq_data);
+
+ return ret;
+}
+
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd, enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+
+ guard(raw_spinlock)(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. It's
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */
+ if (rentry.irr && rentry.is_level) {
+ *state = true;
+ break;
+ }
+ }
+ return 0;
}
static struct irq_chip ioapic_chip __read_mostly = {
@@ -2516,10 +1856,27 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_startup = startup_ioapic_irq,
.irq_mask = mask_ioapic_irq,
.irq_unmask = unmask_ioapic_irq,
- .irq_ack = ack_apic_edge,
- .irq_eoi = ack_apic_level,
- .irq_set_affinity = native_ioapic_set_affinity,
- .irq_retrigger = ioapic_retrigger_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_eoi = ioapic_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
+};
+
+static struct irq_chip ioapic_ir_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_eoi = ioapic_ir_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static inline void init_IO_APIC_traps(void)
@@ -2527,29 +1884,17 @@ static inline void init_IO_APIC_traps(void)
struct irq_cfg *cfg;
unsigned int irq;
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
+ * Hmm.. We don't have an entry for this, so
+ * default to an old-fashioned 8259 interrupt if we
+ * can. Otherwise set the dummy interrupt chip.
*/
- if (irq < legacy_pic->nr_legacy_irqs)
+ if (irq < nr_legacy_irqs())
legacy_pic->make_irq(irq);
else
- /* Strange. Oh, well.. */
irq_set_chip(irq, &no_irq_chip);
}
}
@@ -2558,26 +1903,23 @@ static inline void init_IO_APIC_traps(void)
/*
* The local APIC irq-chip implementation:
*/
-
static void mask_lapic_irq(struct irq_data *data)
{
- unsigned long v;
+ unsigned long v = apic_read(APIC_LVT0);
- v = apic_read(APIC_LVT0);
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}
static void unmask_lapic_irq(struct irq_data *data)
{
- unsigned long v;
+ unsigned long v = apic_read(APIC_LVT0);
- v = apic_read(APIC_LVT0);
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
static void ack_lapic_irq(struct irq_data *data)
{
- ack_APIC_irq();
+ apic_eoi();
}
static struct irq_chip lapic_chip __read_mostly = {
@@ -2590,8 +1932,7 @@ static struct irq_chip lapic_chip __read_mostly = {
static void lapic_register_intr(int irq)
{
irq_clear_status_flags(irq, IRQ_LEVEL);
- irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
- "edge");
+ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge");
}
/*
@@ -2603,9 +1944,10 @@ static void lapic_register_intr(int irq)
*/
static inline void __init unlock_ExtINT_logic(void)
{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
+ struct IO_APIC_route_entry entry0, entry1;
+ int apic, pin, i;
+ u32 apic_id;
pin = find_isa_irq_pin(8, mp_INT);
if (pin == -1) {
@@ -2621,14 +1963,16 @@ static inline void __init unlock_ExtINT_logic(void)
entry0 = ioapic_read_entry(apic, pin);
clear_IO_APIC_pin(apic, pin);
+ apic_id = read_apic_id();
memset(&entry1, 0, sizeof(entry1));
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
+ entry1.dest_mode_logical = true;
+ entry1.masked = false;
+ entry1.destid_0_7 = apic_id & 0xFF;
+ entry1.virt_destid_8_14 = apic_id >> 8;
+ entry1.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
+ entry1.active_low = entry0.active_low;
+ entry1.is_level = false;
entry1.vector = 0;
ioapic_write_entry(apic, pin, entry1);
@@ -2662,31 +2006,66 @@ static int __init disable_timer_pin_setup(char *arg)
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
-int timer_through_8259 __initdata;
+static int __init mp_alloc_timer_irq(int ioapic, int pin)
+{
+ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+ int irq = -1;
+
+ if (domain) {
+ struct irq_alloc_info info;
+
+ ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
+ info.devid = mpc_ioapic_id(ioapic);
+ info.ioapic.pin = pin;
+ guard(mutex)(&ioapic_mutex);
+ irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info);
+ }
+
+ return irq;
+}
+
+static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ return;
+ }
+ }
+
+ /* Old apic/pin didn't exist, so just add a new one */
+ add_pin_to_irq_node(data, node, newapic, newpin);
+}
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
* fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_get_chip_data(0);
+ struct irq_data *irq_data = irq_get_irq_data(0);
+ struct mp_chip_data *data = irq_data->chip_data;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
- unsigned long flags;
int no_pin1 = 0;
- local_irq_save(flags);
+ if (!global_clock_event)
+ return;
+
+ local_irq_disable();
/*
* get/set the timer IRQ vector:
*/
legacy_pic->mask(0);
- assign_irq_vector(0, cfg, apic->target_cpus());
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2705,9 +2084,8 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
- "apic1=%d pin1=%d apic2=%d pin2=%d\n",
- cfg->vector, apic1, pin1, apic2, pin2);
+ pr_info("..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ cfg->vector, apic1, pin1, apic2, pin2);
/*
* Some BIOS writers are clueless and report the ExtINTA
@@ -2717,7 +2095,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
+ panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2727,98 +2105,90 @@ static inline void __init check_timer(void)
}
if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
+ /* Ok, does IRQ0 through the IOAPIC work? */
if (no_pin1) {
- add_pin_to_irq_node(cfg, node, apic1, pin1);
- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ mp_alloc_timer_irq(apic1, pin1);
} else {
- /* for edge trigger, setup_ioapic_irq already
- * leave it unmasked.
+ /*
+ * for edge trigger, it's already unmasked,
* so only need to unmask if it is level-trigger
* do we really have level trigger timer?
*/
- int idx;
- idx = find_irq_entry(apic1, pin1, mp_INT);
- if (idx != -1 && irq_trigger(idx))
- unmask_ioapic(cfg);
+ int idx = find_irq_entry(apic1, pin1, mp_INT);
+
+ if (idx != -1 && irq_is_level(idx))
+ unmask_ioapic_irq(irq_get_irq_data(0));
}
+ irq_domain_deactivate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
if (timer_irq_works()) {
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
- "8254 timer not connected to IO-APIC\n");
+ pr_err("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
- "(IRQ0) through the 8259A ...\n");
- apic_printk(APIC_QUIET, KERN_INFO
- "..... (found apic %d pin %d) ...\n", apic2, pin2);
+ pr_info("...trying to set up timer (IRQ0) through the 8259A ...\n");
+ pr_info("..... (found apic %d pin %d) ...\n", apic2, pin2);
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
- setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
+ irq_domain_deactivate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
legacy_pic->unmask(0);
if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
- timer_through_8259 = 1;
+ pr_info("....... works.\n");
goto out;
}
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+ pr_info("....... failed.\n");
}
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as Virtual Wire IRQ...\n");
+ pr_info("...trying to set up timer as Virtual Wire IRQ...\n");
lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
legacy_pic->unmask(0);
if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ pr_info("..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+ pr_info("..... failed.\n");
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as ExtINT IRQ...\n");
+ pr_info("...trying to set up timer as ExtINT IRQ...\n");
legacy_pic->init(0);
legacy_pic->make_irq(0);
apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ legacy_pic->unmask(0);
unlock_ExtINT_logic();
if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ pr_info("..... works.\n");
goto out;
}
- local_irq_disable();
- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
- if (x2apic_preenabled)
- apic_printk(APIC_QUIET, KERN_INFO
- "Perhaps problem with the pre-enabled x2apic mode\n"
- "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+
+ pr_info("..... failed :\n");
+ if (apic_is_x2apic_enabled()) {
+ pr_info("Perhaps problem with the pre-enabled x2apic mode\n"
+ "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+ }
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
- local_irq_restore(flags);
+ local_irq_enable();
}
/*
@@ -2840,826 +2210,319 @@ out:
*/
#define PIC_IRQS (1UL << PIC_CASCADE_IR)
-void __init setup_IO_APIC(void)
-{
-
- /*
- * calling enable_IO_APIC() is moved to setup_local_APIC for BP
- */
- io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
-
- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
- /*
- * Set up IO-APIC IRQ routing.
- */
- x86_init.mpparse.setup_ioapic_ids();
-
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- if (legacy_pic->nr_legacy_irqs)
- check_timer();
-}
-
-/*
- * Called after all the initialization is done. If we didn't find any
- * APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
- if (sis_apic_bug == -1)
- sis_apic_bug = 0;
- return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-static void resume_ioapic_id(int ioapic_idx)
-{
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic_idx, 0);
- if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
- reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
- io_apic_write(ioapic_idx, 0, reg_00.raw);
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void ioapic_resume(void)
-{
- int ioapic_idx;
-
- for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
- resume_ioapic_id(ioapic_idx);
-
- restore_ioapic_entries();
-}
-
-static struct syscore_ops ioapic_syscore_ops = {
- .suspend = save_ioapic_entries,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_ops(void)
-{
- register_syscore_ops(&ioapic_syscore_ops);
-
- return 0;
-}
-
-device_initcall(ioapic_init_ops);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
+static int mp_irqdomain_create(int ioapic)
{
- struct irq_cfg **cfg;
- unsigned long flags;
- int irq, i;
-
- if (from < nr_irqs_gsi)
- from = nr_irqs_gsi;
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+ int hwirqs = mp_ioapic_pin_count(ioapic);
+ struct ioapic *ip = &ioapics[ioapic];
+ struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
+ struct irq_domain *parent;
+ struct fwnode_handle *fn;
+ struct irq_fwspec fwspec;
- cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
- if (!cfg)
+ if (cfg->type == IOAPIC_DOMAIN_INVALID)
return 0;
- irq = alloc_irqs_from(from, count, node);
- if (irq < 0)
- goto out_cfgs;
-
- for (i = 0; i < count; i++) {
- cfg[i] = alloc_irq_cfg(irq + i, node);
- if (!cfg[i])
- goto out_irqs;
+ /* Handle device tree enumerated APICs proper */
+ if (cfg->dev) {
+ fn = of_fwnode_handle(cfg->dev);
+ } else {
+ fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic));
+ if (!fn)
+ return -ENOMEM;
}
- raw_spin_lock_irqsave(&vector_lock, flags);
- for (i = 0; i < count; i++)
- if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
- goto out_vecs;
- raw_spin_unlock_irqrestore(&vector_lock, flags);
+ fwspec.fwnode = fn;
+ fwspec.param_count = 1;
+ fwspec.param[0] = mpc_ioapic_id(ioapic);
- for (i = 0; i < count; i++) {
- irq_set_chip_data(irq + i, cfg[i]);
- irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI);
+ if (!parent) {
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
+ return -ENODEV;
}
- kfree(cfg);
- return irq;
-
-out_vecs:
- for (i--; i >= 0; i--)
- __clear_irq_vector(irq + i, cfg[i]);
- raw_spin_unlock_irqrestore(&vector_lock, flags);
-out_irqs:
- for (i = 0; i < count; i++)
- free_irq_at(irq + i, cfg[i]);
-out_cfgs:
- kfree(cfg);
- return 0;
-}
-
-unsigned int create_irq_nr(unsigned int from, int node)
-{
- return __create_irqs(from, 1, node);
-}
-
-int create_irq(void)
-{
- int node = cpu_to_node(0);
- unsigned int irq_want;
- int irq;
-
- irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want, node);
-
- if (irq == 0)
- irq = -1;
-
- return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
- unsigned long flags;
-
- irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
-
- free_remapped_irq(irq);
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
- raw_spin_unlock_irqrestore(&vector_lock, flags);
- free_irq_at(irq, cfg);
-}
-
-void destroy_irqs(unsigned int irq, unsigned int count)
-{
- unsigned int i;
-
- for (i = 0; i < count; i++)
- destroy_irq(irq + i);
-}
-
-/*
- * MSI message composition
- */
-void native_compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
-
- msg->address_hi = MSI_ADDR_BASE_HI;
-
- if (x2apic_enabled())
- msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
-
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((apic->irq_dest_mode == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
- struct msi_msg *msg, u8 hpet_id)
-{
- struct irq_cfg *cfg;
- int err;
- unsigned dest;
-
- if (disable_apic)
- return -ENXIO;
-
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
+ ip->irqdomain = irq_domain_create_hierarchy(parent, 0, hwirqs, fn, cfg->ops,
+ (void *)(long)ioapic);
+ if (!ip->irqdomain) {
+ /* Release fw handle if it was allocated above */
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
+ return -ENOMEM;
+ }
- x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+ if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT)
+ ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1);
return 0;
}
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+static void ioapic_destroy_irqdomain(int idx)
{
- struct irq_cfg *cfg = data->chip_data;
- struct msi_msg msg;
- unsigned int dest;
+ struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg;
+ struct fwnode_handle *fn = ioapics[idx].irqdomain->fwnode;
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- __get_cached_msi_msg(data->msi_desc, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
- __write_msi_msg(data->msi_desc, &msg);
-
- return IRQ_SET_MASK_OK_NOCOPY;
+ if (ioapics[idx].irqdomain) {
+ irq_domain_remove(ioapics[idx].irqdomain);
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
+ ioapics[idx].irqdomain = NULL;
+ }
}
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .irq_unmask = unmask_msi_irq,
- .irq_mask = mask_msi_irq,
- .irq_ack = ack_apic_edge,
- .irq_set_affinity = msi_set_affinity,
- .irq_retrigger = ioapic_retrigger_irq,
-};
-
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- unsigned int irq_base, unsigned int irq_offset)
+void __init setup_IO_APIC(void)
{
- struct irq_chip *chip = &msi_chip;
- struct msi_msg msg;
- unsigned int irq = irq_base + irq_offset;
- int ret;
-
- ret = msi_compose_msg(dev, irq, &msg, -1);
- if (ret < 0)
- return ret;
+ int ioapic;
- irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+ if (ioapic_is_disabled || !nr_ioapics)
+ return;
- /*
- * MSI-X message is written per-IRQ, the offset is always 0.
- * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
- */
- if (!irq_offset)
- write_msi_msg(irq, &msg);
+ io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+ apic_pr_verbose("ENABLING IO-APIC IRQs\n");
+ for_each_ioapic(ioapic)
+ BUG_ON(mp_irqdomain_create(ioapic));
- irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+ /* Set up IO-APIC IRQ routing. */
+ x86_init.mpparse.setup_ioapic_ids();
- dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ if (nr_legacy_irqs())
+ check_timer();
- return 0;
+ ioapic_initialized = 1;
}
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+static void resume_ioapic_id(int ioapic_idx)
{
- unsigned int irq, irq_want;
- struct msi_desc *msidesc;
- int node, ret;
-
- /* Multiple MSI vectors only supported with interrupt remapping */
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
-
- node = dev_to_node(&dev->dev);
- irq_want = nr_irqs_gsi;
- list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want, node);
- if (irq == 0)
- return -ENOSPC;
-
- irq_want = irq + 1;
+ union IO_APIC_reg_00 reg_00;
- ret = setup_msi_irq(dev, msidesc, irq, 0);
- if (ret < 0)
- goto error;
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ reg_00.raw = io_apic_read(ioapic_idx, 0);
+ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+ io_apic_write(ioapic_idx, 0, reg_00.raw);
}
- return 0;
-
-error:
- destroy_irq(irq);
- return ret;
}
-void native_teardown_msi_irq(unsigned int irq)
+static int ioapic_suspend(void *data)
{
- destroy_irq(irq);
+ return save_ioapic_entries();
}
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+static void ioapic_resume(void *data)
{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest, irq = data->irq;
- struct msi_msg msg;
-
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- dmar_msi_read(irq, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+ int ioapic_idx;
- dmar_msi_write(irq, &msg);
+ for_each_ioapic_reverse(ioapic_idx)
+ resume_ioapic_id(ioapic_idx);
- return IRQ_SET_MASK_OK_NOCOPY;
+ restore_ioapic_entries();
}
-static struct irq_chip dmar_msi_type = {
- .name = "DMAR_MSI",
- .irq_unmask = dmar_msi_unmask,
- .irq_mask = dmar_msi_mask,
- .irq_ack = ack_apic_edge,
- .irq_set_affinity = dmar_msi_set_affinity,
- .irq_retrigger = ioapic_retrigger_irq,
+static const struct syscore_ops ioapic_syscore_ops = {
+ .suspend = ioapic_suspend,
+ .resume = ioapic_resume,
};
-int arch_setup_dmar_msi(unsigned int irq)
-{
- int ret;
- struct msi_msg msg;
-
- ret = msi_compose_msg(NULL, irq, &msg, -1);
- if (ret < 0)
- return ret;
- dmar_msi_write(irq, &msg);
- irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
- return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-static int hpet_msi_set_affinity(struct irq_data *data,
- const struct cpumask *mask, bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- struct msi_msg msg;
- unsigned int dest;
-
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- hpet_msi_read(data->handler_data, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
- hpet_msi_write(data->handler_data, &msg);
-
- return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip hpet_msi_type = {
- .name = "HPET_MSI",
- .irq_unmask = hpet_msi_unmask,
- .irq_mask = hpet_msi_mask,
- .irq_ack = ack_apic_edge,
- .irq_set_affinity = hpet_msi_set_affinity,
- .irq_retrigger = ioapic_retrigger_irq,
+static struct syscore ioapic_syscore = {
+ .ops = &ioapic_syscore_ops,
};
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+static int __init ioapic_init_ops(void)
{
- struct irq_chip *chip = &hpet_msi_type;
- struct msi_msg msg;
- int ret;
+ register_syscore(&ioapic_syscore);
- ret = msi_compose_msg(NULL, irq, &msg, id);
- if (ret < 0)
- return ret;
-
- hpet_msi_write(irq_get_handler_data(irq), &msg);
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
-
- irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
- struct ht_irq_msg msg;
- fetch_ht_irq_msg(irq, &msg);
-
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
- write_ht_irq_msg(irq, &msg);
-}
-
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest;
-
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- target_ht_irq(data->irq, dest, cfg->vector);
- return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .irq_mask = mask_ht_irq,
- .irq_unmask = unmask_ht_irq,
- .irq_ack = ack_apic_edge,
- .irq_set_affinity = ht_set_affinity,
- .irq_retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
- struct irq_cfg *cfg;
- struct ht_irq_msg msg;
- unsigned dest;
- int err;
-
- if (disable_apic)
- return -ENXIO;
-
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((apic->irq_dest_mode == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
-
- write_ht_irq_msg(irq, &msg);
- irq_set_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
-
- dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-
- return 0;
-}
-#endif /* CONFIG_HT_IRQ */
-
-static int
-io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
-{
- struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
- int ret;
-
- if (!cfg)
- return -EINVAL;
- ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
- if (!ret)
- setup_ioapic_irq(irq, cfg, attr);
- return ret;
-}
-
-int io_apic_setup_irq_pin_once(unsigned int irq, int node,
- struct io_apic_irq_attr *attr)
-{
- unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
- int ret;
-
- /* Avoid redundant programming */
- if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return 0;
- }
- ret = io_apic_setup_irq_pin(irq, node, attr);
- if (!ret)
- set_bit(pin, ioapics[ioapic_idx].pin_programmed);
- return ret;
-}
+device_initcall(ioapic_init_ops);
-static int __init io_apic_get_redir_entries(int ioapic)
+static int io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
- unsigned long flags;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
reg_01.raw = io_apic_read(ioapic, 1);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- /* The register returns the maximum index redir index
- * supported, which is one less than the total number of redir
- * entries.
+ /*
+ * The register returns the maximum index redir index supported,
+ * which is one less than the total number of redir entries.
*/
return reg_01.bits.entries + 1;
}
-static void __init probe_nr_irqs_gsi(void)
-{
- int nr;
-
- nr = gsi_top + NR_IRQS_LEGACY;
- if (nr > nr_irqs_gsi)
- nr_irqs_gsi = nr;
-
- printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
-}
-
-int get_nr_irqs_gsi(void)
-{
- return nr_irqs_gsi;
-}
-
-int __init arch_probe_nr_irqs(void)
+unsigned int arch_dynirq_lower_bound(unsigned int from)
{
- int nr;
+ unsigned int ret;
- if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
- nr_irqs = NR_VECTORS * nr_cpu_ids;
-
- nr = nr_irqs_gsi + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
/*
- * for MSI and HT dyn irq
+ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
+ * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- nr += nr_irqs_gsi * 16;
-#endif
- if (nr < nr_irqs)
- nr_irqs = nr;
-
- return NR_IRQS_LEGACY;
-}
-
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int node;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- irq_attr->ioapic);
- return -EINVAL;
- }
-
- node = dev ? dev_to_node(dev) : cpu_to_node(0);
+ ret = ioapic_dynirq_base ? : gsi_top;
- return io_apic_setup_irq_pin_once(irq, node, irq_attr);
+ /*
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
+ */
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
-static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int io_apic_get_unique_id(int ioapic, int apic_id)
{
+ static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC);
+ const u32 broadcast_id = 0xF;
union IO_APIC_reg_00 reg_00;
- static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
- physid_mask_t tmp;
- unsigned long flags;
int i = 0;
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (physids_empty(apic_id_map))
- apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
+ /* Initialize the ID map */
+ if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC))
+ copy_phys_cpu_present_map(apic_id_map);
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic, 0);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+ reg_00.raw = io_apic_read(ioapic, 0);
- if (apic_id >= get_physical_broadcast()) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ if (apic_id >= broadcast_id) {
+ pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n",
+ ioapic, apic_id, reg_00.bits.ID);
apic_id = reg_00.bits.ID;
}
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (apic->check_apicid_used(&apic_id_map, apic_id)) {
-
- for (i = 0; i < get_physical_broadcast(); i++) {
- if (!apic->check_apicid_used(&apic_id_map, i))
+ /* Every APIC in a system must have a unique ID */
+ if (test_bit(apic_id, apic_id_map)) {
+ for (i = 0; i < broadcast_id; i++) {
+ if (!test_bit(i, apic_id_map))
break;
}
- if (i == get_physical_broadcast())
+ if (i == broadcast_id)
panic("Max apic_id exceeded!\n");
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
+ pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i);
apic_id = i;
}
- apic->apicid_to_cpu_present(apic_id, &tmp);
- physids_or(apic_id_map, apic_id_map, tmp);
+ set_bit(apic_id, apic_id_map);
if (reg_00.bits.ID != apic_id) {
reg_00.bits.ID = apic_id;
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, reg_00.raw);
- reg_00.raw = io_apic_read(ioapic, 0);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ }
/* Sanity check */
if (reg_00.bits.ID != apic_id) {
- pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
- ioapic);
+ pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
return -1;
}
}
- apic_printk(APIC_VERBOSE, KERN_INFO
- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+ apic_pr_verbose("IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
return apic_id;
}
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
{
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return io_apic_get_unique_id(nr_ioapics, id);
- else
- return id;
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version))
+ return io_apic_get_unique_id(idx, id);
+ return id;
}
#else
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
{
- int i;
+ union IO_APIC_reg_00 reg_00;
DECLARE_BITMAP(used, 256);
+ u8 new_id;
+ int i;
bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i)
__set_bit(mpc_ioapic_id(i), used);
- }
+
+ /* Hand out the requested id if available */
if (!test_bit(id, used))
return id;
- return find_first_zero_bit(used, 256);
-}
-#endif
-static int __init io_apic_get_version(int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
+ /*
+ * Read the current id from the ioapic and keep it if
+ * available.
+ */
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+ reg_00.raw = io_apic_read(idx, 0);
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ new_id = reg_00.bits.ID;
+ if (!test_bit(new_id, used)) {
+ apic_pr_verbose("IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+ idx, new_id, id);
+ return new_id;
+ }
- return reg_01.bits.version;
+ /* Get the next free id and write it to the ioapic. */
+ new_id = find_first_zero_bit(used, 256);
+ reg_00.bits.ID = new_id;
+ scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+ io_apic_write(idx, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(idx, 0);
+ }
+ /* Sanity check */
+ BUG_ON(reg_00.bits.ID != new_id);
+
+ return new_id;
}
+#endif
-int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
+static int io_apic_get_version(int ioapic)
{
- int ioapic, pin, idx;
-
- if (skip_ioapic_setup)
- return -1;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return -1;
-
- pin = mp_find_ioapic_pin(ioapic, gsi);
- if (pin < 0)
- return -1;
+ union IO_APIC_reg_01 reg_01;
- idx = find_irq_entry(ioapic, pin, mp_INT);
- if (idx < 0)
- return -1;
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ reg_01.raw = io_apic_read(ioapic, 1);
- *trigger = irq_trigger(idx);
- *polarity = irq_polarity(idx);
- return 0;
+ return reg_01.bits.version;
}
/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be apic->target_cpus()
+ * This function updates target affinity of IOAPIC interrupts to include
+ * the CPUs which came online during SMP bringup.
*/
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
- const struct cpumask *mask;
- struct irq_data *idata;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
- for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
-
- if ((ioapic > 0) && (irq > 16))
- continue;
-
- idata = irq_get_irq_data(irq);
-
- /*
- * Honour affinities which have been set in early boot
- */
- if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
- mask = idata->affinity;
- else
- mask = apic->target_cpus();
-
- x86_io_apic_ops.set_affinity(idata, mask, false);
- }
-
-}
-#endif
-
#define IOAPIC_RESOURCE_NAME_SIZE 11
static struct resource *ioapic_resources;
-static struct resource * __init ioapic_setup_resources(int nr_ioapics)
+static struct resource * __init ioapic_setup_resources(void)
{
- unsigned long n;
struct resource *res;
+ unsigned long n;
char *mem;
int i;
- if (nr_ioapics <= 0)
+ if (nr_ioapics == 0)
return NULL;
n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
n *= nr_ioapics;
- mem = alloc_bootmem(n);
+ mem = memblock_alloc_or_panic(n, SMP_CACHE_BYTES);
res = (void *)mem;
mem += sizeof(struct resource) * nr_ioapics;
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
+ ioapics[i].iomem_res = &res[i];
}
ioapic_resources = res;
@@ -3667,24 +2530,40 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
return res;
}
-void __init native_io_apic_init_mappings(void)
+static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+{
+ pgprot_t flags = FIXMAP_PAGE_NOCACHE;
+
+ /*
+ * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot
+ * bits, just like normal ioremap():
+ */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+ if (x86_platform.hyper.is_private_mmio(phys))
+ flags = pgprot_encrypted(flags);
+ else
+ flags = pgprot_decrypted(flags);
+ }
+
+ __set_fixmap(idx, phys, flags);
+}
+
+void __init io_apic_init_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
int i;
- ioapic_res = ioapic_setup_resources(nr_ioapics);
- for (i = 0; i < nr_ioapics; i++) {
+ ioapic_res = ioapic_setup_resources();
+ for_each_ioapic(i) {
if (smp_found_config) {
ioapic_phys = mpc_ioapic_addr(i);
#ifdef CONFIG_X86_32
if (!ioapic_phys) {
- printk(KERN_ERR
- "WARNING: bogus zero IO-APIC "
- "address found in MPTABLE, "
+ pr_err("WARNING: bogus zero IO-APIC address found in MPTABLE, "
"disabling IO/APIC support!\n");
smp_found_config = 0;
- skip_ioapic_setup = 1;
+ ioapic_is_disabled = true;
goto fake_ioapic_page;
}
#endif
@@ -3692,36 +2571,33 @@ void __init native_io_apic_init_mappings(void)
#ifdef CONFIG_X86_32
fake_ioapic_page:
#endif
- ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE,
+ PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
- set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
- ioapic_phys);
+ io_apic_set_fixmap(idx, ioapic_phys);
+ apic_pr_verbose("mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ioapic_phys);
idx++;
ioapic_res->start = ioapic_phys;
ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
-
- probe_nr_irqs_gsi();
}
void __init ioapic_insert_resources(void)
{
- int i;
struct resource *r = ioapic_resources;
+ int i;
if (!r) {
if (nr_ioapics > 0)
- printk(KERN_ERR
- "IO APIC resources couldn't be allocated.\n");
+ pr_err("IO APIC resources couldn't be allocated.\n");
return;
}
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
insert_resource(&iomem_resource, r);
r++;
}
@@ -3729,20 +2605,20 @@ void __init ioapic_insert_resources(void)
int mp_find_ioapic(u32 gsi)
{
- int i = 0;
+ int i;
if (nr_ioapics == 0)
return -1;
/* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
- if ((gsi >= gsi_cfg->gsi_base)
- && (gsi <= gsi_cfg->gsi_end))
+
+ if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
return i;
}
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+ pr_err("ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
return -1;
}
@@ -3750,7 +2626,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
{
struct mp_ioapic_gsi *gsi_cfg;
- if (WARN_ON(ioapic == -1))
+ if (WARN_ON(ioapic < 0))
return -1;
gsi_cfg = mp_ioapic_gsi_routing(ioapic);
@@ -3760,21 +2636,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
return gsi - gsi_cfg->gsi_base;
}
-static __init int bad_ioapic(unsigned long address)
-{
- if (nr_ioapics >= MAX_IO_APICS) {
- pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
- MAX_IO_APICS, nr_ioapics);
- return 1;
- }
- if (!address) {
- pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
- return 1;
- }
- return 0;
-}
-
-static __init int bad_ioapic_register(int idx)
+static int bad_ioapic_register(int idx)
{
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
@@ -3793,29 +2655,59 @@ static __init int bad_ioapic_register(int idx)
return 0;
}
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+static int find_free_ioapic_entry(void)
{
- int idx = 0;
- int entries;
+ for (int idx = 0; idx < MAX_IO_APICS; idx++) {
+ if (ioapics[idx].nr_registers == 0)
+ return idx;
+ }
+ return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id: hardware IOAPIC ID
+ * @address: physical address of IOAPIC register area
+ * @gsi_base: base of GSI associated with the IOAPIC
+ * @cfg: configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg)
+{
+ bool hotplug = !!ioapic_initialized;
struct mp_ioapic_gsi *gsi_cfg;
+ int idx, ioapic, entries;
+ u32 gsi_end;
- if (bad_ioapic(address))
- return;
+ if (!address) {
+ pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+ return -EINVAL;
+ }
- idx = nr_ioapics;
+ for_each_ioapic(ioapic) {
+ if (ioapics[ioapic].mp_config.apicaddr == address) {
+ pr_warn("address 0x%x conflicts with IOAPIC%d\n", address, ioapic);
+ return -EEXIST;
+ }
+ }
+
+ idx = find_free_ioapic_entry();
+ if (idx >= MAX_IO_APICS) {
+ pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+ MAX_IO_APICS, idx);
+ return -ENOSPC;
+ }
ioapics[idx].mp_config.type = MP_IOAPIC;
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
ioapics[idx].mp_config.apicaddr = address;
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
+ io_apic_set_fixmap(FIX_IO_APIC_BASE_0 + idx, address);
if (bad_ioapic_register(idx)) {
clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
- return;
+ return -ENODEV;
}
- ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+ ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
/*
@@ -3823,39 +2715,245 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
entries = io_apic_get_redir_entries(idx);
+ gsi_end = gsi_base + entries - 1;
+ for_each_ioapic(ioapic) {
+ gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+ if ((gsi_base >= gsi_cfg->gsi_base &&
+ gsi_base <= gsi_cfg->gsi_end) ||
+ (gsi_end >= gsi_cfg->gsi_base &&
+ gsi_end <= gsi_cfg->gsi_end)) {
+ pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+ gsi_base, gsi_end, gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ return -ENOSPC;
+ }
+ }
gsi_cfg = mp_ioapic_gsi_routing(idx);
gsi_cfg->gsi_base = gsi_base;
- gsi_cfg->gsi_end = gsi_base + entries - 1;
+ gsi_cfg->gsi_end = gsi_end;
+
+ ioapics[idx].irqdomain = NULL;
+ ioapics[idx].irqdomain_cfg = *cfg;
/*
- * The number of IO-APIC IRQ registers (== #pins):
+ * If mp_register_ioapic() is called during early boot stage when
+ * walking ACPI/DT tables, it's too early to create irqdomain,
+ * we are still using bootmem allocator. So delay it to setup_IO_APIC().
*/
- ioapics[idx].nr_registers = entries;
+ if (hotplug) {
+ if (mp_irqdomain_create(idx)) {
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ return -ENOMEM;
+ }
+ alloc_ioapic_saved_registers(idx);
+ }
if (gsi_cfg->gsi_end >= gsi_top)
gsi_top = gsi_cfg->gsi_end + 1;
+ if (nr_ioapics <= idx)
+ nr_ioapics = idx + 1;
+
+ /* Set nr_registers to mark entry present */
+ ioapics[idx].nr_registers = entries;
pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
- idx, mpc_ioapic_id(idx),
- mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+ idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
gsi_cfg->gsi_base, gsi_cfg->gsi_end);
- nr_ioapics++;
+ return 0;
}
-/* Enable IOAPIC early just for system timer */
-void __init pre_init_apic_IRQ0(void)
+int mp_unregister_ioapic(u32 gsi_base)
{
- struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
+ int ioapic, pin;
+ int found = 0;
- printk(KERN_INFO "Early APIC setup for system timer0\n");
-#ifndef CONFIG_SMP
- physid_set_mask_of_physid(boot_cpu_physical_apicid,
- &phys_cpu_present_map);
-#endif
- setup_local_APIC();
+ for_each_ioapic(ioapic) {
+ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
+ return -ENODEV;
+ }
+
+ for_each_pin(ioapic, pin) {
+ u32 gsi = mp_pin_to_gsi(ioapic, pin);
+ int irq = mp_map_gsi_to_irq(gsi, 0, NULL);
+ struct mp_chip_data *data;
+
+ if (irq >= 0) {
+ data = irq_get_chip_data(irq);
+ if (data && data->count) {
+ pr_warn("pin%d on IOAPIC%d is still in use.\n", pin, ioapic);
+ return -EBUSY;
+ }
+ }
+ }
+
+ /* Mark entry not present */
+ ioapics[ioapic].nr_registers = 0;
+ ioapic_destroy_irqdomain(ioapic);
+ free_ioapic_saved_registers(ioapic);
+ if (ioapics[ioapic].iomem_res)
+ release_resource(ioapics[ioapic].iomem_res);
+ clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+ memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+ return 0;
+}
- io_apic_setup_irq_pin(0, 0, &attr);
- irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
- "edge");
+int mp_ioapic_registered(u32 gsi_base)
+{
+ int ioapic;
+
+ for_each_ioapic(ioapic)
+ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+ return 1;
+
+ return 0;
+}
+
+static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
+ struct irq_alloc_info *info)
+{
+ if (info && info->ioapic.valid) {
+ data->is_level = info->ioapic.is_level;
+ data->active_low = info->ioapic.active_low;
+ } else if (__acpi_get_override_irq(gsi, &data->is_level, &data->active_low) < 0) {
+ /* PCI interrupts are always active low level triggered. */
+ data->is_level = true;
+ data->active_low = true;
+ }
+}
+
+/*
+ * Configure the I/O-APIC specific fields in the routing entry.
+ *
+ * This is important to setup the I/O-APIC specific bits (is_level,
+ * active_low, masked) because the underlying parent domain will only
+ * provide the routing information and is oblivious of the I/O-APIC
+ * specific bits.
+ *
+ * The entry is just preconfigured at this point and not written into the
+ * RTE. This happens later during activation which will fill in the actual
+ * routing information.
+ */
+static void mp_preconfigure_entry(struct mp_chip_data *data)
+{
+ struct IO_APIC_route_entry *entry = &data->entry;
+
+ memset(entry, 0, sizeof(*entry));
+ entry->is_level = data->is_level;
+ entry->active_low = data->active_low;
+ /*
+ * Mask level triggered irqs. Edge triggered irqs are masked
+ * by the irq core code in case they fire.
+ */
+ entry->masked = data->is_level;
}
+
+int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct mp_chip_data *data;
+ struct irq_data *irq_data;
+ int ret, ioapic, pin;
+ unsigned long flags;
+
+ if (!info || nr_irqs > 1)
+ return -EINVAL;
+ irq_data = irq_domain_get_irq_data(domain, virq);
+ if (!irq_data)
+ return -EINVAL;
+
+ ioapic = mp_irqdomain_ioapic_idx(domain);
+ pin = info->ioapic.pin;
+ if (irq_resolve_mapping(domain, (irq_hw_number_t)pin))
+ return -EEXIST;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+ if (ret < 0)
+ goto free_data;
+
+ INIT_LIST_HEAD(&data->irq_2_pin);
+ irq_data->hwirq = info->ioapic.pin;
+ irq_data->chip = (domain->parent == x86_vector_domain) ?
+ &ioapic_chip : &ioapic_ir_chip;
+ irq_data->chip_data = data;
+ mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
+
+ if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin)) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+
+ mp_preconfigure_entry(data);
+ mp_register_handler(virq, data->is_level);
+
+ local_irq_save(flags);
+ if (virq < nr_legacy_irqs())
+ legacy_pic->mask(virq);
+ local_irq_restore(flags);
+
+ apic_pr_verbose("IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n",
+ ioapic, mpc_ioapic_id(ioapic), pin, virq, data->is_level, data->active_low);
+ return 0;
+
+free_irqs:
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+free_data:
+ kfree(data);
+ return ret;
+}
+
+void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *irq_data;
+ struct mp_chip_data *data;
+
+ BUG_ON(nr_irqs != 1);
+ irq_data = irq_domain_get_irq_data(domain, virq);
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq);
+ WARN_ON(!list_empty(&data->irq_2_pin));
+ kfree(irq_data->chip_data);
+ }
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
+}
+
+int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool reserve)
+{
+ guard(raw_spinlock_irqsave)(&ioapic_lock);
+ ioapic_configure_entry(irq_data);
+ return 0;
+}
+
+void mp_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ /* It won't be called for IRQ with multiple IOAPIC pins associated */
+ ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq);
+}
+
+int mp_irqdomain_ioapic_idx(struct irq_domain *domain)
+{
+ return (int)(long)domain->host_data;
+}
+
+const struct irq_domain_ops mp_ioapic_irqdomain_ops = {
+ .alloc = mp_irqdomain_alloc,
+ .free = mp_irqdomain_free,
+ .activate = mp_irqdomain_activate,
+ .deactivate = mp_irqdomain_deactivate,
+};
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 7434d8556d09..98a57cb4aa86 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,167 +1,291 @@
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
+// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/cpumask.h>
#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/apic.h>
-#include <asm/proto.h>
-#include <asm/ipi.h>
+#include <linux/smp.h>
+#include <linux/string_choices.h>
-void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
+#include <asm/io_apic.h>
+
+#include "local.h"
+
+DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
+
+#ifdef CONFIG_SMP
+static int apic_ipi_shorthand_off __ro_after_init;
+
+static __init int apic_ipi_shorthand(char *str)
{
- unsigned long query_cpu;
- unsigned long flags;
+ get_option(&str, &apic_ipi_shorthand_off);
+ return 1;
+}
+__setup("no_ipi_broadcast=", apic_ipi_shorthand);
+
+static int __init print_ipi_mode(void)
+{
+ pr_info("IPI shorthand broadcast: %s\n",
+ str_disabled_enabled(apic_ipi_shorthand_off));
+ return 0;
+}
+late_initcall(print_ipi_mode);
+void apic_smt_update(void)
+{
/*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicast to each CPU instead.
- * - mbligh
+ * Do not switch to broadcast mode if:
+ * - Disabled on the command line
+ * - Only a single CPU is online
+ * - Not all present CPUs have been at least booted once
+ *
+ * The latter is important as the local APIC might be in some
+ * random state and a broadcast might cause havoc. That's
+ * especially true for NMI broadcasting.
*/
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
- query_cpu), vector, APIC_DEST_PHYSICAL);
+ if (apic_ipi_shorthand_off || num_online_cpus() == 1 ||
+ !cpumask_equal(cpu_present_mask, &cpus_booted_once_mask)) {
+ static_branch_disable(&apic_use_ipi_shorthand);
+ } else {
+ static_branch_enable(&apic_use_ipi_shorthand);
}
- local_irq_restore(flags);
}
-void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
- int vector)
+void apic_send_IPI_allbutself(unsigned int vector)
{
- unsigned int this_cpu = smp_processor_id();
- unsigned int query_cpu;
- unsigned long flags;
+ if (num_online_cpus() < 2)
+ return;
- /* See Hack comment above */
+ if (static_branch_likely(&apic_use_ipi_shorthand))
+ __apic_send_IPI_allbutself(vector);
+ else
+ __apic_send_IPI_mask_allbutself(cpu_online_mask, vector);
+}
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- if (query_cpu == this_cpu)
- continue;
- __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
- query_cpu), vector, APIC_DEST_PHYSICAL);
+/*
+ * Send a 'reschedule' IPI to another CPU. It goes straight through and
+ * wastes no time serializing anything. Worst case is that we lose a
+ * reschedule ...
+ */
+void native_smp_send_reschedule(int cpu)
+{
+ if (unlikely(cpu_is_offline(cpu))) {
+ WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu);
+ return;
}
- local_irq_restore(flags);
+ __apic_send_IPI(cpu, RESCHEDULE_VECTOR);
}
-#ifdef CONFIG_X86_32
+void native_send_call_func_single_ipi(int cpu)
+{
+ __apic_send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
+}
-void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector)
+void native_send_call_func_ipi(const struct cpumask *mask)
{
- unsigned long flags;
- unsigned int query_cpu;
+ if (static_branch_likely(&apic_use_ipi_shorthand)) {
+ unsigned int cpu = smp_processor_id();
+
+ if (!cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask))
+ goto sendmask;
+
+ if (cpumask_test_cpu(cpu, mask))
+ __apic_send_IPI_all(CALL_FUNCTION_VECTOR);
+ else if (num_online_cpus() > 1)
+ __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ return;
+ }
+
+sendmask:
+ __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+}
+
+void apic_send_nmi_to_offline_cpu(unsigned int cpu)
+{
+ if (WARN_ON_ONCE(!apic->nmi_to_offline_cpu))
+ return;
+ if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, &cpus_booted_once_mask)))
+ return;
+ apic->send_IPI(cpu, NMI_VECTOR);
+}
+#endif /* CONFIG_SMP */
+
+static inline int __prepare_ICR2(unsigned int mask)
+{
+ return SET_XAPIC_DEST_FIELD(mask);
+}
+
+u32 apic_mem_wait_icr_idle_timeout(void)
+{
+ int cnt;
+ for (cnt = 0; cnt < 1000; cnt++) {
+ if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
+ return 0;
+ inc_irq_stat(icr_read_retry_count);
+ udelay(100);
+ }
+ return APIC_ICR_BUSY;
+}
+
+void apic_mem_wait_icr_idle(void)
+{
+ while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+/*
+ * This is safe against interruption because it only writes the lower 32
+ * bits of the APIC_ICR register. The destination field is ignored for
+ * short hand IPIs.
+ *
+ * wait_icr_idle()
+ * write(ICR2, dest)
+ * NMI
+ * wait_icr_idle()
+ * write(ICR)
+ * wait_icr_idle()
+ * write(ICR)
+ *
+ * This function does not need to disable interrupts as there is no ICR2
+ * interaction. The memory write is direct except when the machine is
+ * affected by the 11AP Pentium erratum, which turns the plain write into
+ * an XCHG operation.
+ */
+static void __default_send_IPI_shortcut(unsigned int shortcut, int vector)
+{
/*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicasts to each CPU instead. This
- * should be modified to do 1 message per cluster ID - mbligh
+ * Wait for the previous ICR command to complete. Use
+ * safe_apic_wait_icr_idle() for the NMI vector as there have been
+ * issues where otherwise the system hangs when the panic CPU tries
+ * to stop the others before launching the kdump kernel.
*/
+ if (unlikely(vector == NMI_VECTOR))
+ apic_mem_wait_icr_idle_timeout();
+ else
+ apic_mem_wait_icr_idle();
+
+ /* Destination field (ICR2) and the destination mode are ignored */
+ native_apic_mem_write(APIC_ICR, __prepare_ICR(shortcut, vector, 0));
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int dest_mask, int vector,
+ unsigned int dest_mode)
+{
+ /* See comment in __default_send_IPI_shortcut() */
+ if (unlikely(vector == NMI_VECTOR))
+ apic_mem_wait_icr_idle_timeout();
+ else
+ apic_mem_wait_icr_idle();
+
+ /* Set the IPI destination field in the ICR */
+ native_apic_mem_write(APIC_ICR2, __prepare_ICR2(dest_mask));
+ /* Send it with the proper destination mode */
+ native_apic_mem_write(APIC_ICR, __prepare_ICR(0, vector, dest_mode));
+}
+
+void default_send_IPI_single_phys(int cpu, int vector)
+{
+ unsigned long flags;
local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- __default_send_IPI_dest_field(
- early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, apic->dest_logical);
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu),
+ vector, APIC_DEST_PHYSICAL);
local_irq_restore(flags);
}
-void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector)
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
{
unsigned long flags;
- unsigned int query_cpu;
- unsigned int this_cpu = smp_processor_id();
-
- /* See Hack comment above */
+ unsigned long cpu;
local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- if (query_cpu == this_cpu)
- continue;
- __default_send_IPI_dest_field(
- early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, apic->dest_logical);
- }
+ for_each_cpu(cpu, mask) {
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ cpu), vector, APIC_DEST_PHYSICAL);
+ }
local_irq_restore(flags);
}
-/*
- * This is only used on smaller machines.
- */
-void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+ int vector)
{
- unsigned long mask = cpumask_bits(cpumask)[0];
+ unsigned int cpu, this_cpu = smp_processor_id();
unsigned long flags;
- if (!mask)
- return;
-
local_irq_save(flags);
- WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
- __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ for_each_cpu(cpu, mask) {
+ if (cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ cpu), vector, APIC_DEST_PHYSICAL);
+ }
local_irq_restore(flags);
}
-void default_send_IPI_allbutself(int vector)
+/*
+ * Helper function for APICs which insist on cpumasks
+ */
+void default_send_IPI_single(int cpu, int vector)
{
- /*
- * if there are no other CPUs in the system then we get an APIC send
- * error if we try to broadcast, thus avoid sending IPIs in this case.
- */
- if (!(num_online_cpus() > 1))
- return;
+ __apic_send_IPI_mask(cpumask_of(cpu), vector);
+}
- __default_local_send_IPI_allbutself(vector);
+void default_send_IPI_allbutself(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
void default_send_IPI_all(int vector)
{
- __default_local_send_IPI_all(vector);
+ __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
void default_send_IPI_self(int vector)
{
- __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector);
}
-/* must come after the send_IPI functions above for inlining */
-static int convert_apicid_to_cpu(int apic_id)
+#ifdef CONFIG_X86_32
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector)
{
- int i;
+ unsigned long flags;
+ unsigned int cpu;
- for_each_possible_cpu(i) {
- if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
- return i;
- }
- return -1;
+ local_irq_save(flags);
+ for_each_cpu(cpu, mask)
+ __default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
}
-int safe_smp_processor_id(void)
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector)
{
- int apicid, cpuid;
+ unsigned int cpu, this_cpu = smp_processor_id();
+ unsigned long flags;
- if (!cpu_has_apic)
- return 0;
+ local_irq_save(flags);
+ for_each_cpu(cpu, mask) {
+ if (cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL);
+ }
+ local_irq_restore(flags);
+}
- apicid = hard_smp_processor_id();
- if (apicid == BAD_APICID)
- return 0;
+void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ unsigned long flags;
- cpuid = convert_apicid_to_cpu(apicid);
+ if (!mask)
+ return;
- return cpuid >= 0 ? cpuid : 0;
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
+ __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
}
#endif
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
new file mode 100644
index 000000000000..bdcf609eb283
--- /dev/null
+++ b/arch/x86/kernel/apic/local.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Historical copyright notices:
+ *
+ * Copyright 2004 James Cleverdon, IBM.
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 2002,2003 Andi Kleen, SuSE Labs.
+ */
+
+#include <linux/jump_label.h>
+
+#include <asm/irq_vectors.h>
+#include <asm/apic.h>
+
+/* X2APIC */
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
+u32 x2apic_get_apic_id(u32 id);
+
+void x2apic_send_IPI_all(int vector);
+void x2apic_send_IPI_allbutself(int vector);
+void x2apic_send_IPI_self(int vector);
+extern u32 x2apic_max_apicid;
+
+/* IPI */
+
+DECLARE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
+
+static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
+ unsigned int dest)
+{
+ unsigned int icr = shortcut | dest;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
+ return icr;
+}
+
+void default_init_apic_ldr(void);
+
+void apic_mem_wait_icr_idle(void);
+u32 apic_mem_wait_icr_idle_timeout(void);
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
+
+void default_send_IPI_single(int cpu, int vector);
+void default_send_IPI_single_phys(int cpu, int vector);
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector);
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector);
+void default_send_IPI_allbutself(int vector);
+void default_send_IPI_all(int vector);
+void default_send_IPI_self(int vector);
+
+#ifdef CONFIG_X86_32
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector);
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector);
+void default_send_IPI_mask_logical(const struct cpumask *mask, int vector);
+#endif
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 000000000000..66bc5d3e79db
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support of MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ * Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Convert to hierarchical irqdomain
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/irqdomain.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+#include <asm/xen/hypervisor.h>
+
+struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+ struct msi_msg msg[2] = { [1] = { }, };
+
+ __irq_msi_compose_msg(cfg, msg, false);
+ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+ struct irq_data *parent = irqd->parent_data;
+ unsigned int cpu;
+ int ret;
+
+ /* Save the current configuration */
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+ old_cfg = *cfg;
+
+ /* Allocate a new target vector */
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
+
+ /*
+ * For non-maskable and non-remapped MSI interrupts the migration
+ * to a different destination CPU and a different vector has to be
+ * done careful to handle the possible stray interrupt which can be
+ * caused by the non-atomic update of the address/data pair.
+ *
+ * Direct update is possible when:
+ * - The MSI is maskable (remapped MSI does not use this code path).
+ * The reservation mode bit is set in this case.
+ * - The new vector is the same as the old vector
+ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The interrupt is not yet started up
+ * - The new destination CPU is the same as the old destination CPU
+ */
+ if (!irqd_can_reserve(irqd) ||
+ cfg->vector == old_cfg.vector ||
+ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ !irqd_is_started(irqd) ||
+ cfg->dest_apicid == old_cfg.dest_apicid) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Paranoia: Validate that the interrupt target is the local
+ * CPU.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Redirect the interrupt to the new vector on the current CPU
+ * first. This might cause a spurious interrupt on this vector if
+ * the device raises an interrupt right between this update and the
+ * update to the final destination CPU.
+ *
+ * If the vector is in use then the installed device handler will
+ * denote it as spurious which is no harm as this is a rare event
+ * and interrupt handlers have to cope with spurious interrupts
+ * anyway. If the vector is unused, then it is marked so it won't
+ * trigger the 'No irq handler for vector' warning in
+ * common_interrupt().
+ *
+ * This requires to hold vector lock to prevent concurrent updates to
+ * the affected vector.
+ */
+ lock_vector_lock();
+
+ /*
+ * Mark the new target vector on the local CPU if it is currently
+ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+ * the CPU hotplug path for a similar purpose. This cannot be
+ * undone here as the current CPU has interrupts disabled and
+ * cannot handle the interrupt before the whole set_affinity()
+ * section is done. In the CPU unplug case, the current CPU is
+ * about to vanish and will not handle any interrupts anymore. The
+ * vector is cleaned up when the CPU comes online again.
+ */
+ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+ /* Redirect it to the new vector on the local CPU temporarily */
+ old_cfg.vector = cfg->vector;
+ irq_msi_update_msg(irqd, &old_cfg);
+
+ /* Now transition it to the target CPU */
+ irq_msi_update_msg(irqd, cfg);
+
+ /*
+ * All interrupts after this point are now targeted at the new
+ * vector/CPU.
+ *
+ * Drop vector lock before testing whether the temporary assignment
+ * to the local CPU was hit by an interrupt raised in the device,
+ * because the retrigger function acquires vector lock again.
+ */
+ unlock_vector_lock();
+
+ /*
+ * Check whether the transition raced with a device interrupt and
+ * is pending in the local APICs IRR. It is safe to do this outside
+ * of vector lock as the irq_desc::lock of this interrupt is still
+ * held and interrupts are disabled: The check is not accessing the
+ * underlying vector store. It's just checking the local APIC's
+ * IRR.
+ */
+ if (lapic_vector_set_in_irr(cfg->vector))
+ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+ return ret;
+}
+
+/**
+ * pci_dev_has_default_msi_parent_domain - Check whether the device has the default
+ * MSI parent domain associated
+ * @dev: Pointer to the PCI device
+ */
+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(&dev->dev);
+
+ if (!domain)
+ domain = dev_get_msi_domain(&dev->bus->dev);
+ if (!domain)
+ return false;
+
+ return domain == x86_vector_domain;
+}
+
+/**
+ * x86_msi_prepare - Setup of msi_alloc_info_t for allocations
+ * @domain: The domain for which this setup happens
+ * @dev: The device for which interrupts are allocated
+ * @nvec: The number of vectors to allocate
+ * @alloc: The allocation info structure to initialize
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. It is always invoked from the
+ * top level interrupt domain. The domain specific allocation
+ * functionality is determined via the @domain's bus token which allows to
+ * map the X86 specific allocation type.
+ */
+static int x86_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *alloc)
+{
+ struct msi_domain_info *info = domain->host_data;
+
+ init_irq_alloc_info(alloc, NULL);
+
+ switch (info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * x86_init_dev_msi_info - Domain info setup for MSI domains
+ * @dev: The device for which the domain should be created
+ * @domain: The (root) domain providing this callback
+ * @real_parent: The real parent domain of the to initialize domain
+ * @info: The domain info for the to initialize domain
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. The domain specific functionality
+ * is determined via the @real_parent.
+ */
+static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+
+ /* MSI parent domain specific settings */
+ switch (real_parent->bus_token) {
+ case DOMAIN_BUS_ANY:
+ /* Only the vector domain can have the ANY token */
+ if (WARN_ON_ONCE(domain != real_parent))
+ return false;
+ info->chip->irq_set_affinity = msi_set_affinity;
+ info->chip->flags |= IRQCHIP_MOVE_DEFERRED;
+ break;
+ case DOMAIN_BUS_DMAR:
+ case DOMAIN_BUS_AMDVI:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /* Is the target supported? */
+ switch(info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /*
+ * Mask out the domain specific MSI feature flags which are not
+ * supported by the real parent.
+ */
+ info->flags &= pops->supported_flags;
+ /* Enforce the required flags */
+ info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
+
+ /* This is always invoked from the top level MSI domain! */
+ info->ops->msi_prepare = x86_msi_prepare;
+
+ info->chip->irq_ack = irq_chip_ack_parent;
+ info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
+ info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP;
+
+ info->handler = handle_edge_irq;
+ info->handler_name = "edge";
+
+ return true;
+}
+
+static const struct msi_parent_ops x86_vector_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED,
+ .init_dev_msi_info = x86_init_dev_msi_info,
+};
+
+struct irq_domain * __init native_create_pci_msi_domain(void)
+{
+ if (apic_is_disabled)
+ return NULL;
+
+ x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops;
+ return x86_vector_domain;
+}
+
+void __init x86_create_pci_msi_domain(void)
+{
+ x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
+}
+
+/* Keep around for hyperV */
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg)
+{
+ init_irq_alloc_info(arg, NULL);
+
+ if (to_pci_dev(dev)->msix_enabled)
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ else
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_msi_prepare);
+
+#ifdef CONFIG_DMAR_TABLE
+/*
+ * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
+ * high bits of the destination APIC ID. This can't be done in the general
+ * case for MSIs as it would be targeting real memory above 4GiB not the
+ * APIC.
+ */
+static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg, true);
+}
+
+static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ dmar_msi_write(data->irq, msg);
+}
+
+static struct irq_chip dmar_msi_controller = {
+ .name = "DMAR-MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = msi_domain_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_compose_msi_msg = dmar_msi_compose_msg,
+ .irq_write_msi_msg = dmar_msi_write_msg,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
+};
+
+static int dmar_msi_init(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq,
+ irq_hw_number_t hwirq, msi_alloc_info_t *arg)
+{
+ irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
+ handle_edge_irq, arg->data, "edge");
+
+ return 0;
+}
+
+static struct msi_domain_ops dmar_msi_domain_ops = {
+ .msi_init = dmar_msi_init,
+};
+
+static struct msi_domain_info dmar_msi_domain_info = {
+ .ops = &dmar_msi_domain_ops,
+ .chip = &dmar_msi_controller,
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS,
+};
+
+static struct irq_domain *dmar_get_irq_domain(void)
+{
+ static struct irq_domain *dmar_domain;
+ static DEFINE_MUTEX(dmar_lock);
+ struct fwnode_handle *fn;
+
+ mutex_lock(&dmar_lock);
+ if (dmar_domain)
+ goto out;
+
+ fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
+ if (fn) {
+ dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
+ x86_vector_domain);
+ if (!dmar_domain)
+ irq_domain_free_fwnode(fn);
+ }
+out:
+ mutex_unlock(&dmar_lock);
+ return dmar_domain;
+}
+
+int dmar_alloc_hwirq(int id, int node, void *arg)
+{
+ struct irq_domain *domain = dmar_get_irq_domain();
+ struct irq_alloc_info info;
+
+ if (!domain)
+ return -1;
+
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_DMAR;
+ info.devid = id;
+ info.hwirq = id;
+ info.data = arg;
+
+ return irq_domain_alloc_irqs(domain, 1, node, &info);
+}
+
+void dmar_free_hwirq(int irq)
+{
+ irq_domain_free_irqs(irq, 1);
+}
+#endif
+
+bool arch_restore_msi_irqs(struct pci_dev *dev)
+{
+ return xen_initdom_restore_msi(dev);
+}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
deleted file mode 100644
index d661ee95cabf..000000000000
--- a/arch/x86/kernel/apic/numaq_32.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Written by: Patricia Gaughen, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
- */
-#include <linux/nodemask.h>
-#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/kernel.h>
-#include <linux/mmzone.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/numa.h>
-#include <linux/smp.h>
-#include <linux/io.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/numaq.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/e820.h>
-#include <asm/ipi.h>
-
-int found_numaq;
-
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-struct mpc_trans {
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-static int mpc_record;
-
-static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
-
-int mp_bus_id_to_node[MAX_MP_BUSSES];
-int mp_bus_id_to_local[MAX_MP_BUSSES];
-int quad_local_to_mp_bus_id[NR_CPUS/4][4];
-
-
-static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
-{
- struct eachquadmem *eq = scd->eq + node;
- u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
- u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
- int ret;
-
- node_set(node, numa_nodes_parsed);
- ret = numa_add_memblk(node, start, end);
- BUG_ON(ret < 0);
-}
-
-/*
- * Function: smp_dump_qct()
- *
- * Description: gets memory layout from the quad config table. This
- * function also updates numa_nodes_parsed with the nodes (quads) present.
- */
-static void __init smp_dump_qct(void)
-{
- struct sys_cfg_data *scd;
- int node;
-
- scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
-
- for_each_node(node) {
- if (scd->quads_present31_0 & (1 << node))
- numaq_register_node(node, scd);
- }
-}
-
-void __cpuinit numaq_tsc_disable(void)
-{
- if (!found_numaq)
- return;
-
- if (num_online_nodes() > 1) {
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- setup_clear_cpu_cap(X86_FEATURE_TSC);
- }
-}
-
-static void __init numaq_tsc_init(void)
-{
- numaq_tsc_disable();
-}
-
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
- return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
-/* x86_quirks member */
-static int mpc_apic_id(struct mpc_cpu *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int logical_apicid = generate_logical_apicid(quad, m->apicid);
-
- printk(KERN_DEBUG
- "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->cpufeature & CPU_MODEL_MASK) >> 4,
- m->apicver, quad, logical_apicid);
-
- return logical_apicid;
-}
-
-/* x86_quirks member */
-static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- mp_bus_id_to_node[m->busid] = quad;
- mp_bus_id_to_local[m->busid] = local;
-
- printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
-}
-
-/* x86_quirks member */
-static void mpc_oem_pci_bus(struct mpc_bus *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- quad_local_to_mp_bus_id[quad][local] = m->busid;
-}
-
-/*
- * Called from mpparse code.
- * mode = 0: prescan
- * mode = 1: one mpc entry scanned
- */
-static void numaq_mpc_record(unsigned int mode)
-{
- if (!mode)
- mpc_record = 0;
- else
- mpc_record++;
-}
-
-static void __init MP_translation_info(struct mpc_trans *m)
-{
- printk(KERN_INFO
- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
- m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
-
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
- node_set_online(m->trans_quad);
-}
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-static void __init smp_read_mpc_oem(struct mpc_table *mpc)
-{
- struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
- int count = sizeof(*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-
- mpc_record = 0;
- printk(KERN_INFO
- "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
-
- if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
- printk(KERN_WARNING
- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->signature[0], oemtable->signature[1],
- oemtable->signature[2], oemtable->signature[3]);
- return;
- }
-
- if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
- return;
- }
-
- while (count < oemtable->length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_trans *m = (void *)oemptr;
-
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- printk(KERN_WARNING
- "Unrecognised OEM table entry type! - %d\n",
- (int)*oemptr);
- return;
- }
- }
-}
-
-static __init void early_check_numaq(void)
-{
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- early_get_smp_config();
-
- if (found_numaq) {
- x86_init.mpparse.mpc_record = numaq_mpc_record;
- x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
- x86_init.mpparse.mpc_apic_id = mpc_apic_id;
- x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
- x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
- x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
- x86_init.timers.tsc_pre_init = numaq_tsc_init;
- x86_init.pci.init = pci_numaq_init;
- }
-}
-
-int __init numaq_numa_init(void)
-{
- early_check_numaq();
- if (!found_numaq)
- return -ENOENT;
- smp_dump_qct();
-
- return 0;
-}
-
-#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static inline unsigned int numaq_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0x0F;
-}
-
-static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_logical(mask, vector);
-}
-
-static inline void numaq_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
-}
-
-static inline void numaq_send_IPI_all(int vector)
-{
- numaq_send_IPI_mask(cpu_online_mask, vector);
-}
-
-#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
-#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
-
-/*
- * Because we use NMIs rather than the INIT-STARTUP sequence to
- * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
- */
-static inline void numaq_smp_callin_clear_local_apic(void)
-{
- clear_local_APIC();
-}
-
-static inline const struct cpumask *numaq_target_cpus(void)
-{
- return cpu_all_mask;
-}
-
-static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-static inline unsigned long numaq_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static inline int numaq_apic_id_registered(void)
-{
- return 1;
-}
-
-static inline void numaq_init_apic_ldr(void)
-{
- /* Already done in NUMA-Q firmware */
-}
-
-static inline void numaq_setup_apic_routing(void)
-{
- printk(KERN_INFO
- "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-/*
- * Skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum.
- */
-static inline int numaq_multi_timer_check(int apic, int irq)
-{
- return apic != 0 && irq == 0;
-}
-
-static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* We don't have a good way to do this yet - hack */
- return physids_promote(0xFUL, retmap);
-}
-
-/*
- * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
- * cpu to APIC ID relation to properly interact with the intelligent
- * mode of the cluster controller.
- */
-static inline int numaq_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < 60)
- return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
- else
- return BAD_APICID;
-}
-
-static inline int numaq_apicid_to_node(int logical_apicid)
-{
- return logical_apicid >> 4;
-}
-
-static int numaq_numa_cpu_node(int cpu)
-{
- int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (logical_apicid != BAD_APICID)
- return numaq_apicid_to_node(logical_apicid);
- return NUMA_NO_NODE;
-}
-
-static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
-{
- int node = numaq_apicid_to_node(logical_apicid);
- int cpu = __ffs(logical_apicid & 0xf);
-
- physid_set_mask_of_physid(cpu + 4*node, retmap);
-}
-
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-static inline int numaq_check_phys_apicid_present(int phys_apicid)
-{
- return 1;
-}
-
-/*
- * We use physical apicids here, not logical, so just return the default
- * physical broadcast to stop people from breaking us
- */
-static int
-numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- *apicid = 0x0F;
- return 0;
-}
-
-/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
-static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static int
-numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- if (strncmp(oem, "IBM NUMA", 8))
- printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
- else
- found_numaq = 1;
-
- return found_numaq;
-}
-
-static int probe_numaq(void)
-{
- /* already know from get_memcfg_numaq() */
- return found_numaq;
-}
-
-static void numaq_setup_portio_remap(void)
-{
- int num_quads = num_online_nodes();
-
- if (num_quads <= 1)
- return;
-
- printk(KERN_INFO
- "Remapping cross-quad port I/O for %d quads\n", num_quads);
-
- xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
-
- printk(KERN_INFO
- "xquad_portio vaddr 0x%08lx, len %08lx\n",
- (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
-}
-
-/* Use __refdata to keep false positive warning calm. */
-static struct apic __refdata apic_numaq = {
-
- .name = "NUMAQ",
- .probe = probe_numaq,
- .acpi_madt_oem_check = NULL,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = numaq_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* physical delivery on LOCAL quad: */
- .irq_dest_mode = 0,
-
- .target_cpus = numaq_target_cpus,
- .disable_esr = 1,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = numaq_check_apicid_used,
- .check_apicid_present = numaq_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = numaq_init_apic_ldr,
-
- .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
- .setup_apic_routing = numaq_setup_apic_routing,
- .multi_timer_check = numaq_multi_timer_check,
- .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
- .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
- .setup_portio_remap = numaq_setup_portio_remap,
- .check_phys_apicid_present = numaq_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = numaq_phys_pkg_id,
- .mps_oem_check = numaq_mps_oem_check,
-
- .get_apic_id = numaq_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
-
- .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = numaq_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = numaq_send_IPI_allbutself,
- .send_IPI_all = numaq_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
- .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
-
- /* We don't do anything here because we use NMI's to boot instead */
- .wait_for_init_deassert = NULL,
-
- .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
- .inquire_remote_apic = NULL,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
- .x86_32_numa_cpu_node = numaq_numa_cpu_node,
-};
-
-apic_driver(apic_numaq);
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index eb35ef9ee63f..87bc9e7ca5d6 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -1,69 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Default generic APIC driver. This handles up to 8 CPUs.
*
* Copyright 2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
*
* Generic x86 APIC driver probe layer.
*/
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
+#include <linux/export.h>
#include <linux/errno.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-#include <asm/apic.h>
-#include <asm/setup.h>
-
#include <linux/smp.h>
-#include <asm/ipi.h>
-
-#include <linux/interrupt.h>
-#include <asm/acpi.h>
-#include <asm/e820.h>
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI (1)
-#else
-#define DEFAULT_SEND_IPI (0)
-#endif
+#include <xen/xen.h>
-int no_broadcast = DEFAULT_SEND_IPI;
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/acpi.h>
-static __init int no_ipi_broadcast(char *str)
-{
- get_option(&str, &no_broadcast);
- pr_info("Using %s mode\n",
- no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
- return 1;
-}
-__setup("no_ipi_broadcast=", no_ipi_broadcast);
+#include "local.h"
-static int __init print_ipi_mode(void)
+static u32 default_get_apic_id(u32 x)
{
- pr_info("Using IPI %s mode\n",
- no_broadcast ? "No-Shortcut" : "Shortcut");
- return 0;
-}
-late_initcall(print_ipi_mode);
+ unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
-static int default_x86_32_early_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
-static void setup_apic_flat_routing(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- printk(KERN_INFO
- "Enabling APIC mode: Flat. Using %d I/O APICs\n",
- nr_ioapics);
-#endif
+ if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
+ return (x >> 24) & 0xFF;
+ else
+ return (x >> 24) & 0x0F;
}
/* should be called last. */
@@ -72,72 +34,42 @@ static int probe_default(void)
return 1;
}
-static struct apic apic_default = {
+static struct apic apic_default __ro_after_init = {
.name = "default",
.probe = probe_default,
- .acpi_madt_oem_check = NULL,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = default_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
+ .dest_mode_logical = true,
- .target_cpus = default_target_cpus,
.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = default_check_apicid_used,
- .check_apicid_present = default_check_apicid_present,
- .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = default_init_apic_ldr,
-
- .ioapic_phys_id_map = default_ioapic_phys_id_map,
- .setup_apic_routing = setup_apic_flat_routing,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = physid_set_mask_of_physid,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = default_phys_pkg_id,
- .mps_oem_check = NULL,
+ .max_apic_id = 0xFE,
.get_apic_id = default_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = apic_flat_calc_apicid,
+ .send_IPI = default_send_IPI_single,
.send_IPI_mask = default_send_IPI_mask_logical,
.send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
.send_IPI_allbutself = default_send_IPI_allbutself,
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
- .wait_for_init_deassert = default_wait_for_init_deassert,
-
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
.read = native_apic_mem_read,
.write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
+ .eoi = native_apic_mem_eoi,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+ .wait_icr_idle = apic_mem_wait_icr_idle,
+ .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout,
};
apic_driver(apic_default);
-struct apic *apic = &apic_default;
+struct apic *apic __ro_after_init = &apic_default;
EXPORT_SYMBOL_GPL(apic);
static int cmdline_apic __initdata;
@@ -150,7 +82,7 @@ static int __init parse_apic(char *arg)
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
if (!strcmp((*drv)->name, arg)) {
- apic = *drv;
+ apic_install_driver(*drv);
cmdline_apic = 1;
return 0;
}
@@ -161,50 +93,14 @@ static int __init parse_apic(char *arg)
}
early_param("apic", parse_apic);
-void __init default_setup_apic_routing(void)
-{
- int version = apic_version[boot_cpu_physical_apicid];
-
- if (num_possible_cpus() > 8) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- if (!APIC_XAPIC(version)) {
- def_to_bigsmp = 0;
- break;
- }
- /* If P4 and above fall through */
- case X86_VENDOR_AMD:
- def_to_bigsmp = 1;
- }
- }
-
-#ifdef CONFIG_X86_BIGSMP
- /*
- * This is used to switch to bigsmp mode when
- * - There is no apic= option specified by the user
- * - generic_apic_probe() has chosen apic_default as the sub_arch
- * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
- */
-
- if (!cmdline_apic && apic == &apic_default)
- generic_bigsmp_probe();
-#endif
-
- if (apic->setup_apic_routing)
- apic->setup_apic_routing();
-
- if (x86_platform.apic_post_init)
- x86_platform.apic_post_init();
-}
-
-void __init generic_apic_probe(void)
+void __init x86_32_probe_apic(void)
{
if (!cmdline_apic) {
struct apic **drv;
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
if ((*drv)->probe()) {
- apic = *drv;
+ apic_install_driver(*drv);
break;
}
}
@@ -212,48 +108,4 @@ void __init generic_apic_probe(void)
if (drv == __apicdrivers_end)
panic("Didn't find an APIC driver");
}
- printk(KERN_INFO "Using APIC driver %s\n", apic->name);
-}
-
-/* These functions can switch the APIC even after the initial ->probe() */
-
-int __init
-generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- struct apic **drv;
-
- for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
- if (!((*drv)->mps_oem_check))
- continue;
- if (!(*drv)->mps_oem_check(mpc, oem, productid))
- continue;
-
- if (!cmdline_apic) {
- apic = *drv;
- printk(KERN_INFO "Switched to APIC driver `%s'.\n",
- apic->name);
- }
- return 1;
- }
- return 0;
-}
-
-int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- struct apic **drv;
-
- for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
- if (!(*drv)->acpi_madt_oem_check)
- continue;
- if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id))
- continue;
-
- if (!cmdline_apic) {
- apic = *drv;
- printk(KERN_INFO "Switched to APIC driver `%s'.\n",
- apic->name);
- }
- return 1;
- }
- return 0;
}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1793dba7a741..ecdf0c4121e1 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Generic APIC sub-arch probe layer.
*
@@ -8,25 +8,13 @@
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
*/
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
+#include <linux/thread_info.h>
#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/setup.h>
-/*
- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
- */
-void __init default_setup_apic_routing(void)
+#include "local.h"
+
+/* Select the appropriate APIC driver */
+void __init x86_64_probe_apic(void)
{
struct apic **drv;
@@ -34,24 +22,10 @@ void __init default_setup_apic_routing(void)
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
if ((*drv)->probe && (*drv)->probe()) {
- if (apic != *drv) {
- apic = *drv;
- pr_info("Switched APIC routing to %s.\n",
- apic->name);
- }
+ apic_install_driver(*drv);
break;
}
}
-
- if (x86_platform.apic_post_init)
- x86_platform.apic_post_init();
-}
-
-/* Same for both flat and physical. */
-
-void apic_send_IPI_self(int vector)
-{
- __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -60,11 +34,7 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) {
- if (apic != *drv) {
- apic = *drv;
- pr_info("Setting APIC routing to %s.\n",
- apic->name);
- }
+ apic_install_driver(*drv);
return 1;
}
}
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
deleted file mode 100644
index 77c95c0e1bf7..000000000000
--- a/arch/x86/kernel/apic/summit_32.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * IBM Summit-Specific Code
- *
- * Written By: Matthew Dobson, IBM Corporation
- *
- * Copyright (c) 2003 IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- *
- */
-
-#define pr_fmt(fmt) "summit: %s: " fmt, __func__
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/bios_ebda.h>
-
-/*
- * APIC driver for the IBM "Summit" chipset.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/smp.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/ipi.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/smp.h>
-
-static unsigned summit_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_logical(mask, vector);
-}
-
-static void summit_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
-}
-
-static void summit_send_IPI_all(int vector)
-{
- summit_send_IPI_mask(cpu_online_mask, vector);
-}
-
-#include <asm/tsc.h>
-
-extern int use_cyclone;
-
-#ifdef CONFIG_X86_SUMMIT_NUMA
-static void setup_summit(void);
-#else
-static inline void setup_summit(void) {}
-#endif
-
-static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- if (!strncmp(oem, "IBM ENSW", 8) &&
- (!strncmp(productid, "VIGIL SMP", 9)
- || !strncmp(productid, "EXA", 3)
- || !strncmp(productid, "RUTHLESS SMP", 12))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-/* Hook from generic ACPI tables.c */
-static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (!strncmp(oem_id, "IBM", 3) &&
- (!strncmp(oem_table_id, "SERVIGIL", 8)
- || !strncmp(oem_table_id, "EXA", 3))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-struct rio_table_hdr {
- unsigned char version; /* Version number of this data structure */
- /* Version 3 adds chassis_num & WP_index */
- unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
- unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
-} __attribute__((packed));
-
-struct scal_detail {
- unsigned char node_id; /* Scalability Node ID */
- unsigned long CBAR; /* Address of 1MB register space */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF = None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port2node; /* Node ID port connected to: 0xFF = None */
- unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- unsigned char node_id; /* RIO Node ID */
- unsigned long BBAR; /* Address of 1MB register space */
- unsigned char type; /* Type of device */
- unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
- /* For CYC: Node ID of Twister that owns this CYC */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF=None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
- /* For CYC: 0 */
- unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie:*/
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- /* For CYC: Bits0:7 Reserved */
- unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- /* For CYC: No meaning */
- unsigned char chassis_num; /* 1 based Chassis number */
- /* For LookOut WPEGs this field indicates the */
- /* Expansion Chassis #, enumerated from Boot */
- /* Node WPEG external port, then Boot Node CYC */
- /* external port, then Next Vigil chassis WPEG */
- /* external port, etc. */
- /* Shared Lookouts have only 1 chassis number (the */
- /* first one assigned) */
-} __attribute__((packed));
-
-
-typedef enum {
- CompatTwister = 0, /* Compatibility Twister */
- AltTwister = 1, /* Alternate Twister of internal 8-way */
- CompatCyclone = 2, /* Compatibility Cyclone */
- AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
- CompatWPEG = 4, /* Compatibility WPEG */
- AltWPEG = 5, /* Second Planar WPEG */
- LookOutAWPEG = 6, /* LookOut WPEG */
- LookOutBWPEG = 7, /* LookOut WPEG */
-} node_type;
-
-static inline int is_WPEG(struct rio_detail *rio){
- return (rio->type == CompatWPEG || rio->type == AltWPEG ||
- rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
-}
-
-#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static const struct cpumask *summit_target_cpus(void)
-{
- /* CPU_MASK_ALL (0xff) has undefined behaviour with
- * dest_LowestPrio mode logical clustered apic interrupt routing
- * Just start on cpu 0. IRQ balancing will spread load
- */
- return cpumask_of(0);
-}
-
-static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return 0;
-}
-
-/* we don't use the phys_cpu_present_map to indicate apicid presence */
-static unsigned long summit_check_apicid_present(int bit)
-{
- return 1;
-}
-
-static int summit_early_logical_apicid(int cpu)
-{
- int count = 0;
- u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
- u8 my_cluster = APIC_CLUSTER(my_id);
-#ifdef CONFIG_SMP
- u8 lid;
- int i;
-
- /* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
- if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
- ++count;
- }
-#endif
- /* We only have a 4 wide bitmap in cluster mode. If a deranged
- * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
- BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- return my_cluster | (1UL << count);
-}
-
-static void summit_init_apic_ldr(void)
-{
- int cpu = smp_processor_id();
- unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
- unsigned long val;
-
- apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static int summit_apic_id_registered(void)
-{
- return 1;
-}
-
-static void summit_setup_apic_routing(void)
-{
- pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-static int summit_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids)
- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0x0FL, retmap);
-}
-
-static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
-{
- physid_set_mask_of_physid(0, retmap);
-}
-
-static int summit_check_phys_apicid_present(int physical_apicid)
-{
- return 1;
-}
-
-static inline int
-summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
-{
- unsigned int round = 0;
- unsigned int cpu, apicid = 0;
-
- /*
- * The cpus in the mask must all be on the apic cluster.
- */
- for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
- int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- pr_err("Not a valid mask!\n");
- return -EINVAL;
- }
- apicid |= new_apicid;
- round++;
- }
- if (!round)
- return -EINVAL;
- *dest_id = apicid;
- return 0;
-}
-
-static int
-summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- cpumask_var_t cpumask;
- *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return 0;
-
- cpumask_and(cpumask, inmask, andmask);
- summit_cpu_mask_to_apicid(cpumask, apicid);
-
- free_cpumask_var(cpumask);
-
- return 0;
-}
-
-/*
- * cpuid returns the value latched in the HW at reset, not the APIC ID
- * register's value. For any box whose BIOS changes APIC IDs, like
- * clustered APIC systems, we must use hard_smp_processor_id.
- *
- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
- */
-static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-static int probe_summit(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-#ifdef CONFIG_X86_SUMMIT_NUMA
-static struct rio_table_hdr *rio_table_hdr;
-static struct scal_detail *scal_devs[MAX_NUMNODES];
-static struct rio_detail *rio_devs[MAX_NUMNODES*4];
-
-#ifndef CONFIG_X86_NUMAQ
-static int mp_bus_id_to_node[MAX_MP_BUSSES];
-#endif
-
-static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
-{
- int twister = 0, node = 0;
- int i, bus, num_buses;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
- twister = rio_devs[i]->owner_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_rio_dev) {
- pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
- return last_bus;
- }
-
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
- if (scal_devs[i]->node_id == twister) {
- node = scal_devs[i]->node_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_scal_dev) {
- pr_err("Couldn't find owner Twister for Cyclone!\n");
- return last_bus;
- }
-
- switch (rio_devs[wpeg_num]->type) {
- case CompatWPEG:
- /*
- * The Compatibility Winnipeg controls the 2 legacy buses,
- * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
- * a PCI-PCI bridge card is used in either slot: total 5 buses.
- */
- num_buses = 5;
- break;
- case AltWPEG:
- /*
- * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
- * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
- * the "extra" buses for each of those slots: total 7 buses.
- */
- num_buses = 7;
- break;
- case LookOutAWPEG:
- case LookOutBWPEG:
- /*
- * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
- * & the "extra" buses for each of those slots: total 9 buses.
- */
- num_buses = 9;
- break;
- default:
- pr_info("Unsupported Winnipeg type!\n");
- return last_bus;
- }
-
- for (bus = last_bus; bus < last_bus + num_buses; bus++)
- mp_bus_id_to_node[bus] = node;
- return bus;
-}
-
-static int build_detail_arrays(void)
-{
- unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
-
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
- pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n",
- MAX_NUMNODES, rio_table_hdr->num_scal_dev);
- return 0;
- }
-
- switch (rio_table_hdr->version) {
- default:
- pr_warn("Invalid Rio Grande Table Version: %d\n",
- rio_table_hdr->version);
- return 0;
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- }
-
- ptr = (unsigned long)rio_table_hdr + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 1;
-}
-
-void setup_summit(void)
-{
- unsigned long ptr;
- unsigned short offset;
- int i, next_wpeg, next_bus = 0;
-
- /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = get_bios_ebda();
- ptr = (unsigned long)phys_to_virt(ptr);
-
- rio_table_hdr = NULL;
- offset = 0x180;
- while (offset) {
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- /* The next offset is stored in the 1st word. 0 means no more */
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr) {
- pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
- return;
- }
-
- if (!build_detail_arrays())
- return;
-
- /* The first Winnipeg we're looking for has an index of 0 */
- next_wpeg = 0;
- do {
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
- /* It's the Winnipeg we're looking for! */
- next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
- next_wpeg++;
- break;
- }
- }
- /*
- * If we go through all Rio devices and don't find one with
- * the next index, it means we've found all the Winnipegs,
- * and thus all the PCI buses.
- */
- if (i == rio_table_hdr->num_rio_dev)
- next_wpeg = 0;
- } while (next_wpeg != 0);
-}
-#endif
-
-static struct apic apic_summit = {
-
- .name = "summit",
- .probe = probe_summit,
- .acpi_madt_oem_check = summit_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = summit_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
-
- .target_cpus = summit_target_cpus,
- .disable_esr = 1,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = summit_check_apicid_used,
- .check_apicid_present = summit_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = summit_init_apic_ldr,
-
- .ioapic_phys_id_map = summit_ioapic_phys_id_map,
- .setup_apic_routing = summit_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = summit_cpu_present_to_apicid,
- .apicid_to_cpu_present = summit_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = summit_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = summit_phys_pkg_id,
- .mps_oem_check = summit_mps_oem_check,
-
- .get_apic_id = summit_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = summit_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = summit_send_IPI_allbutself,
- .send_IPI_all = summit_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
- .wait_for_init_deassert = default_wait_for_init_deassert,
-
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = summit_early_logical_apicid,
-};
-
-apic_driver(apic_summit);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 000000000000..bddc54465399
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c
@@ -0,0 +1,1387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ * Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Enable support of hierarchical irqdomains
+ */
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <asm/irqdomain.h>
+#include <asm/hw_irq.h>
+#include <asm/traps.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+#include <asm/trace/irq_vectors.h>
+
+struct apic_chip_data {
+ struct irq_cfg hw_irq_cfg;
+ unsigned int vector;
+ unsigned int prev_vector;
+ unsigned int cpu;
+ unsigned int prev_cpu;
+ unsigned int irq;
+ struct hlist_node clist;
+ unsigned int move_in_progress : 1,
+ is_managed : 1,
+ can_reserve : 1,
+ has_reserved : 1;
+};
+
+struct irq_domain *x86_vector_domain;
+EXPORT_SYMBOL_GPL(x86_vector_domain);
+static DEFINE_RAW_SPINLOCK(vector_lock);
+static cpumask_var_t vector_searchmask;
+static struct irq_chip lapic_controller;
+static struct irq_matrix *vector_matrix;
+#ifdef CONFIG_SMP
+
+static void vector_cleanup_callback(struct timer_list *tmr);
+
+struct vector_cleanup {
+ struct hlist_head head;
+ struct timer_list timer;
+};
+
+static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = {
+ .head = HLIST_HEAD_INIT,
+ .timer = __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED),
+};
+#endif
+
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ raw_spin_unlock(&vector_lock);
+}
+
+void init_irq_alloc_info(struct irq_alloc_info *info,
+ const struct cpumask *mask)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = mask;
+}
+
+void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
+{
+ if (src)
+ *dst = *src;
+ else
+ memset(dst, 0, sizeof(*dst));
+}
+
+static struct apic_chip_data *apic_chip_data(struct irq_data *irqd)
+{
+ if (!irqd)
+ return NULL;
+
+ while (irqd->parent_data)
+ irqd = irqd->parent_data;
+
+ return irqd->chip_data;
+}
+
+struct irq_cfg *irqd_cfg(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ return apicd ? &apicd->hw_irq_cfg : NULL;
+}
+EXPORT_SYMBOL_GPL(irqd_cfg);
+
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irqd_cfg(irq_get_irq_data(irq));
+}
+
+static struct apic_chip_data *alloc_apic_chip_data(int node)
+{
+ struct apic_chip_data *apicd;
+
+ apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node);
+ if (apicd)
+ INIT_HLIST_NODE(&apicd->clist);
+ return apicd;
+}
+
+static void free_apic_chip_data(struct apic_chip_data *apicd)
+{
+ kfree(apicd);
+}
+
+static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
+ unsigned int cpu)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ lockdep_assert_held(&vector_lock);
+
+ apicd->hw_irq_cfg.vector = vector;
+ apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+
+ apic_update_vector(cpu, vector, true);
+
+ irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+ trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid);
+}
+
+static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed)
+{
+ apic_update_vector(cpu, vector, false);
+ irq_matrix_free(vector_matrix, cpu, vector, managed);
+}
+
+static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ struct irq_desc *desc = irq_data_to_desc(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
+
+ lockdep_assert_held(&vector_lock);
+
+ trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
+ apicd->cpu);
+
+ /*
+ * If there is no vector associated or if the associated vector is
+ * the shutdown vector, which is associated to make PCI/MSI
+ * shutdown mode work, then there is nothing to release. Clear out
+ * prev_vector for this and the offlined target case.
+ */
+ apicd->prev_vector = 0;
+ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+ goto setnew;
+ /*
+ * If the target CPU of the previous vector is online, then mark
+ * the vector as move in progress and store it for cleanup when the
+ * first interrupt on the new vector arrives. If the target CPU is
+ * offline then the regular release mechanism via the cleanup
+ * vector is not possible and the vector can be immediately freed
+ * in the underlying matrix allocator.
+ */
+ if (cpu_online(apicd->cpu)) {
+ apicd->move_in_progress = true;
+ apicd->prev_vector = apicd->vector;
+ apicd->prev_cpu = apicd->cpu;
+ WARN_ON_ONCE(apicd->cpu == newcpu);
+ } else {
+ apic_free_vector(apicd->cpu, apicd->vector, managed);
+ }
+
+setnew:
+ apicd->vector = newvec;
+ apicd->cpu = newcpu;
+ BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
+ per_cpu(vector_irq, newcpu)[newvec] = desc;
+ apic_update_irq_cfg(irqd, newvec, newcpu);
+}
+
+static void vector_assign_managed_shutdown(struct irq_data *irqd)
+{
+ unsigned int cpu = cpumask_first(cpu_online_mask);
+
+ apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu);
+}
+
+static int reserve_managed_vector(struct irq_data *irqd)
+{
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ apicd->is_managed = true;
+ ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ trace_vector_reserve_managed(irqd->irq, ret);
+ return ret;
+}
+
+static void reserve_irq_vector_locked(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ irq_matrix_reserve(vector_matrix);
+ apicd->can_reserve = true;
+ apicd->has_reserved = true;
+ irqd_set_can_reserve(irqd);
+ trace_vector_reserve(irqd->irq, 0);
+ vector_assign_managed_shutdown(irqd);
+}
+
+static int reserve_irq_vector(struct irq_data *irqd)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ reserve_irq_vector_locked(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return 0;
+}
+
+static int
+assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ bool resvd = apicd->has_reserved;
+ unsigned int cpu = apicd->cpu;
+ int vector = apicd->vector;
+
+ lockdep_assert_held(&vector_lock);
+
+ /*
+ * If the current target CPU is online and in the new requested
+ * affinity mask, there is no point in moving the interrupt from
+ * one CPU to another.
+ */
+ if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
+ return 0;
+
+ /*
+ * Careful here. @apicd might either have move_in_progress set or
+ * be enqueued for cleanup. Assigning a new vector would either
+ * leave a stale vector on some CPU around or in case of a pending
+ * cleanup corrupt the hlist.
+ */
+ if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
+ return -EBUSY;
+
+ vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
+ trace_vector_alloc(irqd->irq, vector, resvd, vector);
+ if (vector < 0)
+ return vector;
+ chip_data_update(irqd, vector, cpu);
+
+ return 0;
+}
+
+static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest)
+{
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ ret = assign_vector_locked(irqd, vector_searchmask);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return ret;
+}
+
+static int assign_irq_vector_any_locked(struct irq_data *irqd)
+{
+ /* Get the affinity mask - either irq_default_affinity or (user) set */
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ int node = irq_data_get_node(irqd);
+
+ if (node != NUMA_NO_NODE) {
+ /* Try the intersection of @affmsk and node mask */
+ cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
+ if (!assign_vector_locked(irqd, vector_searchmask))
+ return 0;
+ }
+
+ /* Try the full affinity mask */
+ cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
+ if (!assign_vector_locked(irqd, vector_searchmask))
+ return 0;
+
+ if (node != NUMA_NO_NODE) {
+ /* Try the node mask */
+ if (!assign_vector_locked(irqd, cpumask_of_node(node)))
+ return 0;
+ }
+
+ /* Try the full online mask */
+ return assign_vector_locked(irqd, cpu_online_mask);
+}
+
+static int
+assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
+{
+ if (irqd_affinity_is_managed(irqd))
+ return reserve_managed_vector(irqd);
+ if (info->mask)
+ return assign_irq_vector(irqd, info->mask);
+ /*
+ * Make only a global reservation with no guarantee. A real vector
+ * is associated at activation time.
+ */
+ return reserve_irq_vector(irqd);
+}
+
+static int
+assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
+{
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int vector, cpu;
+
+ cpumask_and(vector_searchmask, dest, affmsk);
+
+ /* set_affinity might call here for nothing */
+ if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
+ return 0;
+ vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
+ &cpu);
+ trace_vector_alloc_managed(irqd->irq, vector, vector);
+ if (vector < 0)
+ return vector;
+ chip_data_update(irqd, vector, cpu);
+
+ return 0;
+}
+
+static void clear_irq_vector(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
+ unsigned int vector = apicd->vector;
+
+ lockdep_assert_held(&vector_lock);
+
+ if (!vector)
+ return;
+
+ trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
+ apicd->prev_cpu);
+
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
+ apic_free_vector(apicd->cpu, vector, managed);
+ apicd->vector = 0;
+
+ /* Clean up move in progress */
+ vector = apicd->prev_vector;
+ if (!vector)
+ return;
+
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
+ apic_free_vector(apicd->prev_cpu, vector, managed);
+ apicd->prev_vector = 0;
+ apicd->move_in_progress = 0;
+ hlist_del_init(&apicd->clist);
+}
+
+static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+
+ trace_vector_deactivate(irqd->irq, apicd->is_managed,
+ apicd->can_reserve, false);
+
+ /* Regular fixed assigned interrupt */
+ if (!apicd->is_managed && !apicd->can_reserve)
+ return;
+ /* If the interrupt has a global reservation, nothing to do */
+ if (apicd->has_reserved)
+ return;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ clear_irq_vector(irqd);
+ if (apicd->can_reserve)
+ reserve_irq_vector_locked(irqd);
+ else
+ vector_assign_managed_shutdown(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+static int activate_reserved(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int ret;
+
+ ret = assign_irq_vector_any_locked(irqd);
+ if (!ret) {
+ apicd->has_reserved = false;
+ /*
+ * Core might have disabled reservation mode after
+ * allocating the irq descriptor. Ideally this should
+ * happen before allocation time, but that would require
+ * completely convoluted ways of transporting that
+ * information.
+ */
+ if (!irqd_can_reserve(irqd))
+ apicd->can_reserve = false;
+ }
+
+ /*
+ * Check to ensure that the effective affinity mask is a subset
+ * the user supplied affinity mask, and warn the user if it is not
+ */
+ if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd),
+ irq_data_get_affinity_mask(irqd))) {
+ pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n",
+ irqd->irq);
+ }
+
+ return ret;
+}
+
+static int activate_managed(struct irq_data *irqd)
+{
+ const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+ int ret;
+
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
+ /* Something in the core code broke! Survive gracefully */
+ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
+ return -EINVAL;
+ }
+
+ ret = assign_managed_vector(irqd, vector_searchmask);
+ /*
+ * This should not happen. The vector reservation got buggered. Handle
+ * it gracefully.
+ */
+ if (WARN_ON_ONCE(ret < 0)) {
+ pr_err("Managed startup irq %u, no vector available\n",
+ irqd->irq);
+ }
+ return ret;
+}
+
+static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
+ bool reserve)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+ int ret = 0;
+
+ trace_vector_activate(irqd->irq, apicd->is_managed,
+ apicd->can_reserve, reserve);
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (!apicd->can_reserve && !apicd->is_managed)
+ assign_irq_vector_any_locked(irqd);
+ else if (reserve || irqd_is_managed_and_shutdown(irqd))
+ vector_assign_managed_shutdown(irqd);
+ else if (apicd->is_managed)
+ ret = activate_managed(irqd);
+ else if (apicd->has_reserved)
+ ret = activate_reserved(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return ret;
+}
+
+static void vector_free_reserved_and_managed(struct irq_data *irqd)
+{
+ const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ trace_vector_teardown(irqd->irq, apicd->is_managed,
+ apicd->has_reserved);
+
+ if (apicd->has_reserved)
+ irq_matrix_remove_reserved(vector_matrix);
+ if (apicd->is_managed)
+ irq_matrix_remove_managed(vector_matrix, dest);
+}
+
+static void x86_vector_free_irqs(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i);
+ if (irqd && irqd->chip_data) {
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ clear_irq_vector(irqd);
+ vector_free_reserved_and_managed(irqd);
+ apicd = irqd->chip_data;
+ irq_domain_reset_irq_data(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ free_apic_chip_data(apicd);
+ }
+ }
+}
+
+static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
+ struct apic_chip_data *apicd)
+{
+ unsigned long flags;
+ bool realloc = false;
+
+ apicd->vector = ISA_IRQ_VECTOR(virq);
+ apicd->cpu = 0;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ /*
+ * If the interrupt is activated, then it must stay at this vector
+ * position. That's usually the timer interrupt (0).
+ */
+ if (irqd_is_activated(irqd)) {
+ trace_vector_setup(virq, true, 0);
+ apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
+ } else {
+ /* Release the vector */
+ apicd->can_reserve = true;
+ irqd_set_can_reserve(irqd);
+ clear_irq_vector(irqd);
+ realloc = true;
+ }
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return realloc;
+}
+
+static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
+ int i, err, node;
+
+ if (apic_is_disabled)
+ return -ENXIO;
+
+ /*
+ * Catch any attempt to touch the cascade interrupt on a PIC
+ * equipped system.
+ */
+ if (WARN_ON_ONCE(info->flags & X86_IRQ_ALLOC_LEGACY &&
+ virq == PIC_CASCADE_IR))
+ return -EINVAL;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irqd = irq_domain_get_irq_data(domain, virq + i);
+ BUG_ON(!irqd);
+ node = irq_data_get_node(irqd);
+ WARN_ON_ONCE(irqd->chip_data);
+ apicd = alloc_apic_chip_data(node);
+ if (!apicd) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ apicd->irq = virq + i;
+ irqd->chip = &lapic_controller;
+ irqd->chip_data = apicd;
+ irqd->hwirq = virq + i;
+ irqd_set_single_target(irqd);
+ /*
+ * Prevent that any of these interrupts is invoked in
+ * non interrupt context via e.g. generic_handle_irq()
+ * as that can corrupt the affinity move state.
+ */
+ irqd_set_handle_enforce_irqctx(irqd);
+
+ /* Don't invoke affinity setter on deactivated interrupts */
+ irqd_set_affinity_on_activate(irqd);
+
+ /*
+ * Legacy vectors are already assigned when the IOAPIC
+ * takes them over. They stay on the same vector. This is
+ * required for check_timer() to work correctly as it might
+ * switch back to legacy mode. Only update the hardware
+ * config.
+ */
+ if (info->flags & X86_IRQ_ALLOC_LEGACY) {
+ if (!vector_configure_legacy(virq + i, irqd, apicd))
+ continue;
+ }
+
+ err = assign_irq_vector_policy(irqd, info);
+ trace_vector_setup(virq + i, false, err);
+ if (err) {
+ irqd->chip_data = NULL;
+ free_apic_chip_data(apicd);
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ x86_vector_free_irqs(domain, virq, i);
+ return err;
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind)
+{
+ struct apic_chip_data apicd;
+ unsigned long flags;
+ int irq;
+
+ if (!irqd) {
+ irq_matrix_debug_show(m, vector_matrix, ind);
+ return;
+ }
+
+ irq = irqd->irq;
+ if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) {
+ seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq));
+ seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, "");
+ return;
+ }
+
+ if (!irqd->chip_data) {
+ seq_printf(m, "%*sVector: Not assigned\n", ind, "");
+ return;
+ }
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ memcpy(&apicd, irqd->chip_data, sizeof(apicd));
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector);
+ seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu);
+ if (apicd.prev_vector) {
+ seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector);
+ seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu);
+ }
+ seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0);
+ seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0);
+ seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0);
+ seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0);
+ seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist));
+}
+#endif
+
+int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+ return fwname && !strncmp(fwname, "IO-APIC-", 8) &&
+ simple_strtol(fwname+8, NULL, 10) == fwspec->param[0];
+ }
+ return to_of_node(fwspec->fwnode) &&
+ of_device_is_compatible(to_of_node(fwspec->fwnode),
+ "intel,ce4100-ioapic");
+}
+
+int x86_fwspec_is_hpet(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+ return fwname && !strncmp(fwname, "HPET-MSI-", 9) &&
+ simple_strtol(fwname+9, NULL, 10) == fwspec->param[0];
+ }
+ return 0;
+}
+
+static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ /*
+ * HPET and I/OAPIC cannot be parented in the vector domain
+ * if IRQ remapping is enabled. APIC IDs above 15 bits are
+ * only permitted if IRQ remapping is enabled, so check that.
+ */
+ if (apic_id_valid(32768))
+ return 0;
+
+ return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec);
+}
+
+static const struct irq_domain_ops x86_vector_domain_ops = {
+ .select = x86_vector_select,
+ .alloc = x86_vector_alloc_irqs,
+ .free = x86_vector_free_irqs,
+ .activate = x86_vector_activate,
+ .deactivate = x86_vector_deactivate,
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+ .debug_show = x86_vector_debug_show,
+#endif
+};
+
+int __init arch_probe_nr_irqs(void)
+{
+ int nr;
+
+ if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids)
+ irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids);
+
+ nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI)
+ /*
+ * for MSI and HT dyn irq
+ */
+ if (gsi_top <= NR_IRQS_LEGACY)
+ nr += 8 * nr_cpu_ids;
+ else
+ nr += gsi_top * 16;
+#endif
+ if (nr < irq_get_nr_irqs())
+ irq_set_nr_irqs(nr);
+
+ /*
+ * We don't know if PIC is present at this point so we need to do
+ * probe() to get the right number of legacy IRQs.
+ */
+ return legacy_pic->probe();
+}
+
+void lapic_assign_legacy_vector(unsigned int irq, bool replace)
+{
+ /*
+ * Use assign system here so it won't get accounted as allocated
+ * and movable in the cpu hotplug check and it prevents managed
+ * irq reservation from touching it.
+ */
+ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
+}
+
+void __init lapic_update_legacy_vectors(void)
+{
+ unsigned int i;
+
+ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+ return;
+
+ /*
+ * If the IO/APIC is disabled via config, kernel command line or
+ * lack of enumeration then all legacy interrupts are routed
+ * through the PIC. Make sure that they are marked as legacy
+ * vectors. PIC_CASCADE_IRQ has already been marked in
+ * lapic_assign_system_vectors().
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ lapic_assign_legacy_vector(i, true);
+ }
+}
+
+void __init lapic_assign_system_vectors(void)
+{
+ unsigned int i, vector;
+
+ for_each_set_bit(vector, system_vectors, NR_VECTORS)
+ irq_matrix_assign_system(vector_matrix, vector, false);
+
+ if (nr_legacy_irqs() > 1)
+ lapic_assign_legacy_vector(PIC_CASCADE_IR, false);
+
+ /* System vectors are reserved, online it */
+ irq_matrix_online(vector_matrix);
+
+ /* Mark the preallocated legacy interrupts */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ /*
+ * Don't touch the cascade interrupt. It's unusable
+ * on PIC equipped machines. See the large comment
+ * in the IO/APIC code.
+ */
+ if (i != PIC_CASCADE_IR)
+ irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
+ }
+}
+
+int __init arch_early_irq_init(void)
+{
+ struct fwnode_handle *fn;
+
+ fn = irq_domain_alloc_named_fwnode("VECTOR");
+ BUG_ON(!fn);
+ x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
+ NULL);
+ BUG_ON(x86_vector_domain == NULL);
+ irq_set_default_domain(x86_vector_domain);
+
+ BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+
+ /*
+ * Allocate the vector matrix allocator data structure and limit the
+ * search area.
+ */
+ vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR,
+ FIRST_SYSTEM_VECTOR);
+ BUG_ON(!vector_matrix);
+
+ return arch_early_ioapic_init();
+}
+
+#ifdef CONFIG_SMP
+
+static struct irq_desc *__setup_vector_irq(int vector)
+{
+ int isairq = vector - ISA_IRQ_VECTOR(0);
+
+ /* Check whether the irq is in the legacy space */
+ if (isairq < 0 || isairq >= nr_legacy_irqs())
+ return VECTOR_UNUSED;
+ /* Check whether the irq is handled by the IOAPIC */
+ if (test_bit(isairq, &io_apic_irqs))
+ return VECTOR_UNUSED;
+ return irq_to_desc(isairq);
+}
+
+/* Online the local APIC infrastructure and initialize the vectors */
+void lapic_online(void)
+{
+ unsigned int vector;
+
+ lockdep_assert_held(&vector_lock);
+
+ /* Online the vector matrix array for this CPU */
+ irq_matrix_online(vector_matrix);
+
+ /*
+ * The interrupt affinity logic never targets interrupts to offline
+ * CPUs. The exception are the legacy PIC interrupts. In general
+ * they are only targeted to CPU0, but depending on the platform
+ * they can be distributed to any online CPU in hardware. The
+ * kernel has no influence on that. So all active legacy vectors
+ * must be installed on all CPUs. All non legacy interrupts can be
+ * cleared.
+ */
+ for (vector = 0; vector < NR_VECTORS; vector++)
+ this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
+}
+
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr);
+
+void lapic_offline(void)
+{
+ struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup);
+
+ lock_vector_lock();
+
+ /* In case the vector cleanup timer has not expired */
+ __vector_cleanup(cl, false);
+
+ irq_matrix_offline(vector_matrix);
+ WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0);
+ WARN_ON_ONCE(!hlist_empty(&cl->head));
+
+ unlock_vector_lock();
+}
+
+static int apic_set_affinity(struct irq_data *irqd,
+ const struct cpumask *dest, bool force)
+{
+ int err;
+
+ if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
+ return -EIO;
+
+ raw_spin_lock(&vector_lock);
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ if (irqd_affinity_is_managed(irqd))
+ err = assign_managed_vector(irqd, vector_searchmask);
+ else
+ err = assign_vector_locked(irqd, vector_searchmask);
+ raw_spin_unlock(&vector_lock);
+ return err ? err : IRQ_SET_MASK_OK;
+}
+
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+ unsigned int vector = apicd->prev_vector;
+ unsigned int cpu = apicd->prev_cpu;
+ bool managed = apicd->is_managed;
+
+ /*
+ * Managed interrupts are usually not migrated away
+ * from an online CPU, but CPU isolation 'managed_irq'
+ * can make that happen.
+ * 1) Activation does not take the isolation into account
+ * to keep the code simple
+ * 2) Migration away from an isolated CPU can happen when
+ * a non-isolated CPU which is in the calculated
+ * affinity mask comes online.
+ */
+ trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+ apic_free_vector(cpu, vector, managed);
+ per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+ hlist_del_init(&apicd->clist);
+ apicd->prev_vector = 0;
+ apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+ unsigned int cpu = smp_processor_id();
+ struct apic_chip_data *apicd;
+ unsigned int vector;
+
+ guard(raw_spinlock)(&vector_lock);
+ apicd = apic_chip_data(irqd);
+ if (!apicd)
+ return;
+
+ /*
+ * If prev_vector is empty or the descriptor is neither currently
+ * nor previously on the outgoing CPU no action required.
+ */
+ vector = apicd->prev_vector;
+ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ return;
+
+ /*
+ * This is tricky. If the cleanup of the old vector has not been
+ * done yet, then the following setaffinity call will fail with
+ * -EBUSY. This can leave the interrupt in a stale state.
+ *
+ * All CPUs are stuck in stop machine with interrupts disabled so
+ * calling __irq_complete_move() would be completely pointless.
+ *
+ * 1) The interrupt is in move_in_progress state. That means that we
+ * have not seen an interrupt since the io_apic was reprogrammed to
+ * the new vector.
+ *
+ * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+ * have not been processed yet.
+ */
+ if (apicd->move_in_progress) {
+ /*
+ * In theory there is a race:
+ *
+ * set_ioapic(new_vector) <-- Interrupt is raised before update
+ * is effective, i.e. it's raised on
+ * the old vector.
+ *
+ * So if the target cpu cannot handle that interrupt before
+ * the old vector is cleaned up, we get a spurious interrupt
+ * and in the worst case the ioapic irq line becomes stale.
+ *
+ * But in case of cpu hotplug this should be a non issue
+ * because if the affinity update happens right before all
+ * cpus rendezvous in stop machine, there is no way that the
+ * interrupt can be blocked on the target cpu because all cpus
+ * loops first with interrupts enabled in stop machine, so the
+ * old vector is not yet cleaned up when the interrupt fires.
+ *
+ * So the only way to run into this issue is if the delivery
+ * of the interrupt on the apic/system bus would be delayed
+ * beyond the point where the target cpu disables interrupts
+ * in stop machine. I doubt that it can happen, but at least
+ * there is a theoretical chance. Virtualization might be
+ * able to expose this, but AFAICT the IOAPIC emulation is not
+ * as stupid as the real hardware.
+ *
+ * Anyway, there is nothing we can do about that at this point
+ * w/o refactoring the whole fixup_irq() business completely.
+ * We print at least the irq number and the old vector number,
+ * so we have the necessary information when a problem in that
+ * area arises.
+ */
+ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+ irqd->irq, vector);
+ }
+ free_moved_vector(apicd);
+}
+
+#else
+# define apic_set_affinity NULL
+# define apic_force_complete_move NULL
+#endif
+
+static int apic_retrigger_irq(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ __apic_send_IPI(apicd->cpu, apicd->vector);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ return 1;
+}
+
+void apic_ack_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+ apic_eoi();
+}
+
+void apic_ack_edge(struct irq_data *irqd)
+{
+ irq_complete_move(irqd_cfg(irqd));
+ apic_ack_irq(irqd);
+}
+
+static void x86_vector_msi_compose_msg(struct irq_data *data,
+ struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg, false);
+}
+
+static struct irq_chip lapic_controller = {
+ .name = "APIC",
+ .irq_ack = apic_ack_edge,
+ .irq_set_affinity = apic_set_affinity,
+ .irq_compose_msi_msg = x86_vector_msi_compose_msg,
+ .irq_force_complete_move = apic_force_complete_move,
+ .irq_retrigger = apic_retrigger_irq,
+};
+
+#ifdef CONFIG_SMP
+
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
+{
+ struct apic_chip_data *apicd;
+ struct hlist_node *tmp;
+ bool rearm = false;
+
+ lockdep_assert_held(&vector_lock);
+
+ hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
+ unsigned int vector = apicd->prev_vector;
+
+ /*
+ * Paranoia: Check if the vector that needs to be cleaned
+ * up is registered at the APICs IRR. That's clearly a
+ * hardware issue if the vector arrived on the old target
+ * _after_ interrupts were disabled above. Keep @apicd
+ * on the list and schedule the timer again to give the CPU
+ * a chance to handle the pending interrupt.
+ *
+ * Do not check IRR when called from lapic_offline(), because
+ * fixup_irqs() was just called to scan IRR for set bits and
+ * forward them to new destination CPUs via IPIs.
+ */
+ if (check_irr && is_vector_pending(vector)) {
+ pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
+ rearm = true;
+ continue;
+ }
+ free_moved_vector(apicd);
+ }
+
+ /*
+ * Must happen under vector_lock to make the timer_pending() check
+ * in __vector_schedule_cleanup() race free against the rearm here.
+ */
+ if (rearm)
+ mod_timer(&cl->timer, jiffies + 1);
+}
+
+static void vector_cleanup_callback(struct timer_list *tmr)
+{
+ struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer);
+
+ /* Prevent vectors vanishing under us */
+ raw_spin_lock_irq(&vector_lock);
+ __vector_cleanup(cl, true);
+ raw_spin_unlock_irq(&vector_lock);
+}
+
+static void __vector_schedule_cleanup(struct apic_chip_data *apicd)
+{
+ unsigned int cpu = apicd->prev_cpu;
+
+ raw_spin_lock(&vector_lock);
+ apicd->move_in_progress = 0;
+ if (cpu_online(cpu)) {
+ struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu);
+
+ hlist_add_head(&apicd->clist, &cl->head);
+
+ /*
+ * The lockless timer_pending() check is safe here. If it
+ * returns true, then the callback will observe this new
+ * apic data in the hlist as everything is serialized by
+ * vector lock.
+ *
+ * If it returns false then the timer is either not armed
+ * or the other CPU executes the callback, which again
+ * would be blocked on vector lock. Rearming it in the
+ * latter case makes it fire for nothing.
+ *
+ * This is also safe against the callback rearming the timer
+ * because that's serialized via vector lock too.
+ */
+ if (!timer_pending(&cl->timer)) {
+ cl->timer.expires = jiffies + 1;
+ add_timer_on(&cl->timer, cpu);
+ }
+ } else {
+ pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
+ free_moved_vector(apicd);
+ }
+ raw_spin_unlock(&vector_lock);
+}
+
+void vector_schedule_cleanup(struct irq_cfg *cfg)
+{
+ struct apic_chip_data *apicd;
+
+ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+ if (apicd->move_in_progress)
+ __vector_schedule_cleanup(apicd);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+ struct apic_chip_data *apicd;
+
+ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+ if (likely(!apicd->move_in_progress))
+ return;
+
+ /*
+ * If the interrupt arrived on the new target CPU, cleanup the
+ * vector on the old target CPU. A vector check is not required
+ * because an interrupt can never move from one vector to another
+ * on the same CPU.
+ */
+ if (apicd->cpu == smp_processor_id())
+ __vector_schedule_cleanup(apicd);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Note, this is not accurate accounting, but at least good enough to
+ * prevent that the actual interrupt move will run out of vectors.
+ */
+int lapic_can_unplug_cpu(void)
+{
+ unsigned int rsvd, avl, tomove, cpu = smp_processor_id();
+ int ret = 0;
+
+ raw_spin_lock(&vector_lock);
+ tomove = irq_matrix_allocated(vector_matrix);
+ avl = irq_matrix_available(vector_matrix, true);
+ if (avl < tomove) {
+ pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n",
+ cpu, tomove, avl);
+ ret = -ENOSPC;
+ goto out;
+ }
+ rsvd = irq_matrix_reserved(vector_matrix);
+ if (avl < rsvd) {
+ pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n",
+ rsvd, avl);
+ }
+out:
+ raw_spin_unlock(&vector_lock);
+ return ret;
+}
+#endif /* HOTPLUG_CPU */
+#endif /* SMP */
+
+static void __init print_APIC_field(int base)
+{
+ int i;
+
+ printk(KERN_DEBUG);
+
+ for (i = 0; i < 8; i++)
+ pr_cont("%08x", apic_read(base + i*0x10));
+
+ pr_cont("\n");
+}
+
+static void __init print_local_APIC(void *dummy)
+{
+ unsigned int i, v, ver, maxlvt;
+ u64 icr;
+
+ pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), read_apic_id());
+ v = apic_read(APIC_ID);
+ pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id());
+ v = apic_read(APIC_LVR);
+ pr_info("... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = lapic_get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ /* !82489DX */
+ if (APIC_INTEGRATED(ver)) {
+ if (!APIC_XAPIC(ver)) {
+ v = apic_read(APIC_ARBPRI);
+ pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+ v, v & APIC_ARBPRI_MASK);
+ }
+ v = apic_read(APIC_PROCPRI);
+ pr_debug("... APIC PROCPRI: %08x\n", v);
+ }
+
+ /*
+ * Remote read supported only in the 82489DX and local APIC for
+ * Pentium processors.
+ */
+ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+ v = apic_read(APIC_RRR);
+ pr_debug("... APIC RRR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_LDR);
+ pr_debug("... APIC LDR: %08x\n", v);
+ if (!x2apic_enabled()) {
+ v = apic_read(APIC_DFR);
+ pr_debug("... APIC DFR: %08x\n", v);
+ }
+ v = apic_read(APIC_SPIV);
+ pr_debug("... APIC SPIV: %08x\n", v);
+
+ pr_debug("... APIC ISR field:\n");
+ print_APIC_field(APIC_ISR);
+ pr_debug("... APIC TMR field:\n");
+ print_APIC_field(APIC_TMR);
+ pr_debug("... APIC IRR field:\n");
+ print_APIC_field(APIC_IRR);
+
+ /* !82489DX */
+ if (APIC_INTEGRATED(ver)) {
+ /* Due to the Pentium erratum 3AP. */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+
+ v = apic_read(APIC_ESR);
+ pr_debug("... APIC ESR: %08x\n", v);
+ }
+
+ icr = apic_icr_read();
+ pr_debug("... APIC ICR: %08x\n", (u32)icr);
+ pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+ v = apic_read(APIC_LVTT);
+ pr_debug("... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) {
+ /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ pr_debug("... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ pr_debug("... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ pr_debug("... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) {
+ /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ pr_debug("... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ pr_debug("... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ pr_debug("... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ pr_debug("... APIC TDCR: %08x\n", v);
+
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ v = apic_read(APIC_EFEAT);
+ maxlvt = (v >> 16) & 0xff;
+ pr_debug("... APIC EFEAT: %08x\n", v);
+ v = apic_read(APIC_ECTRL);
+ pr_debug("... APIC ECTRL: %08x\n", v);
+ for (i = 0; i < maxlvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ pr_debug("... APIC EILVT%d: %08x\n", i, v);
+ }
+ }
+ pr_cont("\n");
+}
+
+static void __init print_local_APICs(int maxcpu)
+{
+ int cpu;
+
+ if (!maxcpu)
+ return;
+
+ preempt_disable();
+ for_each_online_cpu(cpu) {
+ if (cpu >= maxcpu)
+ break;
+ smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ }
+ preempt_enable();
+}
+
+static void __init print_PIC(void)
+{
+ unsigned int v;
+ unsigned long flags;
+
+ if (!nr_legacy_irqs())
+ return;
+
+ pr_debug("\nprinting PIC contents\n");
+
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ pr_debug("... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ pr_debug("... PIC IRR: %04x\n", v);
+
+ outb(0x0b, 0xa0);
+ outb(0x0b, 0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a, 0xa0);
+ outb(0x0a, 0x20);
+
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ pr_debug("... PIC ISR: %04x\n", v);
+
+ v = inb(PIC_ELCR2) << 8 | inb(PIC_ELCR1);
+ pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+static int show_lapic __initdata = 1;
+static __init int setup_show_lapic(char *arg)
+{
+ int num = -1;
+
+ if (strcmp(arg, "all") == 0) {
+ show_lapic = CONFIG_NR_CPUS;
+ } else {
+ get_option(&arg, &num);
+ if (num >= 0)
+ show_lapic = num;
+ }
+
+ return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+static int __init print_ICs(void)
+{
+ if (apic_verbosity == APIC_QUIET)
+ return 0;
+
+ print_PIC();
+
+ /* don't print out if apic is not there */
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
+ return 0;
+
+ print_local_APICs(show_lapic);
+ print_IO_APICs();
+
+ return 0;
+}
+
+late_initcall(print_ICs);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index c88baa4ff0e5..7db83212effb 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -1,75 +1,72 @@
-#include <linux/threads.h>
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
-#include <linux/cpu.h>
-
-#include <asm/smp.h>
-#include <asm/x2apic.h>
-
-static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
-static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/apic.h>
+
+#include "local.h"
+
+#define apic_cluster(apicid) ((apicid) >> 4)
+
+/*
+ * __x2apic_send_IPI_mask() possibly needs to read
+ * x86_cpu_to_logical_apicid for all online cpus in a sequential way.
+ * Using per cpu variable would cost one cache line per cpu.
+ */
+static u32 *x86_cpu_to_logical_apicid __read_mostly;
+
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
+static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return x2apic_enabled();
}
-static inline u32 x2apic_cluster(int cpu)
+static void x2apic_send_IPI(int cpu, int vector)
{
- return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
+ u32 dest = x86_cpu_to_logical_apicid[cpu];
+
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
}
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
- struct cpumask *cpus_in_cluster_ptr;
- struct cpumask *ipi_mask_ptr;
- unsigned int cpu, this_cpu;
+ unsigned int cpu, clustercpu;
+ struct cpumask *tmpmsk;
unsigned long flags;
u32 dest;
- x2apic_wrmsr_fence();
-
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
- this_cpu = smp_processor_id();
+ tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
+ cpumask_copy(tmpmsk, mask);
+ /* If IPI should not be sent to self, clear current CPU */
+ if (apic_dest != APIC_DEST_ALLINC)
+ __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
- /*
- * We are to modify mask, so we need an own copy
- * and be sure it's manipulated with irq off.
- */
- ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
- cpumask_copy(ipi_mask_ptr, mask);
+ /* Collapse cpus in a cluster so a single IPI per cluster is sent */
+ for_each_cpu(cpu, tmpmsk) {
+ struct cpumask *cmsk = per_cpu(cluster_masks, cpu);
- /*
- * The idea is to send one IPI per cluster.
- */
- for_each_cpu(cpu, ipi_mask_ptr) {
- unsigned long i;
-
- cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
dest = 0;
-
- /* Collect cpus in cluster. */
- for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
- if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
- dest |= per_cpu(x86_cpu_to_logical_apicid, i);
- }
+ for_each_cpu_and(clustercpu, tmpmsk, cmsk)
+ dest |= x86_cpu_to_logical_apicid[clustercpu];
if (!dest)
continue;
- __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
- /*
- * Cluster sibling cpus should be discared now so
- * we would not send IPI them second time.
- */
- cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
+ /* Remove cluster CPUs from tmpmask */
+ cpumask_andnot(tmpmsk, tmpmsk, cmsk);
}
local_irq_restore(flags);
@@ -86,211 +83,179 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static u32 x2apic_calc_apicid(unsigned int cpu)
{
- __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+ return x86_cpu_to_logical_apicid[cpu];
}
-static void x2apic_send_IPI_all(int vector)
+static void init_x2apic_ldr(void)
{
- __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+ struct cpumask *cmsk = this_cpu_read(cluster_masks);
+
+ BUG_ON(!cmsk);
+
+ cpumask_set_cpu(smp_processor_id(), cmsk);
}
-static int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
+/*
+ * As an optimisation during boot, set the cluster_mask for all present
+ * CPUs at once, to prevent each of them having to iterate over the others
+ * to find the existing cluster_mask.
+ */
+static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster)
{
- u32 dest = 0;
- u16 cluster;
- int i;
+ int cpu_i;
- for_each_cpu_and(i, cpumask, andmask) {
- if (!cpumask_test_cpu(i, cpu_online_mask))
- continue;
- dest = per_cpu(x86_cpu_to_logical_apicid, i);
- cluster = x2apic_cluster(i);
- break;
- }
-
- if (!dest)
- return -EINVAL;
+ for_each_present_cpu(cpu_i) {
+ struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i);
+ u32 apicid = apic->cpu_present_to_apicid(cpu_i);
- for_each_cpu_and(i, cpumask, andmask) {
- if (!cpumask_test_cpu(i, cpu_online_mask))
+ if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster)
continue;
- if (cluster != x2apic_cluster(i))
- continue;
- dest |= per_cpu(x86_cpu_to_logical_apicid, i);
- }
- *apicid = dest;
+ if (WARN_ON_ONCE(*cpu_cmsk == cmsk))
+ continue;
- return 0;
+ BUG_ON(*cpu_cmsk);
+ *cpu_cmsk = cmsk;
+ }
}
-static void init_x2apic_ldr(void)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
{
- unsigned int this_cpu = smp_processor_id();
- unsigned int cpu;
+ struct cpumask *cmsk = NULL;
+ unsigned int cpu_i;
- per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+ /*
+ * At boot time, the CPU present mask is stable. The cluster mask is
+ * allocated for the first CPU in the cluster and propagated to all
+ * present siblings in the cluster. If the cluster mask is already set
+ * on entry to this function for a given CPU, there is nothing to do.
+ */
+ if (per_cpu(cluster_masks, cpu))
+ return 0;
- __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
- for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
- __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
- }
-}
+ if (system_state < SYSTEM_RUNNING)
+ goto alloc;
- /*
- * At CPU state changes, update the x2apic cluster sibling info.
- */
-static int __cpuinit
-update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int this_cpu = (unsigned long)hcpu;
- unsigned int cpu;
- int err = 0;
-
- switch (action) {
- case CPU_UP_PREPARE:
- if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
- GFP_KERNEL)) {
- err = -ENOMEM;
- } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
- GFP_KERNEL)) {
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- err = -ENOMEM;
- }
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
- __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ /*
+ * On post boot hotplug for a CPU which was not present at boot time,
+ * iterate over all possible CPUs (even those which are not present
+ * any more) to find any existing cluster mask.
+ */
+ for_each_possible_cpu(cpu_i) {
+ u32 apicid = apic->cpu_present_to_apicid(cpu_i);
+
+ if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+ cmsk = per_cpu(cluster_masks, cpu_i);
+ /*
+ * If the cluster is already initialized, just store
+ * the mask and return. There's no need to propagate.
+ */
+ if (cmsk) {
+ per_cpu(cluster_masks, cpu) = cmsk;
+ return 0;
+ }
}
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- free_cpumask_var(per_cpu(ipi_mask, this_cpu));
- break;
}
+ /*
+ * No CPU in the cluster has ever been initialized, so fall through to
+ * the boot time code which will also populate the cluster mask for any
+ * other CPU in the cluster which is (now) present.
+ */
+alloc:
+ cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+ if (!cmsk)
+ return -ENOMEM;
+ per_cpu(cluster_masks, cpu) = cmsk;
+ prefill_clustermask(cmsk, cpu, cluster);
- return notifier_from_errno(err);
+ return 0;
}
-static struct notifier_block __refdata x2apic_cpu_notifier = {
- .notifier_call = update_clusterinfo,
-};
-
-static int x2apic_init_cpu_notifier(void)
+static int x2apic_prepare_cpu(unsigned int cpu)
{
- int cpu = smp_processor_id();
+ u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+ u32 cluster = apic_cluster(phys_apicid);
+ u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+ int node = cpu_to_node(cpu);
- zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+ x86_cpu_to_logical_apicid[cpu] = logical_apicid;
- BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
+ if (alloc_clustermask(cpu, cluster, node) < 0)
+ return -ENOMEM;
- __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
- register_hotcpu_notifier(&x2apic_cpu_notifier);
- return 1;
-}
+ if (!zalloc_cpumask_var_node(&per_cpu(ipi_mask, cpu), GFP_KERNEL, node))
+ return -ENOMEM;
-static int x2apic_cluster_probe(void)
-{
- if (x2apic_mode)
- return x2apic_init_cpu_notifier();
- else
- return 0;
+ return 0;
}
-static const struct cpumask *x2apic_cluster_target_cpus(void)
+static int x2apic_dead_cpu(unsigned int dead_cpu)
{
- return cpu_all_mask;
+ struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);
+
+ if (cmsk)
+ cpumask_clear_cpu(dead_cpu, cmsk);
+ free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
+ return 0;
}
-/*
- * Each x2apic cluster is an allocation domain.
- */
-static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
+static int x2apic_cluster_probe(void)
{
- /*
- * To minimize vector pressure, default case of boot, device bringup
- * etc will use a single cpu for the interrupt destination.
- *
- * On explicit migration requests coming from irqbalance etc,
- * interrupts will be routed to the x2apic cluster (cluster-id
- * derived from the first cpu in the mask) members specified
- * in the mask.
- */
- if (mask == x2apic_cluster_target_cpus())
- cpumask_copy(retmask, cpumask_of(cpu));
- else
- cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+ u32 slots;
+
+ if (!x2apic_mode)
+ return 0;
+
+ slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
+ x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
+ if (!x86_cpu_to_logical_apicid)
+ return 0;
+
+ if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
+ x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
+ pr_err("Failed to register X2APIC_PREPARE\n");
+ kfree(x86_cpu_to_logical_apicid);
+ x86_cpu_to_logical_apicid = NULL;
+ return 0;
+ }
+ init_x2apic_ldr();
+ return 1;
}
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __ro_after_init = {
.name = "cluster x2apic",
.probe = x2apic_cluster_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .apic_id_valid = x2apic_apic_id_valid,
- .apic_id_registered = x2apic_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
- .irq_dest_mode = 1, /* logical */
+ .dest_mode_logical = true,
- .target_cpus = x2apic_cluster_target_cpus,
.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
- .vector_allocation_domain = cluster_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
-
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = x2apic_phys_pkg_id,
- .mps_oem_check = NULL,
+ .max_apic_id = UINT_MAX,
+ .x2apic_set_max_apicid = true,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = x2apic_set_apic_id,
- .apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = x2apic_calc_apicid,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
+ .nmi_to_offline_cpu = true,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
- .eoi_write = native_apic_msr_eoi_write,
+ .eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
- .wait_icr_idle = native_x2apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};
apic_driver(apic_x2apic_cluster);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 562a76d433c8..12d4c35547a6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -1,19 +1,23 @@
-#include <linux/threads.h>
+// SPDX-License-Identifier: GPL-2.0
+
#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
+#include <linux/acpi.h>
-#include <asm/smp.h>
-#include <asm/x2apic.h>
+#include "local.h"
int x2apic_phys;
static struct apic apic_x2apic_phys;
+u32 x2apic_max_apicid __ro_after_init = UINT_MAX;
+
+void __init x2apic_set_max_apicid(u32 apicid)
+{
+ x2apic_max_apicid = apicid;
+ if (apic->x2apic_set_max_apicid)
+ apic->max_apic_id = apicid;
+}
-static int set_x2apic_phys_mode(char *arg)
+static int __init set_x2apic_phys_mode(char *arg)
{
x2apic_phys = 1;
return 0;
@@ -22,11 +26,13 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
static bool x2apic_fadt_phys(void)
{
+#ifdef CONFIG_ACPI
if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
return true;
}
+#endif
return false;
}
@@ -35,6 +41,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
+static void x2apic_send_IPI(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
+}
+
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
@@ -42,7 +57,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long this_cpu;
unsigned long flags;
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
@@ -67,84 +83,83 @@ static void
__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static void __x2apic_send_IPI_shorthand(int vector, u32 which)
{
- __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+ unsigned long cfg = __prepare_ICR(which, vector, 0);
+
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
+ native_x2apic_icr_write(cfg, 0);
}
-static void x2apic_send_IPI_all(int vector)
+void x2apic_send_IPI_allbutself(int vector)
{
- __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+ __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT);
}
-static void init_x2apic_ldr(void)
+void x2apic_send_IPI_all(int vector)
{
+ __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC);
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
+{
+ unsigned long cfg = __prepare_ICR(0, vector, dest);
+ native_x2apic_icr_write(cfg, apicid);
}
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+ if (!x2apic_mode)
+ return 0;
+
+ if (x2apic_phys || x2apic_fadt_phys())
return 1;
return apic == &apic_x2apic_phys;
}
-static struct apic apic_x2apic_phys = {
+u32 x2apic_get_apic_id(u32 id)
+{
+ return id;
+}
+
+static struct apic apic_x2apic_phys __ro_after_init = {
.name = "physical x2apic",
.probe = x2apic_phys_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .apic_id_valid = x2apic_apic_id_valid,
- .apic_id_registered = x2apic_apic_id_registered,
- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .dest_mode_logical = false,
- .target_cpus = online_target_cpus,
.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
- .init_apic_ldr = init_x2apic_ldr,
-
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = x2apic_phys_pkg_id,
- .mps_oem_check = NULL,
+ .max_apic_id = UINT_MAX,
+ .x2apic_set_max_apicid = true,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = x2apic_set_apic_id,
- .apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = apic_default_calc_apicid,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
+ .nmi_to_offline_cpu = true,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
- .eoi_write = native_apic_msr_eoi_write,
+ .eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
- .wait_icr_idle = native_x2apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};
apic_driver(apic_x2apic_phys);
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
new file mode 100644
index 000000000000..dbc5678bc3b6
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure AVIC Support (SEV-SNP Guests)
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/cpumask.h>
+#include <linux/percpu-defs.h>
+#include <linux/align.h>
+
+#include <asm/apic.h>
+#include <asm/sev.h>
+
+#include "local.h"
+
+struct secure_avic_page {
+ u8 regs[PAGE_SIZE];
+} __aligned(PAGE_SIZE);
+
+static struct secure_avic_page __percpu *savic_page __ro_after_init;
+
+static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
+}
+
+static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset)
+{
+ return &per_cpu_ptr(savic_page, cpu)->regs[offset];
+}
+
+static inline void update_vector(unsigned int cpu, unsigned int offset,
+ unsigned int vector, bool set)
+{
+ void *bitmap = get_reg_bitmap(cpu, offset);
+
+ if (set)
+ apic_set_vector(vector, bitmap);
+ else
+ apic_clear_vector(vector, bitmap);
+}
+
+#define SAVIC_ALLOWED_IRR 0x204
+
+/*
+ * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers
+ * result in #VC exception (for non-accelerated register accesses)
+ * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler
+ * can read/write the x2APIC register in the guest APIC backing page.
+ *
+ * Since doing this would increase the latency of accessing x2APIC
+ * registers, instead of doing RDMSR/WRMSR based accesses and
+ * handling the APIC register reads/writes in the #VC exception handler,
+ * the read() and write() callbacks directly read/write the APIC register
+ * from/to the vCPU's APIC backing page.
+ */
+static u32 savic_read(u32 reg)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TMCCT:
+ case APIC_TDCR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ return savic_ghcb_msr_read(reg);
+ case APIC_ID:
+ case APIC_LVR:
+ case APIC_TASKPRI:
+ case APIC_ARBPRI:
+ case APIC_PROCPRI:
+ case APIC_LDR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_EFEAT:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ return apic_get_reg(ap, reg);
+ case APIC_ICR:
+ return (u32)apic_get_reg64(ap, reg);
+ case APIC_ISR ... APIC_ISR + 0x70:
+ case APIC_TMR ... APIC_TMR + 0x70:
+ if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+ "APIC register read offset 0x%x not aligned at 16 bytes", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ /* IRR and ALLOWED_IRR offset range */
+ case APIC_IRR ... APIC_IRR + 0x74:
+ /*
+ * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
+ * their respective base offset. APIC_IRRs are in the range
+ *
+ * (0x200, 0x210, ..., 0x270)
+ *
+ * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range
+ *
+ * (0x204, 0x214, ..., 0x274).
+ *
+ * Filter out everything else.
+ */
+ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+ IS_ALIGNED(reg - 4, 16)),
+ "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ default:
+ pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg);
+ return 0;
+ }
+}
+
+#define SAVIC_NMI_REQ 0x278
+
+/*
+ * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware
+ * updates the APIC_IRR in the APIC backing page of the vCPU. In addition,
+ * hardware evaluates the new APIC_IRR update for interrupt injection to
+ * the vCPU. So, self IPIs are hardware-accelerated.
+ */
+static inline void self_ipi_reg_write(unsigned int vector)
+{
+ native_apic_msr_write(APIC_SELF_IPI, vector);
+}
+
+static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi)
+{
+ if (nmi)
+ apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1);
+ else
+ update_vector(cpu, APIC_IRR, vector, true);
+}
+
+static void send_ipi_allbut(unsigned int vector, bool nmi)
+{
+ unsigned int cpu, src_cpu;
+
+ guard(irqsave)();
+
+ src_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (cpu == src_cpu)
+ continue;
+ send_ipi_dest(cpu, vector, nmi);
+ }
+}
+
+static inline void self_ipi(unsigned int vector, bool nmi)
+{
+ u32 icr_low = APIC_SELF_IPI | vector;
+
+ if (nmi)
+ icr_low |= APIC_DM_NMI;
+
+ native_x2apic_icr_write(icr_low, 0);
+}
+
+static void savic_icr_write(u32 icr_low, u32 icr_high)
+{
+ unsigned int dsh, vector;
+ u64 icr_data;
+ bool nmi;
+
+ dsh = icr_low & APIC_DEST_ALLBUT;
+ vector = icr_low & APIC_VECTOR_MASK;
+ nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI);
+
+ switch (dsh) {
+ case APIC_DEST_SELF:
+ self_ipi(vector, nmi);
+ break;
+ case APIC_DEST_ALLINC:
+ self_ipi(vector, nmi);
+ fallthrough;
+ case APIC_DEST_ALLBUT:
+ send_ipi_allbut(vector, nmi);
+ break;
+ default:
+ send_ipi_dest(icr_high, vector, nmi);
+ break;
+ }
+
+ icr_data = ((u64)icr_high) << 32 | icr_low;
+ if (dsh != APIC_DEST_SELF)
+ savic_ghcb_msr_write(APIC_ICR, icr_data);
+ apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data);
+}
+
+static void savic_write(u32 reg, u32 data)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVTERR:
+ savic_ghcb_msr_write(reg, data);
+ break;
+ case APIC_TASKPRI:
+ case APIC_EOI:
+ case APIC_SPIV:
+ case SAVIC_NMI_REQ:
+ case APIC_ESR:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ apic_set_reg(ap, reg, data);
+ break;
+ case APIC_ICR:
+ savic_icr_write(data, 0);
+ break;
+ case APIC_SELF_IPI:
+ self_ipi_reg_write(data);
+ break;
+ /* ALLOWED_IRR offsets are writable */
+ case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+ if (IS_ALIGNED(reg - 4, 16)) {
+ apic_set_reg(ap, reg, data);
+ break;
+ }
+ fallthrough;
+ default:
+ pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg);
+ }
+}
+
+static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh)
+{
+ unsigned int icr_low;
+
+ icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL);
+ savic_icr_write(icr_low, dest);
+}
+
+static void savic_send_ipi(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ send_ipi(dest, vector, 0);
+}
+
+static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self)
+{
+ unsigned int cpu, this_cpu;
+
+ guard(irqsave)();
+
+ this_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, mask) {
+ if (excl_self && cpu == this_cpu)
+ continue;
+ send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0);
+ }
+}
+
+static void savic_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, false);
+}
+
+static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, true);
+}
+
+static void savic_send_ipi_allbutself(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLBUT);
+}
+
+static void savic_send_ipi_all(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLINC);
+}
+
+static void savic_send_ipi_self(int vector)
+{
+ self_ipi_reg_write(vector);
+}
+
+static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set);
+}
+
+static void savic_eoi(void)
+{
+ unsigned int cpu;
+ int vec;
+
+ cpu = raw_smp_processor_id();
+ vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR));
+ if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR"))
+ return;
+
+ /* Is level-triggered interrupt? */
+ if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) {
+ update_vector(cpu, APIC_ISR, vec, false);
+ /*
+ * Propagate the EOI write to the hypervisor for level-triggered
+ * interrupts. Return to the guest from GHCB protocol event takes
+ * care of re-evaluating interrupt state.
+ */
+ savic_ghcb_msr_write(APIC_EOI, 0);
+ } else {
+ /*
+ * Hardware clears APIC_ISR and re-evaluates the interrupt state
+ * to determine if there is any pending interrupt which can be
+ * delivered to CPU.
+ */
+ native_apic_msr_eoi();
+ }
+}
+
+static void savic_teardown(void)
+{
+ /* Disable Secure AVIC */
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0);
+ savic_unregister_gpa(NULL);
+}
+
+static void savic_setup(void)
+{
+ void *ap = this_cpu_ptr(savic_page);
+ enum es_result res;
+ unsigned long gpa;
+
+ /*
+ * Before Secure AVIC is enabled, APIC MSR reads are intercepted.
+ * APIC_ID MSR read returns the value from the hypervisor.
+ */
+ apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID));
+
+ gpa = __pa(ap);
+
+ /*
+ * The NPT entry for a vCPU's APIC backing page must always be
+ * present when the vCPU is running in order for Secure AVIC to
+ * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot
+ * be resumed if the NPT entry for the APIC backing page is not
+ * present. Notify GPA of the vCPU's APIC backing page to the
+ * hypervisor by calling savic_register_gpa(). Before executing
+ * VMRUN, the hypervisor makes use of this information to make sure
+ * the APIC backing page is mapped in NPT.
+ */
+ res = savic_register_gpa(gpa);
+ if (res != ES_OK)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL,
+ gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI);
+}
+
+static int savic_probe(void)
+{
+ if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ return 0;
+
+ if (!x2apic_mode) {
+ pr_err("Secure AVIC enabled in non x2APIC mode\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ /* unreachable */
+ }
+
+ savic_page = alloc_percpu(struct secure_avic_page);
+ if (!savic_page)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ return 1;
+}
+
+static struct apic apic_x2apic_savic __ro_after_init = {
+
+ .name = "secure avic x2apic",
+ .probe = savic_probe,
+ .acpi_madt_oem_check = savic_acpi_madt_oem_check,
+ .setup = savic_setup,
+ .teardown = savic_teardown,
+
+ .dest_mode_logical = false,
+
+ .disable_esr = 0,
+
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+
+ .max_apic_id = UINT_MAX,
+ .x2apic_set_max_apicid = true,
+ .get_apic_id = x2apic_get_apic_id,
+
+ .calc_dest_apicid = apic_default_calc_apicid,
+
+ .send_IPI = savic_send_ipi,
+ .send_IPI_mask = savic_send_ipi_mask,
+ .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself,
+ .send_IPI_allbutself = savic_send_ipi_allbutself,
+ .send_IPI_all = savic_send_ipi_all,
+ .send_IPI_self = savic_send_ipi_self,
+
+ .nmi_to_offline_cpu = true,
+
+ .read = savic_read,
+ .write = savic_write,
+ .eoi = savic_eoi,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = savic_icr_write,
+
+ .update_vector = savic_update_vector,
+};
+
+apic_driver(apic_x2apic_savic);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 63092afb142e..15209f220e1f 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,62 +5,72 @@
*
* SGI UV APIC functions (note: not an Intel compatible APIC)
*
- * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
*/
+#include <linux/crash_dump.h>
+#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
-#include <linux/hardirq.h>
#include <linux/proc_fs.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/io.h>
+#include <linux/memory.h>
+#include <linux/export.h>
#include <linux/pci.h>
-#include <linux/kdebug.h>
-#include <linux/delay.h>
-#include <linux/crash_dump.h>
-#include <linux/reboot.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <asm/e820/api.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
-#include <asm/current.h>
-#include <asm/pgtable.h>
#include <asm/uv/bios.h>
#include <asm/uv/uv.h>
#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/smp.h>
-#include <asm/x86_init.h>
-#include <asm/nmi.h>
-
-/* BMC sets a bit this MMR non-zero before sending an NMI */
-#define UVH_NMI_MMR UVH_SCRATCH5
-#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
-#define UV_NMI_PENDING_MASK (1UL << 63)
-DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
-
-#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
-
-static enum uv_system_type uv_system_type;
-static u64 gru_start_paddr, gru_end_paddr;
-static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
-static u64 gru_dist_lmask, gru_dist_umask;
-static union uvh_apicid uvh_apicid;
-int uv_min_hub_revision_id;
-EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
-unsigned int uv_apicid_hibits;
-EXPORT_SYMBOL_GPL(uv_apicid_hibits);
-static DEFINE_SPINLOCK(uv_nmi_lock);
+
+#include "local.h"
+
+static enum uv_system_type uv_system_type;
+static int uv_hubbed_system;
+static int uv_hubless_system;
+static u64 gru_start_paddr, gru_end_paddr;
+static union uvh_apicid uvh_apicid;
+static int uv_node_id;
+
+/* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */
+static u8 uv_archtype[UV_AT_SIZE + 1];
+static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
+static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
+
+/* Information derived from CPUID and some UV MMRs */
+static struct {
+ unsigned int apicid_shift;
+ unsigned int apicid_mask;
+ unsigned int socketid_shift; /* aka pnode_shift for UV2/3 */
+ unsigned int pnode_mask;
+ unsigned int nasid_shift;
+ unsigned int gpa_shift;
+ unsigned int gnode_shift;
+ unsigned int m_skt;
+ unsigned int n_skt;
+} uv_cpuid;
+
+static int uv_min_hub_revision_id;
static struct apic apic_x2apic_uv_x;
+static struct uv_hub_info_s uv_hub_info_node0;
+
+/* Set this to use hardware error handler instead of kernel panic: */
+static int disable_uv_undefined_panic = 1;
+
+unsigned long uv_undefined(char *str)
+{
+ if (likely(!disable_uv_undefined_panic))
+ panic("UV: error: undefined MMR: %s\n", str);
+ else
+ pr_crit("UV: error: undefined MMR: %s\n", str);
+
+ /* Cause a machine fault: */
+ return ~0ul;
+}
+EXPORT_SYMBOL(uv_undefined);
static unsigned long __init uv_early_read_mmr(unsigned long addr)
{
@@ -69,25 +79,16 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
val = *mmr;
early_iounmap(mmr, sizeof(*mmr));
+
return val;
}
static inline bool is_GRU_range(u64 start, u64 end)
{
- if (gru_dist_base) {
- u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
- u64 sl = start & gru_dist_lmask; /* base offset bits */
- u64 eu = end & gru_dist_umask;
- u64 el = end & gru_dist_lmask;
-
- /* Must reside completely within a single GRU range */
- return (sl == gru_dist_base && el == gru_dist_base &&
- su >= gru_first_node_paddr &&
- su <= gru_last_node_paddr &&
- eu == su);
- } else {
- return start >= gru_start_paddr && end <= gru_end_paddr;
- }
+ if (!gru_start_paddr)
+ return false;
+
+ return start >= gru_start_paddr && end <= gru_end_paddr;
}
static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -95,113 +96,415 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
return is_ISA_range(start, end) || is_GRU_range(start, end);
}
-static int __init early_get_pnodeid(void)
+static void __init early_get_pnodeid(void)
{
- union uvh_node_id_u node_id;
- union uvh_rh_gam_config_mmr_u m_n_config;
int pnode;
- /* Currently, all blades have same revision number */
+ uv_cpuid.m_skt = 0;
+ if (UVH_RH10_GAM_ADDR_MAP_CONFIG) {
+ union uvh_rh10_gam_addr_map_config_u m_n_config;
+
+ m_n_config.v = uv_early_read_mmr(UVH_RH10_GAM_ADDR_MAP_CONFIG);
+ uv_cpuid.n_skt = m_n_config.s.n_skt;
+ uv_cpuid.nasid_shift = 0;
+ } else if (UVH_RH_GAM_ADDR_MAP_CONFIG) {
+ union uvh_rh_gam_addr_map_config_u m_n_config;
+
+ m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
+ uv_cpuid.n_skt = m_n_config.s.n_skt;
+ if (is_uv(UV3))
+ uv_cpuid.m_skt = m_n_config.s3.m_skt;
+ if (is_uv(UV2))
+ uv_cpuid.m_skt = m_n_config.s2.m_skt;
+ uv_cpuid.nasid_shift = 1;
+ } else {
+ unsigned long GAM_ADDR_MAP_CONFIG = 0;
+
+ WARN(GAM_ADDR_MAP_CONFIG == 0,
+ "UV: WARN: GAM_ADDR_MAP_CONFIG is not available\n");
+ uv_cpuid.n_skt = 0;
+ uv_cpuid.nasid_shift = 0;
+ }
+
+ if (is_uv(UV4|UVY))
+ uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
+
+ uv_cpuid.pnode_mask = (1 << uv_cpuid.n_skt) - 1;
+ pnode = (uv_node_id >> uv_cpuid.nasid_shift) & uv_cpuid.pnode_mask;
+ uv_cpuid.gpa_shift = 46; /* Default unless changed */
+
+ pr_info("UV: n_skt:%d pnmsk:%x pn:%x\n",
+ uv_cpuid.n_skt, uv_cpuid.pnode_mask, pnode);
+}
+
+/* Running on a UV Hubbed system, determine which UV Hub Type it is */
+static int __init early_set_hub_type(void)
+{
+ union uvh_node_id_u node_id;
+
+ /*
+ * The NODE_ID MMR is always at offset 0.
+ * Contains the chip part # + revision.
+ * Node_id field started with 15 bits,
+ * ... now 7 but upper 8 are masked to 0.
+ * All blades/nodes have the same part # and hub revision.
+ */
node_id.v = uv_early_read_mmr(UVH_NODE_ID);
- m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
- uv_min_hub_revision_id = node_id.s.revision;
+ uv_node_id = node_id.sx.node_id;
switch (node_id.s.part_number) {
- case UV2_HUB_PART_NUMBER:
- case UV2_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+
+ case UV5_HUB_PART_NUMBER:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV5_HUB_REVISION_BASE;
+ uv_hub_type_set(UV5);
+ break;
+
+ /* UV4/4A only have a revision difference */
+ case UV4_HUB_PART_NUMBER:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV4_HUB_REVISION_BASE - 1;
+ uv_hub_type_set(UV4);
+ if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
+ uv_hub_type_set(UV4|UV4A);
break;
+
case UV3_HUB_PART_NUMBER:
case UV3_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV3_HUB_REVISION_BASE;
+ uv_hub_type_set(UV3);
+ break;
+
+ case UV2_HUB_PART_NUMBER:
+ case UV2_HUB_PART_NUMBER_X:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV2_HUB_REVISION_BASE - 1;
+ uv_hub_type_set(UV2);
break;
+
+ default:
+ return 0;
}
- uv_hub_info->hub_revision = uv_min_hub_revision_id;
- pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
- return pnode;
+ pr_info("UV: part#:%x rev:%d rev_id:%d UVtype:0x%x\n",
+ node_id.s.part_number, node_id.s.revision,
+ uv_min_hub_revision_id, is_uv(~0));
+
+ return 1;
}
-static void __init early_get_apic_pnode_shift(void)
+static void __init uv_tsc_check_sync(void)
{
- uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
- if (!uvh_apicid.v)
- /*
- * Old bios, use default value
- */
- uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+ u64 mmr;
+ int sync_state;
+ int mmr_shift;
+ char *state;
+
+ /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */
+ if (!is_uv(UV2|UV3|UV4)) {
+ mark_tsc_async_resets("UV5+");
+ return;
+ }
+
+ /* UV2,3,4, UV BIOS TSC sync state available */
+ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
+ mmr_shift =
+ is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
+ sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
+
+ /* Check if TSC is valid for all sockets */
+ switch (sync_state) {
+ case UVH_TSC_SYNC_VALID:
+ state = "in sync";
+ mark_tsc_async_resets("UV BIOS");
+ break;
+
+ /* If BIOS state unknown, don't do anything */
+ case UVH_TSC_SYNC_UNKNOWN:
+ state = "unknown";
+ break;
+
+ /* Otherwise, BIOS indicates problem with TSC */
+ default:
+ state = "unstable";
+ mark_tsc_unstable("UV BIOS");
+ break;
+ }
+ pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
}
-/*
- * Add an extra bit as dictated by bios to the destination apicid of
- * interrupts potentially passing through the UV HUB. This prevents
- * a deadlock between interrupts and IO port operations.
- */
-static void __init uv_set_apicid_hibit(void)
+/* Selector for (4|4A|5) structs */
+#define uvxy_field(sname, field, undef) ( \
+ is_uv(UV4A) ? sname.s4a.field : \
+ is_uv(UV4) ? sname.s4.field : \
+ is_uv(UV3) ? sname.s3.field : \
+ undef)
+
+static void __init early_get_apic_socketid_shift(void)
{
- union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
+ unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN);
+
+ if (is_uv2_hub() || is_uv3_hub())
+ uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
- if (is_uv1_hub()) {
- apicid_mask.v =
- uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
- uv_apicid_hibits =
- apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
+ if (sid_shift) {
+ uv_cpuid.apicid_shift = 0;
+ uv_cpuid.apicid_mask = (~(-1 << sid_shift));
+ uv_cpuid.socketid_shift = sid_shift;
+ } else {
+ pr_info("UV: CPU does not have valid CPUID.11\n");
}
+
+ pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
+ pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
}
-static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static void __init uv_stringify(int len, char *to, char *from)
{
- int pnodeid, is_uv1, is_uv2, is_uv3;
-
- is_uv1 = !strcmp(oem_id, "SGI");
- is_uv2 = !strcmp(oem_id, "SGI2");
- is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */
- if (is_uv1 || is_uv2 || is_uv3) {
- uv_hub_info->hub_revision =
- (is_uv1 ? UV1_HUB_REVISION_BASE :
- (is_uv2 ? UV2_HUB_REVISION_BASE :
- UV3_HUB_REVISION_BASE));
- pnodeid = early_get_pnodeid();
- early_get_apic_pnode_shift();
- x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
- x86_platform.nmi_init = uv_nmi_init;
- if (!strcmp(oem_table_id, "UVL"))
- uv_system_type = UV_LEGACY_APIC;
- else if (!strcmp(oem_table_id, "UVX"))
- uv_system_type = UV_X2APIC;
- else if (!strcmp(oem_table_id, "UVH")) {
- __this_cpu_write(x2apic_extra_bits,
- pnodeid << uvh_apicid.s.pnode_shift);
- uv_system_type = UV_NON_UNIQUE_APIC;
- uv_set_apicid_hibit();
- return 1;
- }
+ strscpy(to, from, len);
+
+ /* Trim trailing spaces */
+ (void)strim(to);
+}
+
+/* Find UV arch type entry in UVsystab */
+static unsigned long __init early_find_archtype(struct uv_systab *st)
+{
+ int i;
+
+ for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+ unsigned long ptr = st->entry[i].offset;
+
+ if (!ptr)
+ continue;
+ ptr += (unsigned long)st;
+ if (st->entry[i].type == UV_SYSTAB_TYPE_ARCH_TYPE)
+ return ptr;
+ }
+ return 0;
+}
+
+/* Validate UV arch type field in UVsystab */
+static int __init decode_arch_type(unsigned long ptr)
+{
+ struct uv_arch_type_entry *uv_ate = (struct uv_arch_type_entry *)ptr;
+ int n = strlen(uv_ate->archtype);
+
+ if (n > 0 && n < sizeof(uv_ate->archtype)) {
+ pr_info("UV: UVarchtype received from BIOS\n");
+ uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype);
+ return 1;
}
return 0;
}
+/* Determine if UV arch type entry might exist in UVsystab */
+static int __init early_get_arch_type(void)
+{
+ unsigned long uvst_physaddr, uvst_size, ptr;
+ struct uv_systab *st;
+ u32 rev;
+ int ret;
+
+ uvst_physaddr = get_uv_systab_phys(0);
+ if (!uvst_physaddr)
+ return 0;
+
+ st = early_memremap_ro(uvst_physaddr, sizeof(struct uv_systab));
+ if (!st) {
+ pr_err("UV: Cannot access UVsystab, remap failed\n");
+ return 0;
+ }
+
+ rev = st->revision;
+ if (rev < UV_SYSTAB_VERSION_UV5) {
+ early_memunmap(st, sizeof(struct uv_systab));
+ return 0;
+ }
+
+ uvst_size = st->size;
+ early_memunmap(st, sizeof(struct uv_systab));
+ st = early_memremap_ro(uvst_physaddr, uvst_size);
+ if (!st) {
+ pr_err("UV: Cannot access UVarchtype, remap failed\n");
+ return 0;
+ }
+
+ ptr = early_find_archtype(st);
+ if (!ptr) {
+ early_memunmap(st, uvst_size);
+ return 0;
+ }
+
+ ret = decode_arch_type(ptr);
+ early_memunmap(st, uvst_size);
+ return ret;
+}
+
+/* UV system found, check which APIC MODE BIOS already selected */
+static void __init early_set_apic_mode(void)
+{
+ if (x2apic_enabled())
+ uv_system_type = UV_X2APIC;
+ else
+ uv_system_type = UV_LEGACY_APIC;
+}
+
+static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
+{
+ /* Save OEM_ID passed from ACPI MADT */
+ uv_stringify(sizeof(oem_id), oem_id, _oem_id);
+
+ /* Check if BIOS sent us a UVarchtype */
+ if (!early_get_arch_type())
+
+ /* If not use OEM ID for UVarchtype */
+ uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id);
+
+ /* Check if not hubbed */
+ if (strncmp(uv_archtype, "SGI", 3) != 0) {
+
+ /* (Not hubbed), check if not hubless */
+ if (strncmp(uv_archtype, "NSGI", 4) != 0)
+
+ /* (Not hubless), not a UV */
+ return 0;
+
+ /* Is UV hubless system */
+ uv_hubless_system = 0x01;
+
+ /* UV5 Hubless */
+ if (strncmp(uv_archtype, "NSGI5", 5) == 0)
+ uv_hubless_system |= 0x20;
+
+ /* UV4 Hubless: CH */
+ else if (strncmp(uv_archtype, "NSGI4", 5) == 0)
+ uv_hubless_system |= 0x10;
+
+ /* UV3 Hubless: UV300/MC990X w/o hub */
+ else
+ uv_hubless_system |= 0x8;
+
+ /* Copy OEM Table ID */
+ uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
+
+ pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",
+ oem_id, oem_table_id, uv_system_type, uv_hubless_system);
+
+ return 0;
+ }
+
+ if (numa_off) {
+ pr_err("UV: NUMA is off, disabling UV support\n");
+ return 0;
+ }
+
+ /* Set hubbed type if true */
+ uv_hub_info->hub_revision =
+ !strncmp(uv_archtype, "SGI5", 4) ? UV5_HUB_REVISION_BASE :
+ !strncmp(uv_archtype, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
+ !strncmp(uv_archtype, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+ !strcmp(uv_archtype, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
+
+ switch (uv_hub_info->hub_revision) {
+ case UV5_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x21;
+ uv_hub_type_set(UV5);
+ break;
+
+ case UV4_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x11;
+ uv_hub_type_set(UV4);
+ break;
+
+ case UV3_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x9;
+ uv_hub_type_set(UV3);
+ break;
+
+ case UV2_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x5;
+ uv_hub_type_set(UV2);
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Get UV hub chip part number & revision */
+ early_set_hub_type();
+
+ /* Other UV setup functions */
+ early_set_apic_mode();
+ early_get_pnodeid();
+ early_get_apic_socketid_shift();
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
+ x86_platform.nmi_init = uv_nmi_init;
+ uv_tsc_check_sync();
+
+ return 1;
+}
+
+/* Called early to probe for the correct APIC driver */
+static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
+{
+ /* Set up early hub info fields for Node 0 */
+ uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
+ /* If not UV, return. */
+ if (uv_set_system_type(_oem_id, _oem_table_id) == 0)
+ return 0;
+
+ /* Save for display of the OEM Table ID */
+ uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
+
+ pr_info("UV: OEM IDs %s/%s, System/UVType %d/0x%x, HUB RevID %d\n",
+ oem_id, oem_table_id, uv_system_type, is_uv(UV_ANY),
+ uv_min_hub_revision_id);
+
+ return 0;
+}
+
enum uv_system_type get_uv_system_type(void)
{
return uv_system_type;
}
+int uv_get_hubless_system(void)
+{
+ return uv_hubless_system;
+}
+EXPORT_SYMBOL_GPL(uv_get_hubless_system);
+
+ssize_t uv_get_archtype(char *buf, int len)
+{
+ return scnprintf(buf, len, "%s/%s", uv_archtype, oem_table_id);
+}
+EXPORT_SYMBOL_GPL(uv_get_archtype);
+
int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
EXPORT_SYMBOL_GPL(is_uv_system);
-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+int is_uv_hubbed(int uvtype)
+{
+ return (uv_hubbed_system & uvtype);
+}
+EXPORT_SYMBOL_GPL(is_uv_hubbed);
-struct uv_blade_info *uv_blade_info;
-EXPORT_SYMBOL_GPL(uv_blade_info);
+static int is_uv_hubless(int uvtype)
+{
+ return (uv_hubless_system & uvtype);
+}
-short *uv_node_to_blade;
-EXPORT_SYMBOL_GPL(uv_node_to_blade);
+void **__uv_hub_info_list;
+EXPORT_SYMBOL_GPL(__uv_hub_info_list);
-short *uv_cpu_to_blade;
-EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info);
short uv_possible_blades;
EXPORT_SYMBOL_GPL(uv_possible_blades);
@@ -209,39 +512,202 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
-static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+/* The following values are used for the per node hub info struct */
+static __initdata unsigned short _min_socket, _max_socket;
+static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
+static __initdata struct uv_gam_range_entry *uv_gre_table;
+static __initdata struct uv_gam_parameters *uv_gp_table;
+static __initdata unsigned short *_socket_to_node;
+static __initdata unsigned short *_socket_to_pnode;
+static __initdata unsigned short *_pnode_to_socket;
+static __initdata unsigned short *_node_to_socket;
+
+static __initdata struct uv_gam_range_s *_gr_table;
+
+#define SOCK_EMPTY ((unsigned short)~0)
+
+/* Default UV memory block size is 2GB */
+static unsigned long mem_block_size __initdata = (2UL << 30);
+
+/* Kernel parameter to specify UV mem block size */
+static int __init parse_mem_block_size(char *ptr)
+{
+ unsigned long size = memparse(ptr, NULL);
+
+ /* Size will be rounded down by set_block_size() below */
+ mem_block_size = size;
+ return 0;
+}
+early_param("uv_memblksize", parse_mem_block_size);
+
+static __init int adj_blksize(u32 lgre)
+{
+ unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
+ unsigned long size;
+
+ for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
+ if (IS_ALIGNED(base, size))
+ break;
+
+ if (size >= mem_block_size)
+ return 0;
+
+ mem_block_size = size;
+ return 1;
+}
+
+static __init void set_block_size(void)
+{
+ unsigned int order = ffs(mem_block_size);
+
+ if (order) {
+ /* adjust for ffs return of 1..64 */
+ set_memory_block_size_order(order - 1);
+ pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
+ } else {
+ /* bad or zero value, default to 1UL << 31 (2GB) */
+ pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
+ set_memory_block_size_order(31);
+ }
+}
+
+/* Build GAM range lookup table: */
+static __init void build_uv_gr_table(void)
+{
+ struct uv_gam_range_entry *gre = uv_gre_table;
+ struct uv_gam_range_s *grt;
+ unsigned long last_limit = 0, ram_limit = 0;
+ int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
+
+ if (!gre)
+ return;
+
+ bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
+ grt = kzalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!grt))
+ return;
+ _gr_table = grt;
+
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
+ if (!ram_limit) {
+ /* Mark hole between RAM/non-RAM: */
+ ram_limit = last_limit;
+ last_limit = gre->limit;
+ lsid++;
+ continue;
+ }
+ last_limit = gre->limit;
+ pr_info("UV: extra hole in GAM RE table @%d\n", (int)(gre - uv_gre_table));
+ continue;
+ }
+ if (_max_socket < gre->sockid) {
+ pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", gre->sockid, _max_socket, (int)(gre - uv_gre_table));
+ continue;
+ }
+ sid = gre->sockid - _min_socket;
+ if (lsid < sid) {
+ /* New range: */
+ grt = &_gr_table[indx];
+ grt->base = lindx;
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ lsid = sid;
+ lindx = indx++;
+ continue;
+ }
+ /* Update range: */
+ if (lsid == sid && !ram_limit) {
+ /* .. if contiguous: */
+ if (grt->limit == last_limit) {
+ grt->limit = last_limit = gre->limit;
+ continue;
+ }
+ }
+ /* Non-contiguous RAM range: */
+ if (!ram_limit) {
+ grt++;
+ grt->base = lindx;
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ continue;
+ }
+ /* Non-contiguous/non-RAM: */
+ grt++;
+ /* base is this entry */
+ grt->base = grt - _gr_table;
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ lsid++;
+ }
+
+ /* Shorten table if possible */
+ grt++;
+ i = grt - _gr_table;
+ if (i < _gr_table_len) {
+ void *ret;
+
+ bytes = i * sizeof(struct uv_gam_range_s);
+ ret = krealloc(_gr_table, bytes, GFP_KERNEL);
+ if (ret) {
+ _gr_table = ret;
+ _gr_table_len = i;
+ }
+ }
+
+ /* Display resultant GAM range table: */
+ for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
+ unsigned long start, end;
+ int gb = grt->base;
+
+ start = gb < 0 ? 0 : (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
+ end = (unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
+
+ pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", i, grt->nasid, start, end, gb);
+ }
+}
+
+static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu)
{
-#ifdef CONFIG_SMP
unsigned long val;
int pnode;
pnode = uv_apicid_to_pnode(phys_apicid);
- phys_apicid |= uv_apicid_hibits;
+
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
+
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_STARTUP;
+
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- atomic_set(&init_deasserted, 1);
-#endif
return 0;
}
static void uv_send_IPI_one(int cpu, int vector)
{
- unsigned long apicid;
- int pnode;
+ unsigned long apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ int pnode = uv_apicid_to_pnode(apicid);
+ unsigned long dmode, val;
+
+ if (vector == NMI_VECTOR)
+ dmode = APIC_DELIVERY_MODE_NMI;
+ else
+ dmode = APIC_DELIVERY_MODE_FIXED;
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
- pnode = uv_apicid_to_pnode(apicid);
- uv_hub_send_ipi(pnode, apicid, vector);
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
@@ -279,185 +745,74 @@ static void uv_send_IPI_all(int vector)
uv_send_IPI_mask(cpu_online_mask, vector);
}
-static int uv_apic_id_valid(int apicid)
-{
- return 1;
-}
-
-static int uv_apic_id_registered(void)
-{
- return 1;
-}
-
-static void uv_init_apic_ldr(void)
-{
-}
-
-static int
-uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- int unsigned cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
-
- if (likely(cpu < nr_cpu_ids)) {
- *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
- return 0;
- }
-
- return -EINVAL;
-}
-
-static unsigned int x2apic_get_apic_id(unsigned long x)
-{
- unsigned int id;
-
- WARN_ON(preemptible() && num_online_cpus() > 1);
- id = x | __this_cpu_read(x2apic_extra_bits);
-
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- /* maskout x2apic_extra_bits ? */
- x = id;
- return x;
-}
-
-static unsigned int uv_read_apic_id(void)
-{
-
- return x2apic_get_apic_id(apic_read(APIC_ID));
-}
-
-static int uv_phys_pkg_id(int initial_apicid, int index_msb)
-{
- return uv_read_apic_id() >> index_msb;
-}
-
-static void uv_send_IPI_self(int vector)
-{
- apic_write(APIC_SELF_IPI, vector);
-}
-
static int uv_probe(void)
{
return apic == &apic_x2apic_uv_x;
}
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __ro_after_init = {
.name = "UV large system",
.probe = uv_probe,
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
- .apic_id_valid = uv_apic_id_valid,
- .apic_id_registered = uv_apic_id_registered,
- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .dest_mode_logical = false,
- .target_cpus = online_target_cpus,
.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,
- .check_apicid_present = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
- .init_apic_ldr = uv_init_apic_ldr,
-
- .ioapic_phys_id_map = NULL,
- .setup_apic_routing = NULL,
- .multi_timer_check = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = NULL,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = default_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = uv_phys_pkg_id,
- .mps_oem_check = NULL,
+ .max_apic_id = UINT_MAX,
.get_apic_id = x2apic_get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+ .calc_dest_apicid = apic_default_calc_apicid,
+ .send_IPI = uv_send_IPI_one,
.send_IPI_mask = uv_send_IPI_mask,
.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_all = uv_send_IPI_all,
- .send_IPI_self = uv_send_IPI_self,
+ .send_IPI_self = x2apic_send_IPI_self,
.wakeup_secondary_cpu = uv_wakeup_secondary,
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
- .wait_for_init_deassert = NULL,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
- .eoi_write = native_apic_msr_eoi_write,
+ .eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
- .wait_icr_idle = native_x2apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
-};
-
-static __cpuinit void set_x2apic_extra_bits(int pnode)
-{
- __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
-}
-
-/*
- * Called on boot cpu.
- */
-static __init int boot_pnode_to_blade(int pnode)
-{
- int blade;
-
- for (blade = 0; blade < uv_num_possible_blades(); blade++)
- if (pnode == uv_blade_info[blade].pnode)
- return blade;
- BUG();
-}
-
-struct redir_addr {
- unsigned long redirect;
- unsigned long alias;
};
-#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
-
-static __initdata struct redir_addr redir_addrs[] = {
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
-};
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3
+#define DEST_SHIFT UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT
static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
{
- union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
- union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+ union uvh_rh_gam_alias_2_overlay_config_u alias;
+ union uvh_rh_gam_alias_2_redirect_config_u redirect;
+ unsigned long m_redirect;
+ unsigned long m_overlay;
int i;
- for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
- alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+ for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
+ switch (i) {
+ case 0:
+ m_redirect = UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG;
+ break;
+ case 1:
+ m_redirect = UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG;
+ break;
+ case 2:
+ m_redirect = UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG;
+ break;
+ }
+ alias.v = uv_read_local_mmr(m_overlay);
if (alias.s.enable && alias.s.base == 0) {
*size = (1UL << alias.s.m_alias);
- redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+ redirect.v = uv_read_local_mmr(m_redirect);
*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
return;
}
@@ -466,9 +821,9 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
}
enum map_type {map_wb, map_uc};
+static const char * const mt[] = { "WB", "UC" };
-static __init void map_high(char *id, unsigned long base, int pshift,
- int bshift, int max_pnode, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int pshift, int bshift, int max_pnode, enum map_type map_type)
{
unsigned long bytes, paddr;
@@ -478,142 +833,170 @@ static __init void map_high(char *id, unsigned long base, int pshift,
pr_info("UV: Map %s_HI base address NULL\n", id);
return;
}
- pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
-}
-static __init void map_gru_distributed(unsigned long c)
-{
- union uvh_rh_gam_gru_overlay_config_mmr_u gru;
- u64 paddr;
- unsigned long bytes;
- int nid;
-
- gru.v = c;
- /* only base bits 42:28 relevant in dist mode */
- gru_dist_base = gru.v & 0x000007fff0000000UL;
- if (!gru_dist_base) {
- pr_info("UV: Map GRU_DIST base address NULL\n");
- return;
- }
- bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
- gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
- gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
- gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
- for_each_online_node(nid) {
- paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
- gru_dist_base;
- init_extra_mapping_wb(paddr, bytes);
- gru_first_node_paddr = min(paddr, gru_first_node_paddr);
- gru_last_node_paddr = max(paddr, gru_last_node_paddr);
- }
- /* Save upper (63:M) bits of address only for is_GRU_range */
- gru_first_node_paddr &= gru_dist_umask;
- gru_last_node_paddr &= gru_dist_umask;
- pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n",
- gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+ pr_info("UV: Map %s_HI 0x%lx - 0x%lx %s (%d segments)\n",
+ id, paddr, paddr + bytes, mt[map_type], max_pnode + 1);
}
static __init void map_gru_high(int max_pnode)
{
- union uvh_rh_gam_gru_overlay_config_mmr_u gru;
- int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
- gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
- if (!gru.s.enable) {
- pr_info("UV: GRU disabled\n");
+ union uvh_rh_gam_gru_overlay_config_u gru;
+ unsigned long mask, base;
+ int shift;
+
+ if (UVH_RH_GAM_GRU_OVERLAY_CONFIG) {
+ gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
+ shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+ mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+ } else if (UVH_RH10_GAM_GRU_OVERLAY_CONFIG) {
+ gru.v = uv_read_local_mmr(UVH_RH10_GAM_GRU_OVERLAY_CONFIG);
+ shift = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+ mask = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+ } else {
+ pr_err("UV: GRU unavailable (no MMR)\n");
return;
}
- if (is_uv3_hub() && gru.s3.mode) {
- map_gru_distributed(gru.v);
+ if (!gru.s.enable) {
+ pr_info("UV: GRU disabled (by BIOS)\n");
return;
}
- map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
- gru_start_paddr = ((u64)gru.s.base << shift);
+
+ base = (gru.v & mask) >> shift;
+ map_high("GRU", base, shift, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)base << shift);
gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
}
static __init void map_mmr_high(int max_pnode)
{
- union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
- int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ unsigned long base;
+ int shift;
+ bool enable;
+
+ if (UVH_RH10_GAM_MMR_OVERLAY_CONFIG) {
+ union uvh_rh10_gam_mmr_overlay_config_u mmr;
+
+ mmr.v = uv_read_local_mmr(UVH_RH10_GAM_MMR_OVERLAY_CONFIG);
+ enable = mmr.s.enable;
+ base = mmr.s.base;
+ shift = UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+ } else if (UVH_RH_GAM_MMR_OVERLAY_CONFIG) {
+ union uvh_rh_gam_mmr_overlay_config_u mmr;
+
+ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
+ enable = mmr.s.enable;
+ base = mmr.s.base;
+ shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+ } else {
+ pr_err("UV:%s:RH_GAM_MMR_OVERLAY_CONFIG MMR undefined?\n",
+ __func__);
+ return;
+ }
- mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
- if (mmr.s.enable)
- map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+ if (enable)
+ map_high("MMR", base, shift, shift, max_pnode, map_uc);
else
pr_info("UV: MMR disabled\n");
}
-/*
- * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
- * and REDIRECT MMR regs are exactly the same on UV3.
- */
-struct mmioh_config {
- unsigned long overlay;
- unsigned long redirect;
- char *id;
-};
-
-static __initdata struct mmioh_config mmiohs[] = {
- {
- UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
- UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
- "MMIOH0"
- },
- {
- UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
- UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
- "MMIOH1"
- },
+/* Arch specific ENUM cases */
+enum mmioh_arch {
+ UV2_MMIOH = -1,
+ UVY_MMIOH0, UVY_MMIOH1,
+ UVX_MMIOH0, UVX_MMIOH1,
};
-static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+/* Calculate and Map MMIOH Regions */
+static void __init calc_mmioh_map(enum mmioh_arch index,
+ int min_pnode, int max_pnode,
+ int shift, unsigned long base, int m_io, int n_io)
{
- union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
- unsigned long mmr;
- unsigned long base;
- int i, n, shift, m_io, max_io;
- int nasid, lnasid, fi, li;
- char *id;
-
- id = mmiohs[index].id;
- overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
- pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
- id, overlay.v, overlay.s3.base, overlay.s3.m_io);
- if (!overlay.s3.enable) {
- pr_info("UV: %s disabled\n", id);
+ unsigned long mmr, nasid_mask;
+ int nasid, min_nasid, max_nasid, lnasid, mapped;
+ int i, fi, li, n, max_io;
+ char id[8];
+
+ /* One (UV2) mapping */
+ if (index == UV2_MMIOH) {
+ strscpy(id, "MMIOH", sizeof(id));
+ max_io = max_pnode;
+ mapped = 0;
+ goto map_exit;
+ }
+
+ /* small and large MMIOH mappings */
+ switch (index) {
+ case UVY_MMIOH0:
+ mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0;
+ nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
+ n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+ min_nasid = min_pnode;
+ max_nasid = max_pnode;
+ mapped = 1;
+ break;
+ case UVY_MMIOH1:
+ mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1;
+ nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
+ n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+ min_nasid = min_pnode;
+ max_nasid = max_pnode;
+ mapped = 1;
+ break;
+ case UVX_MMIOH0:
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+ min_nasid = min_pnode * 2;
+ max_nasid = max_pnode * 2;
+ mapped = 1;
+ break;
+ case UVX_MMIOH1:
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+ min_nasid = min_pnode * 2;
+ max_nasid = max_pnode * 2;
+ mapped = 1;
+ break;
+ default:
+ pr_err("UV:%s:Invalid mapping type:%d\n", __func__, index);
return;
}
- shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
- base = (unsigned long)overlay.s3.base;
- m_io = overlay.s3.m_io;
- mmr = mmiohs[index].redirect;
- n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
- min_pnode *= 2; /* convert to NASID */
- max_pnode *= 2;
- max_io = lnasid = fi = li = -1;
+ /* enum values chosen so (index mod 2) is MMIOH 0/1 (low/high) */
+ snprintf(id, sizeof(id), "MMIOH%d", index%2);
+ max_io = lnasid = fi = li = -1;
for (i = 0; i < n; i++) {
- union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
-
- redirect.v = uv_read_local_mmr(mmr + i * 8);
- nasid = redirect.s3.nasid;
- if (nasid < min_pnode || max_pnode < nasid)
- nasid = -1; /* invalid NASID */
+ unsigned long m_redirect = mmr + i * 8;
+ unsigned long redirect = uv_read_local_mmr(m_redirect);
+
+ nasid = redirect & nasid_mask;
+ if (i == 0)
+ pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n",
+ id, redirect, m_redirect, nasid);
+
+ /* Invalid NASID check */
+ if (nasid < min_nasid || max_nasid < nasid) {
+ /* Not an error: unused table entries get "poison" values */
+ pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n",
+ __func__, index, nasid, min_nasid, max_nasid);
+ nasid = -1;
+ }
if (nasid == lnasid) {
li = i;
- if (i != n-1) /* last entry check */
+ /* Last entry check: */
+ if (i != n-1)
continue;
}
- /* check if we have a cached (or last) redirect to print */
+ /* Check if we have a cached (or last) redirect to print: */
if (lnasid != -1 || (i == n-1 && nasid != -1)) {
unsigned long addr1, addr2;
int f, l;
@@ -625,10 +1008,8 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
f = fi;
l = li;
}
- addr1 = (base << shift) +
- f * (unsigned long)(1 << m_io);
- addr2 = (base << shift) +
- (l + 1) * (unsigned long)(1 << m_io);
+ addr1 = (base << shift) + f * (1ULL << m_io);
+ addr2 = (base << shift) + (l + 1) * (1ULL << m_io);
pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
id, fi, li, lnasid, addr1, addr2);
if (max_io < l)
@@ -638,60 +1019,93 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
lnasid = nasid;
}
- pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
- id, base, shift, m_io, max_io);
+map_exit:
+ pr_info("UV: %s base:0x%lx shift:%d m_io:%d max_io:%d max_pnode:0x%x\n",
+ id, base, shift, m_io, max_io, max_pnode);
- if (max_io >= 0)
+ if (max_io >= 0 && !mapped)
map_high(id, base, shift, m_io, max_io, map_uc);
}
static __init void map_mmioh_high(int min_pnode, int max_pnode)
{
- union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
- unsigned long mmr, base;
- int shift, enable, m_io, n_io;
-
- if (is_uv3_hub()) {
- /* Map both MMIOH Regions */
- map_mmioh_high_uv3(0, min_pnode, max_pnode);
- map_mmioh_high_uv3(1, min_pnode, max_pnode);
+ /* UVY flavor */
+ if (UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0) {
+ union uvh_rh10_gam_mmioh_overlay_config0_u mmioh0;
+ union uvh_rh10_gam_mmioh_overlay_config1_u mmioh1;
+
+ mmioh0.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0);
+ if (unlikely(mmioh0.s.enable == 0))
+ pr_info("UV: MMIOH0 disabled\n");
+ else
+ calc_mmioh_map(UVY_MMIOH0, min_pnode, max_pnode,
+ UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+ mmioh0.s.base, mmioh0.s.m_io, mmioh0.s.n_io);
+
+ mmioh1.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1);
+ if (unlikely(mmioh1.s.enable == 0))
+ pr_info("UV: MMIOH1 disabled\n");
+ else
+ calc_mmioh_map(UVY_MMIOH1, min_pnode, max_pnode,
+ UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+ mmioh1.s.base, mmioh1.s.m_io, mmioh1.s.n_io);
return;
}
+ /* UVX flavor */
+ if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0) {
+ union uvh_rh_gam_mmioh_overlay_config0_u mmioh0;
+ union uvh_rh_gam_mmioh_overlay_config1_u mmioh1;
+
+ mmioh0.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0);
+ if (unlikely(mmioh0.s.enable == 0))
+ pr_info("UV: MMIOH0 disabled\n");
+ else {
+ unsigned long base = uvxy_field(mmioh0, base, 0);
+ int m_io = uvxy_field(mmioh0, m_io, 0);
+ int n_io = uvxy_field(mmioh0, n_io, 0);
+
+ calc_mmioh_map(UVX_MMIOH0, min_pnode, max_pnode,
+ UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+ base, m_io, n_io);
+ }
- if (is_uv1_hub()) {
- mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
- shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- mmioh.v = uv_read_local_mmr(mmr);
- enable = !!mmioh.s1.enable;
- base = mmioh.s1.base;
- m_io = mmioh.s1.m_io;
- n_io = mmioh.s1.n_io;
- } else if (is_uv2_hub()) {
- mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
- shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- mmioh.v = uv_read_local_mmr(mmr);
- enable = !!mmioh.s2.enable;
- base = mmioh.s2.base;
- m_io = mmioh.s2.m_io;
- n_io = mmioh.s2.n_io;
- } else
+ mmioh1.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1);
+ if (unlikely(mmioh1.s.enable == 0))
+ pr_info("UV: MMIOH1 disabled\n");
+ else {
+ unsigned long base = uvxy_field(mmioh1, base, 0);
+ int m_io = uvxy_field(mmioh1, m_io, 0);
+ int n_io = uvxy_field(mmioh1, n_io, 0);
+
+ calc_mmioh_map(UVX_MMIOH1, min_pnode, max_pnode,
+ UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+ base, m_io, n_io);
+ }
return;
+ }
- if (enable) {
- max_pnode &= (1 << n_io) - 1;
- pr_info(
- "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
- base, shift, m_io, n_io, max_pnode);
- map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
- } else {
- pr_info("UV: MMIOH disabled\n");
+ /* UV2 flavor */
+ if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG) {
+ union uvh_rh_gam_mmioh_overlay_config_u mmioh;
+
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG);
+ if (unlikely(mmioh.s2.enable == 0))
+ pr_info("UV: MMIOH disabled\n");
+ else
+ calc_mmioh_map(UV2_MMIOH, min_pnode, max_pnode,
+ UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT,
+ mmioh.s2.base, mmioh.s2.m_io, mmioh.s2.n_io);
+ return;
}
}
static __init void map_low_mmrs(void)
{
- init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
- init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+ if (UV_GLOBAL_MMR32_BASE)
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+
+ if (UV_LOCAL_MMR_BASE)
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
}
static __init void uv_rtc_init(void)
@@ -699,367 +1113,708 @@ static __init void uv_rtc_init(void)
long status;
u64 ticks_per_sec;
- status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
- &ticks_per_sec);
+ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec);
+
if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
- printk(KERN_WARNING
- "unable to determine platform RTC clock frequency, "
- "guessing.\n");
- /* BIOS gives wrong value for clock freq. so guess */
+ pr_warn("UV: unable to determine platform RTC clock frequency, guessing.\n");
+
+ /* BIOS gives wrong value for clock frequency, so guess: */
sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
- } else
+ } else {
sn_rtc_cycles_per_second = ticks_per_sec;
+ }
}
-/*
- * percpu heartbeat timer
- */
-static void uv_heartbeat(unsigned long ignored)
+/* Direct Legacy VGA I/O traffic to designated IOH */
+static int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags)
{
- struct timer_list *timer = &uv_hub_info->scir.timer;
- unsigned char bits = uv_hub_info->scir.state;
+ int domain, bus, rc;
- /* flip heartbeat bit */
- bits ^= SCIR_CPU_HEARTBEAT;
+ if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
+ return 0;
- /* is this cpu idle? */
- if (idle_cpu(raw_smp_processor_id()))
- bits &= ~SCIR_CPU_ACTIVITY;
- else
- bits |= SCIR_CPU_ACTIVITY;
+ if ((command_bits & PCI_COMMAND_IO) == 0)
+ return 0;
- /* update system controller interface reg */
- uv_set_scir_bits(bits);
+ domain = pci_domain_nr(pdev->bus);
+ bus = pdev->bus->number;
+
+ rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
- /* enable next timer period */
- mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+ return rc;
}
-static void __cpuinit uv_heartbeat_enable(int cpu)
+/*
+ * Called on each CPU to initialize the per_cpu UV data area.
+ * FIXME: hotplug not supported yet
+ */
+void uv_cpu_init(void)
{
- while (!uv_cpu_hub_info(cpu)->scir.enabled) {
- struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+ /* CPU 0 initialization will be done via uv_system_init. */
+ if (smp_processor_id() == 0)
+ return;
+
+ uv_hub_info->nr_online_cpus++;
+}
- uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- setup_timer(timer, uv_heartbeat, cpu);
- timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
- add_timer_on(timer, cpu);
- uv_cpu_hub_info(cpu)->scir.enabled = 1;
+struct mn {
+ unsigned char m_val;
+ unsigned char n_val;
+ unsigned char m_shift;
+ unsigned char n_lshift;
+};
- /* also ensure that boot cpu is enabled */
- cpu = 0;
+/* Initialize caller's MN struct and fill in values */
+static void get_mn(struct mn *mnp)
+{
+ memset(mnp, 0, sizeof(*mnp));
+ mnp->n_val = uv_cpuid.n_skt;
+ if (is_uv(UV4|UVY)) {
+ mnp->m_val = 0;
+ mnp->n_lshift = 0;
+ } else if (is_uv3_hub()) {
+ union uvyh_gr0_gam_gr_config_u m_gr_config;
+
+ mnp->m_val = uv_cpuid.m_skt;
+ m_gr_config.v = uv_read_local_mmr(UVH_GR0_GAM_GR_CONFIG);
+ mnp->n_lshift = m_gr_config.s3.m_skt;
+ } else if (is_uv2_hub()) {
+ mnp->m_val = uv_cpuid.m_skt;
+ mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
}
+ mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit uv_heartbeat_disable(int cpu)
+static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
{
- if (uv_cpu_hub_info(cpu)->scir.enabled) {
- uv_cpu_hub_info(cpu)->scir.enabled = 0;
- del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ struct mn mn;
+
+ get_mn(&mn);
+ hi->gpa_mask = mn.m_val ?
+ (1UL << (mn.m_val + mn.n_val)) - 1 :
+ (1UL << uv_cpuid.gpa_shift) - 1;
+
+ hi->m_val = mn.m_val;
+ hi->n_val = mn.n_val;
+ hi->m_shift = mn.m_shift;
+ hi->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
+ hi->hub_revision = uv_hub_info->hub_revision;
+ hi->hub_type = uv_hub_info->hub_type;
+ hi->pnode_mask = uv_cpuid.pnode_mask;
+ hi->nasid_shift = uv_cpuid.nasid_shift;
+ hi->min_pnode = _min_pnode;
+ hi->min_socket = _min_socket;
+ hi->node_to_socket = _node_to_socket;
+ hi->pnode_to_socket = _pnode_to_socket;
+ hi->socket_to_node = _socket_to_node;
+ hi->socket_to_pnode = _socket_to_pnode;
+ hi->gr_table_len = _gr_table_len;
+ hi->gr_table = _gr_table;
+
+ uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val);
+ hi->gnode_extra = (uv_node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
+ if (mn.m_val)
+ hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val;
+
+ if (uv_gp_table) {
+ hi->global_mmr_base = uv_gp_table->mmr_base;
+ hi->global_mmr_shift = uv_gp_table->mmr_shift;
+ hi->global_gru_base = uv_gp_table->gru_base;
+ hi->global_gru_shift = uv_gp_table->gru_shift;
+ hi->gpa_shift = uv_gp_table->gpa_shift;
+ hi->gpa_mask = (1UL << hi->gpa_shift) - 1;
+ } else {
+ hi->global_mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG) &
+ ~UV_MMR_ENABLE;
+ hi->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
}
- uv_set_cpu_scir_bits(cpu, 0xff);
+
+ get_lowmem_redirect(&hi->lowmem_remap_base, &hi->lowmem_remap_top);
+
+ hi->apic_pnode_shift = uv_cpuid.socketid_shift;
+
+ /* Show system specific info: */
+ pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, hi->m_shift, hi->n_lshift);
+ pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift);
+ pr_info("UV: mmr_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift);
+ if (hi->global_gru_base)
+ pr_info("UV: gru_base/shift:0x%lx/%ld\n",
+ hi->global_gru_base, hi->global_gru_shift);
+
+ pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, hi->gnode_extra);
}
-/*
- * cpu hotplug notifier
- */
-static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static void __init decode_gam_params(unsigned long ptr)
{
- long cpu = (long)hcpu;
+ uv_gp_table = (struct uv_gam_parameters *)ptr;
- switch (action) {
- case CPU_ONLINE:
- uv_heartbeat_enable(cpu);
- break;
- case CPU_DOWN_PREPARE:
- uv_heartbeat_disable(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ pr_info("UV: GAM Params...\n");
+ pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n",
+ uv_gp_table->mmr_base, uv_gp_table->mmr_shift,
+ uv_gp_table->gru_base, uv_gp_table->gru_shift,
+ uv_gp_table->gpa_shift);
}
-static __init void uv_scir_register_cpu_notifier(void)
+static void __init decode_gam_rng_tbl(unsigned long ptr)
{
- hotcpu_notifier(uv_scir_cpu_notify, 0);
-}
+ struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
+ unsigned long lgre = 0, gend = 0;
+ int index = 0;
+ int sock_min = INT_MAX, pnode_min = INT_MAX;
+ int sock_max = -1, pnode_max = -1;
+
+ uv_gre_table = gre;
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+ int flag = ' ';
+
+ while (size > 9999 && order < sizeof(suffix)) {
+ size /= 1024;
+ order++;
+ }
-#else /* !CONFIG_HOTPLUG_CPU */
+ /* adjust max block size to current range start */
+ if (gre->type == 1 || gre->type == 2)
+ if (adj_blksize(lgre))
+ flag = '*';
-static __init void uv_scir_register_cpu_notifier(void)
-{
+ if (!index) {
+ pr_info("UV: GAM Range Table...\n");
+ pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+ }
+ pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n",
+ index++,
+ (unsigned long)lgre << UV_GAM_RANGE_SHFT,
+ (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
+ flag, size, suffix[order],
+ gre->type, gre->nasid, gre->sockid, gre->pnode);
+
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+ gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT;
+
+ /* update to next range start */
+ lgre = gre->limit;
+ if (sock_min > gre->sockid)
+ sock_min = gre->sockid;
+ if (sock_max < gre->sockid)
+ sock_max = gre->sockid;
+ if (pnode_min > gre->pnode)
+ pnode_min = gre->pnode;
+ if (pnode_max < gre->pnode)
+ pnode_max = gre->pnode;
+ }
+ _min_socket = sock_min;
+ _max_socket = sock_max;
+ _min_pnode = pnode_min;
+ _max_pnode = pnode_max;
+ _gr_table_len = index;
+
+ pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n",
+ index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend));
}
-static __init int uv_init_heartbeat(void)
+/* Walk through UVsystab decoding the fields */
+static int __init decode_uv_systab(void)
{
- int cpu;
+ struct uv_systab *st;
+ int i;
- if (is_uv_system())
- for_each_online_cpu(cpu)
- uv_heartbeat_enable(cpu);
- return 0;
-}
+ /* Get mapped UVsystab pointer */
+ st = uv_systab;
-late_initcall(uv_init_heartbeat);
+ /* If UVsystab is version 1, there is no extended UVsystab */
+ if (st && st->revision == UV_SYSTAB_VERSION_1)
+ return 0;
-#endif /* !CONFIG_HOTPLUG_CPU */
+ if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) {
+ int rev = st ? st->revision : 0;
-/* Direct Legacy VGA I/O traffic to designated IOH */
-int uv_set_vga_state(struct pci_dev *pdev, bool decode,
- unsigned int command_bits, u32 flags)
-{
- int domain, bus, rc;
+ pr_err("UV: BIOS UVsystab mismatch, (%x < %x)\n",
+ rev, UV_SYSTAB_VERSION_UV4_LATEST);
+ pr_err("UV: Does not support UV, switch to non-UV x86_64\n");
+ uv_system_type = UV_NONE;
- PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
- pdev->devfn, decode, command_bits, flags);
+ return -EINVAL;
+ }
- if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
- return 0;
+ for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+ unsigned long ptr = st->entry[i].offset;
- if ((command_bits & PCI_COMMAND_IO) == 0)
- return 0;
+ if (!ptr)
+ continue;
- domain = pci_domain_nr(pdev->bus);
- bus = pdev->bus->number;
+ /* point to payload */
+ ptr += (unsigned long)st;
- rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
- PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
+ switch (st->entry[i].type) {
+ case UV_SYSTAB_TYPE_GAM_PARAMS:
+ decode_gam_params(ptr);
+ break;
- return rc;
+ case UV_SYSTAB_TYPE_GAM_RNG_TBL:
+ decode_gam_rng_tbl(ptr);
+ break;
+
+ case UV_SYSTAB_TYPE_ARCH_TYPE:
+ /* already processed in early startup */
+ break;
+
+ default:
+ pr_err("UV:%s:Unrecognized UV_SYSTAB_TYPE:%d, skipped\n",
+ __func__, st->entry[i].type);
+ break;
+ }
+ }
+ return 0;
}
/*
- * Called on each cpu to initialize the per_cpu UV data area.
- * FIXME: hotplug not supported yet
+ * Given a bitmask 'bits' representing presnt blades, numbered
+ * starting at 'base', masking off unused high bits of blade number
+ * with 'mask', update the minimum and maximum blade numbers that we
+ * have found. (Masking with 'mask' necessary because of BIOS
+ * treatment of system partitioning when creating this table we are
+ * interpreting.)
*/
-void __cpuinit uv_cpu_init(void)
+static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max)
{
- /* CPU 0 initilization will be done via uv_system_init. */
- if (!uv_blade_info)
+ int first, last;
+
+ if (!bits)
return;
+ first = (base + __ffs(bits)) & mask;
+ last = (base + __fls(bits)) & mask;
+
+ if (*min > first)
+ *min = first;
+ if (*max < last)
+ *max = last;
+}
+
+/* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
+static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
+{
+ unsigned long np;
+ int i, uv_pb = 0;
+ int sock_min = INT_MAX, sock_max = -1, s_mask;
+
+ s_mask = (1 << uv_cpuid.n_skt) - 1;
+
+ if (UVH_NODE_PRESENT_TABLE) {
+ pr_info("UV: NODE_PRESENT_DEPTH = %d\n",
+ UVH_NODE_PRESENT_TABLE_DEPTH);
+ for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+ pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
+ blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max);
+ }
+ }
+ if (UVH_NODE_PRESENT_0) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_0);
+ pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np);
+ blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max);
+ }
+ if (UVH_NODE_PRESENT_1) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_1);
+ pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np);
+ blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max);
+ }
+
+ /* Only update if we actually found some bits indicating blades present */
+ if (sock_max >= sock_min) {
+ _min_socket = sock_min;
+ _max_socket = sock_max;
+ uv_pb = sock_max - sock_min + 1;
+ }
+ if (uv_possible_blades != uv_pb)
+ uv_possible_blades = uv_pb;
- uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+ pr_info("UV: number nodes/possible blades %d (%d - %d)\n",
+ uv_pb, sock_min, sock_max);
+}
+
+static int __init alloc_conv_table(int num_elem, unsigned short **table)
+{
+ int i;
+ size_t bytes;
+
+ bytes = num_elem * sizeof(*table[0]);
+ *table = kmalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!*table))
+ return -ENOMEM;
+ for (i = 0; i < num_elem; i++)
+ ((unsigned short *)*table)[i] = SOCK_EMPTY;
+ return 0;
+}
+
+/* Remove conversion table if it's 1:1 */
+#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2)
+
+static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2)
+{
+ int i;
+ unsigned short *table = *tp;
- if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
- set_x2apic_extra_bits(uv_hub_info->pnode);
+ if (table == NULL)
+ return;
+ if (max != max2)
+ return;
+ for (i = 0; i < max; i++) {
+ if (i != table[i])
+ return;
+ }
+ kfree(table);
+ *tp = NULL;
+ pr_info("UV: %s is 1:1, conversion table removed\n", tname);
}
/*
- * When NMI is received, print a stack trace.
+ * Build Socket Tables
+ * If the number of nodes is >1 per socket, socket to node table will
+ * contain lowest node number on that socket.
*/
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static void __init build_socket_tables(void)
{
- unsigned long real_uv_nmi;
- int bid;
-
- /*
- * Each blade has an MMR that indicates when an NMI has been sent
- * to cpus on the blade. If an NMI is detected, atomically
- * clear the MMR and update a per-blade NMI count used to
- * cause each cpu on the blade to notice a new NMI.
- */
- bid = uv_numa_blade_id();
- real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
-
- if (unlikely(real_uv_nmi)) {
- spin_lock(&uv_blade_info[bid].nmi_lock);
- real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
- if (real_uv_nmi) {
- uv_blade_info[bid].nmi_count++;
- uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+ struct uv_gam_range_entry *gre = uv_gre_table;
+ int nums, numn, nump;
+ int i, lnid, apicid;
+ int minsock = _min_socket;
+ int maxsock = _max_socket;
+ int minpnode = _min_pnode;
+ int maxpnode = _max_pnode;
+
+ if (!gre) {
+ if (is_uv2_hub() || is_uv3_hub()) {
+ pr_info("UV: No UVsystab socket table, ignoring\n");
+ return;
}
- spin_unlock(&uv_blade_info[bid].nmi_lock);
+ pr_err("UV: Error: UVsystab address translations not available!\n");
+ WARN_ON_ONCE(!gre);
+ return;
+ }
+
+ numn = num_possible_nodes();
+ nump = maxpnode - minpnode + 1;
+ nums = maxsock - minsock + 1;
+
+ /* Allocate and clear tables */
+ if ((alloc_conv_table(nump, &_pnode_to_socket) < 0)
+ || (alloc_conv_table(nums, &_socket_to_pnode) < 0)
+ || (alloc_conv_table(numn, &_node_to_socket) < 0)
+ || (alloc_conv_table(nums, &_socket_to_node) < 0)) {
+ kfree(_pnode_to_socket);
+ kfree(_socket_to_pnode);
+ kfree(_node_to_socket);
+ return;
+ }
+
+ /* Fill in pnode/node/addr conversion list values: */
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+ continue;
+ i = gre->sockid - minsock;
+ if (_socket_to_pnode[i] == SOCK_EMPTY)
+ _socket_to_pnode[i] = gre->pnode;
+
+ i = gre->pnode - minpnode;
+ if (_pnode_to_socket[i] == SOCK_EMPTY)
+ _pnode_to_socket[i] = gre->sockid;
+
+ pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
+ gre->sockid, gre->type, gre->nasid,
+ _socket_to_pnode[gre->sockid - minsock],
+ _pnode_to_socket[gre->pnode - minpnode]);
}
- if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
- return NMI_DONE;
+ /* Set socket -> node values: */
+ lnid = NUMA_NO_NODE;
+ for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) {
+ int nid = __apicid_to_node[apicid];
+ int sockid;
+
+ if ((nid == NUMA_NO_NODE) || (lnid == nid))
+ continue;
+ lnid = nid;
+
+ sockid = apicid >> uv_cpuid.socketid_shift;
- __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+ if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
+ _socket_to_node[sockid - minsock] = nid;
+
+ if (_node_to_socket[nid] == SOCK_EMPTY)
+ _node_to_socket[nid] = sockid;
+
+ pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n",
+ sockid,
+ apicid,
+ _node_to_socket[nid],
+ nid,
+ _socket_to_node[sockid - minsock]);
+ }
/*
- * Use a lock so only one cpu prints at a time.
- * This prevents intermixed output.
+ * If e.g. socket id == pnode for all pnodes,
+ * system runs faster by removing corresponding conversion table.
*/
- spin_lock(&uv_nmi_lock);
- pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
- dump_stack();
- spin_unlock(&uv_nmi_lock);
+ FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump);
+ FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump);
+}
- return NMI_HANDLED;
+/* Check which reboot to use */
+static void check_efi_reboot(void)
+{
+ /* If EFI reboot not available, use ACPI reboot */
+ if (!efi_enabled(EFI_BOOT))
+ reboot_type = BOOT_ACPI;
}
-void uv_register_nmi_notifier(void)
+/*
+ * User proc fs file handling now deprecated.
+ * Recommend using /sys/firmware/sgi_uv/... instead.
+ */
+static int __maybe_unused proc_hubbed_show(struct seq_file *file, void *data)
{
- if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
- printk(KERN_WARNING "UV NMI handler failed to register\n");
+ pr_notice_once("%s: using deprecated /proc/sgi_uv/hubbed, use /sys/firmware/sgi_uv/hub_type\n",
+ current->comm);
+ seq_printf(file, "0x%x\n", uv_hubbed_system);
+ return 0;
}
-void uv_nmi_init(void)
+static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data)
{
- unsigned int value;
+ pr_notice_once("%s: using deprecated /proc/sgi_uv/hubless, use /sys/firmware/sgi_uv/hubless\n",
+ current->comm);
+ seq_printf(file, "0x%x\n", uv_hubless_system);
+ return 0;
+}
- /*
- * Unmask NMI on all cpus
- */
- value = apic_read(APIC_LVT1) | APIC_DM_NMI;
- value &= ~APIC_LVT_MASKED;
- apic_write(APIC_LVT1, value);
+static int __maybe_unused proc_archtype_show(struct seq_file *file, void *data)
+{
+ pr_notice_once("%s: using deprecated /proc/sgi_uv/archtype, use /sys/firmware/sgi_uv/archtype\n",
+ current->comm);
+ seq_printf(file, "%s/%s\n", uv_archtype, oem_table_id);
+ return 0;
}
-void __init uv_system_init(void)
+static __init void uv_setup_proc_files(int hubless)
{
- union uvh_rh_gam_config_mmr_u m_n_config;
- union uvh_node_id_u node_id;
- unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
- int gnode_extra, min_pnode = 999999, max_pnode = -1;
- unsigned long mmr_base, present, paddr;
- unsigned short pnode_mask;
- char *hub = (is_uv1_hub() ? "UV1" :
- (is_uv2_hub() ? "UV2" :
- "UV3"));
+ struct proc_dir_entry *pde;
+ pde = proc_mkdir(UV_PROC_NODE, NULL);
+ proc_create_single("archtype", 0, pde, proc_archtype_show);
+ if (hubless)
+ proc_create_single("hubless", 0, pde, proc_hubless_show);
+ else
+ proc_create_single("hubbed", 0, pde, proc_hubbed_show);
+}
+
+/* Initialize UV hubless systems */
+static __init int uv_system_init_hubless(void)
+{
+ int rc;
+
+ /* Setup PCH NMI handler */
+ uv_nmi_setup_hubless();
+
+ /* Init kernel/BIOS interface */
+ rc = uv_bios_init();
+ if (rc < 0)
+ return rc;
+
+ /* Process UVsystab */
+ rc = decode_uv_systab();
+ if (rc < 0)
+ return rc;
+
+ /* Set section block size for current node memory */
+ set_block_size();
+
+ /* Create user access node */
+ if (rc >= 0)
+ uv_setup_proc_files(1);
+
+ check_efi_reboot();
+
+ return rc;
+}
+
+static void __init uv_system_init_hub(void)
+{
+ struct uv_hub_info_s hub_info = {0};
+ int bytes, cpu, nodeid, bid;
+ unsigned short min_pnode = USHRT_MAX, max_pnode = 0;
+ char *hub = is_uv5_hub() ? "UV500" :
+ is_uv4_hub() ? "UV400" :
+ is_uv3_hub() ? "UV300" :
+ is_uv2_hub() ? "UV2000/3000" : NULL;
+ struct uv_hub_info_s **uv_hub_info_list_blade;
+
+ if (!hub) {
+ pr_err("UV: Unknown/unsupported UV hub\n");
+ return;
+ }
pr_info("UV: Found %s hub\n", hub);
+
map_low_mmrs();
- m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
- m_val = m_n_config.s.m_skt;
- n_val = m_n_config.s.n_skt;
- pnode_mask = (1 << n_val) - 1;
- mmr_base =
- uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
- ~UV_MMR_ENABLE;
-
- node_id.v = uv_read_local_mmr(UVH_NODE_ID);
- gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
- gnode_upper = ((unsigned long)gnode_extra << m_val);
- pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n",
- n_val, m_val, pnode_mask, gnode_upper, gnode_extra);
-
- pr_info("UV: global MMR base 0x%lx\n", mmr_base);
-
- for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
- uv_possible_blades +=
- hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
-
- /* uv_num_possible_blades() is really the hub count */
- pr_info("UV: Found %d blades, %d hubs\n",
- is_uv1_hub() ? uv_num_possible_blades() :
- (uv_num_possible_blades() + 1) / 2,
- uv_num_possible_blades());
-
- bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
- uv_blade_info = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_blade_info);
-
- for (blade = 0; blade < uv_num_possible_blades(); blade++)
- uv_blade_info[blade].memory_nid = -1;
-
- get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
-
- bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
- uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_node_to_blade);
- memset(uv_node_to_blade, 255, bytes);
-
- bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
- uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_cpu_to_blade);
- memset(uv_cpu_to_blade, 255, bytes);
-
- blade = 0;
- for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
- present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
- for (j = 0; j < 64; j++) {
- if (!test_bit(j, &present))
- continue;
- pnode = (i * 64 + j) & pnode_mask;
- uv_blade_info[blade].pnode = pnode;
- uv_blade_info[blade].nr_possible_cpus = 0;
- uv_blade_info[blade].nr_online_cpus = 0;
- spin_lock_init(&uv_blade_info[blade].nmi_lock);
- min_pnode = min(pnode, min_pnode);
- max_pnode = max(pnode, max_pnode);
- blade++;
- }
+ /* Get uv_systab for decoding, setup UV BIOS calls */
+ uv_bios_init();
+
+ /* If there's an UVsystab problem then abort UV init: */
+ if (decode_uv_systab() < 0) {
+ pr_err("UV: Mangled UVsystab format\n");
+ return;
}
- uv_bios_init();
- uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
- &sn_region_size, &system_serial_number);
+ build_socket_tables();
+ build_uv_gr_table();
+ set_block_size();
+ uv_init_hub_info(&hub_info);
+ /* If UV2 or UV3 may need to get # blades from HW */
+ if (is_uv(UV2|UV3) && !uv_gre_table)
+ boot_init_possible_blades(&hub_info);
+ else
+ /* min/max sockets set in decode_gam_rng_tbl */
+ uv_possible_blades = (_max_socket - _min_socket) + 1;
+
+ /* uv_num_possible_blades() is really the hub count: */
+ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus());
+
+ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, &sn_region_size, &system_serial_number);
+ hub_info.coherency_domain_number = sn_coherency_id;
uv_rtc_init();
- for_each_present_cpu(cpu) {
- int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ /*
+ * __uv_hub_info_list[] is indexed by node, but there is only
+ * one hub_info structure per blade. First, allocate one
+ * structure per blade. Further down we create a per-node
+ * table (__uv_hub_info_list[]) pointing to hub_info
+ * structures for the correct blade.
+ */
+
+ bytes = sizeof(void *) * uv_num_possible_blades();
+ uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!uv_hub_info_list_blade))
+ return;
+
+ bytes = sizeof(struct uv_hub_info_s);
+ for_each_possible_blade(bid) {
+ struct uv_hub_info_s *new_hub;
+
+ /* Allocate & fill new per hub info list */
+ new_hub = (bid == 0) ? &uv_hub_info_node0
+ : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid));
+ if (WARN_ON_ONCE(!new_hub)) {
+ /* do not kfree() bid 0, which is statically allocated */
+ while (--bid > 0)
+ kfree(uv_hub_info_list_blade[bid]);
+ kfree(uv_hub_info_list_blade);
+ return;
+ }
+
+ uv_hub_info_list_blade[bid] = new_hub;
+ *new_hub = hub_info;
+
+ /* Use information from GAM table if available: */
+ if (uv_gre_table)
+ new_hub->pnode = uv_blade_to_pnode(bid);
+ else /* Or fill in during CPU loop: */
+ new_hub->pnode = 0xffff;
+
+ new_hub->numa_blade_id = bid;
+ new_hub->memory_nid = NUMA_NO_NODE;
+ new_hub->nr_possible_cpus = 0;
+ new_hub->nr_online_cpus = 0;
+ }
+
+ /*
+ * Now populate __uv_hub_info_list[] for each node with the
+ * pointer to the struct for the blade it resides on.
+ */
+
+ bytes = sizeof(void *) * num_possible_nodes();
+ __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!__uv_hub_info_list)) {
+ for_each_possible_blade(bid)
+ /* bid 0 is statically allocated */
+ if (bid != 0)
+ kfree(uv_hub_info_list_blade[bid]);
+ kfree(uv_hub_info_list_blade);
+ return;
+ }
- nid = cpu_to_node(cpu);
- /*
- * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
- */
- uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
- uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
- uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+ for_each_node(nodeid)
+ __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)];
- uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
- uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ?
- (m_val == 40 ? 40 : 39) : m_val;
+ /* Initialize per CPU info: */
+ for_each_possible_cpu(cpu) {
+ int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ unsigned short bid;
+ unsigned short pnode;
pnode = uv_apicid_to_pnode(apicid);
- blade = boot_pnode_to_blade(pnode);
- lcpu = uv_blade_info[blade].nr_possible_cpus;
- uv_blade_info[blade].nr_possible_cpus++;
-
- /* Any node on the blade, else will contain -1. */
- uv_blade_info[blade].memory_nid = nid;
-
- uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
- uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
- uv_cpu_hub_info(cpu)->m_val = m_val;
- uv_cpu_hub_info(cpu)->n_val = n_val;
- uv_cpu_hub_info(cpu)->numa_blade_id = blade;
- uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
- uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
- uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
- uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
- uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
- uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
- uv_node_to_blade[nid] = blade;
- uv_cpu_to_blade[cpu] = blade;
+ bid = uv_pnode_to_socket(pnode) - _min_socket;
+
+ uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid];
+ uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
+ if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
+ uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
+
+ if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+ uv_cpu_hub_info(cpu)->pnode = pnode;
}
- /* Add blade/pnode info for nodes without cpus */
- for_each_online_node(nid) {
- if (uv_node_to_blade[nid] >= 0)
+ for_each_possible_blade(bid) {
+ unsigned short pnode = uv_hub_info_list_blade[bid]->pnode;
+
+ if (pnode == 0xffff)
continue;
- paddr = node_start_pfn(nid) << PAGE_SHIFT;
- pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
- blade = boot_pnode_to_blade(pnode);
- uv_node_to_blade[nid] = blade;
+
+ min_pnode = min(pnode, min_pnode);
+ max_pnode = max(pnode, max_pnode);
+ pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n",
+ bid,
+ uv_hub_info_list_blade[bid]->pnode,
+ uv_hub_info_list_blade[bid]->nr_possible_cpus);
}
+ pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_mmioh_high(min_pnode, max_pnode);
+ kfree(uv_hub_info_list_blade);
+ uv_hub_info_list_blade = NULL;
+
+ uv_nmi_setup();
uv_cpu_init();
- uv_scir_register_cpu_notifier();
- uv_register_nmi_notifier();
- proc_mkdir("sgi_uv", NULL);
+ uv_setup_proc_files(0);
- /* register Legacy VGA I/O redirection handler */
+ /* Register Legacy VGA I/O redirection handler: */
pci_register_set_vga_state(uv_set_vga_state);
- /*
- * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
- * EFI is not enabled in the kdump kernel.
- */
- if (is_kdump_kernel())
- reboot_type = BOOT_ACPI;
+ check_efi_reboot();
+}
+
+/*
+ * There is a different code path needed to initialize a UV system that does
+ * not have a "UV HUB" (referred to as "hubless").
+ */
+void __init uv_system_init(void)
+{
+ if (likely(!is_uv_system() && !is_uv_hubless(1)))
+ return;
+
+ if (is_uv_system())
+ uv_system_init_hub();
+ else
+ uv_system_init_hubless();
}
apic_driver(apic_x2apic_uv_x);