summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/apic
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw@amazon.co.uk>2020-10-24 22:35:31 +0100
committerThomas Gleixner <tglx@linutronix.de>2020-10-28 20:26:28 +0100
commit51130d21881d435fad5fa7f25bea77aa0ffc9a4e (patch)
tree5faf6b3754dd0e36101b1bcc50204d9135b0bd57 /arch/x86/kernel/apic
parent79eb3581bcaae9b5677629d945e14da212aa76e2 (diff)
x86/ioapic: Handle Extended Destination ID field in RTE
Bits 63-48 of the I/OAPIC Redirection Table Entry map directly to bits 19-4 of the address used in the resulting MSI cycle. Historically, the x86 MSI format only used the top 8 of those 16 bits as the destination APIC ID, and the "Extended Destination ID" in the lower 8 bits was unused. With interrupt remapping, the lowest bit of the Extended Destination ID (bit 48 of RTE, bit 4 of MSI address) is now used to indicate a remappable format MSI. A hypervisor can use the other 7 bits of the Extended Destination ID to permit guests to address up to 15 bits of APIC IDs, thus allowing 32768 vCPUs before having to expose a vIOMMU and interrupt remapping to the guest. No behavioural change in this patch, since nothing yet permits APIC IDs above 255 to be used with the non-IR I/OAPIC domain. [ tglx: Converted it to the cleaned up entry/msi_msg format and added commentry ] Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20201024213535.443185-32-dwmw2@infradead.org
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--arch/x86/kernel/apic/io_apic.c20
1 files changed, 15 insertions, 5 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 443d2c9086b9..1cfd65ef295b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1238,9 +1238,10 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
(entry.ir_index_15 << 15) | entry.ir_index_0_14,
entry.ir_zero);
} else {
- printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", buf,
+ printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf,
entry.dest_mode_logical ? "logical " : "physical",
- entry.destid_0_7, entry.delivery_mode);
+ entry.virt_destid_8_14, entry.destid_0_7,
+ entry.delivery_mode);
}
}
}
@@ -1409,6 +1410,7 @@ void native_restore_boot_irq_mode(void)
*/
if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
+ u32 apic_id = read_apic_id();
memset(&entry, 0, sizeof(entry));
entry.masked = false;
@@ -1416,7 +1418,8 @@ void native_restore_boot_irq_mode(void)
entry.active_low = false;
entry.dest_mode_logical = false;
entry.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
- entry.destid_0_7 = read_apic_id();
+ entry.destid_0_7 = apic_id & 0xFF;
+ entry.virt_destid_8_14 = apic_id >> 8;
/*
* Add it to the IO-APIC irq-routing table:
@@ -1885,7 +1888,11 @@ static void ioapic_setup_msg_from_msi(struct irq_data *irq_data,
/* DMAR/IR: 1, 0 for all other modes */
entry->ir_format = msg.arch_addr_lo.dmar_format;
/*
- * DMAR/IR: index bit 0-14.
+ * - DMAR/IR: index bit 0-14.
+ *
+ * - Virt: If the host supports x2apic without a virtualized IR
+ * unit then bit 0-6 of dmar_index_0_14 are providing bit
+ * 8-14 of the destination id.
*
* All other modes have bit 0-6 of dmar_index_0_14 cleared and the
* topmost 8 bits are destination id bit 0-7 (entry::destid_0_7).
@@ -2063,6 +2070,7 @@ static inline void __init unlock_ExtINT_logic(void)
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
+ u32 apic_id;
pin = find_isa_irq_pin(8, mp_INT);
if (pin == -1) {
@@ -2078,11 +2086,13 @@ static inline void __init unlock_ExtINT_logic(void)
entry0 = ioapic_read_entry(apic, pin);
clear_IO_APIC_pin(apic, pin);
+ apic_id = hard_smp_processor_id();
memset(&entry1, 0, sizeof(entry1));
entry1.dest_mode_logical = true;
entry1.masked = false;
- entry1.destid_0_7 = hard_smp_processor_id();
+ entry1.destid_0_7 = apic_id & 0xFF;
+ entry1.virt_destid_8_14 = apic_id >> 8;
entry1.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
entry1.active_low = entry0.active_low;
entry1.is_level = false;