diff options
Diffstat (limited to 'drivers/iommu/amd/init.c')
| -rw-r--r-- | drivers/iommu/amd/init.c | 822 |
1 files changed, 512 insertions, 310 deletions
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 0e0a531042ac..4b2953418977 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include <linux/acpi.h> #include <linux/list.h> #include <linux/bitmap.h> -#include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/msi.h> @@ -152,7 +151,9 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; -enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; +enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; +/* Host page table level */ +u8 amd_iommu_hpt_level; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -169,13 +170,16 @@ static int amd_iommu_target_ivhd_type; u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* Host (v1) page table is not supported*/ +bool amd_iommu_hatdis; + /* SNP is enabled on the system? */ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */ /* Number of IOMMUs present in the system */ static int amd_iommus_present; @@ -219,7 +223,6 @@ static bool __initdata cmdline_maps; static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); -static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -245,17 +248,14 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline unsigned long tbl_size(int entry_size, int last_bdf) +int amd_iommu_get_num_iommus(void) { - unsigned shift = PAGE_SHIFT + - get_order((last_bdf + 1) * entry_size); - - return 1UL << shift; + return amd_iommus_present; } -int amd_iommu_get_num_iommus(void) +bool amd_iommu_ht_range_ignore(void) { - return amd_iommus_present; + return check_feature2(FEATURE_HT_RANGE_IGNORE); } /* @@ -406,39 +406,35 @@ static void iommu_set_device_table(struct amd_iommu *iommu) BUG_ON(iommu->mmio_base == NULL); + if (is_kdump_kernel()) + return; + entry = iommu_virt_to_phys(dev_table); entry |= (dev_table_size >> 12) - 1; memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, &entry, sizeof(entry)); } -/* Generic functions to enable/disable certain features of the IOMMU. */ -void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) { u64 ctrl; ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1ULL << bit); + mask <<= shift; + ctrl &= ~mask; + ctrl |= (val << shift) & mask; writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +/* Generic functions to enable/disable certain features of the IOMMU. */ +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1ULL << bit); - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 1ULL, 1ULL, bit); } -static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~CTRL_INV_TO_MASK; - ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 0ULL, 1ULL, bit); } /* Function to enable the hardware */ @@ -643,8 +639,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, + pci_seg->dev_table_size); if (!pci_seg->dev_table) return -ENOMEM; @@ -653,16 +649,19 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + if (is_kdump_kernel()) + memunmap((void *)pci_seg->dev_table); + else + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->rlookup_table), + GFP_KERNEL); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -671,17 +670,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->rlookup_table); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); - kmemleak_alloc(pci_seg->irq_lookup_table, - pci_seg->rlookup_table_size, 1, GFP_KERNEL); + pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->irq_lookup_table), + GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) return -ENOMEM; @@ -690,9 +687,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - kmemleak_free(pci_seg->irq_lookup_table); - iommu_free_pages(pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->irq_lookup_table); pci_seg->irq_lookup_table = NULL; } @@ -700,8 +695,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1, + sizeof(*pci_seg->alias_table), + GFP_KERNEL); if (!pci_seg->alias_table) return -ENOMEM; @@ -716,11 +712,30 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + kvfree(pci_seg->alias_table); pci_seg->alias_table = NULL; } +static inline void *iommu_memremap(unsigned long paddr, size_t size) +{ + phys_addr_t phys; + + if (!paddr) + return NULL; + + /* + * Obtain true physical address in kdump kernel when SME is enabled. + * Currently, previous kernel with SME enabled and kdump kernel + * with SME support disabled is not supported. + */ + phys = __sme_clr(paddr); + + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return (__force void *)ioremap_encrypted(phys, size); + else + return memremap(phys, size, MEMREMAP_WB); +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -728,8 +743,7 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); return iommu->cmd_buf ? 0 : -ENOMEM; } @@ -807,11 +821,16 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) BUG_ON(iommu->cmd_buf == NULL); - entry = iommu_virt_to_phys(iommu->cmd_buf); - entry |= MMIO_CMD_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); + if (!is_kdump_kernel()) { + /* + * Command buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + } amd_iommu_reset_cmd_buffer(iommu); } @@ -826,20 +845,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf); } void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size) { - int order = get_order(size); - void *buf = iommu_alloc_pages(gfp, order); + void *buf; - if (buf && - check_feature(FEATURE_SNP) && - set_memory_4k((unsigned long)buf, (1 << order))) { - iommu_free_pages(buf, order); - buf = NULL; + size = PAGE_ALIGN(size); + buf = iommu_alloc_pages_sz(gfp, size); + if (!buf) + return NULL; + if (check_feature(FEATURE_SNP) && + set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) { + iommu_free_pages(buf); + return NULL; } return buf; @@ -860,10 +881,15 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) BUG_ON(iommu->evt_buf == NULL); - entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); + if (!is_kdump_kernel()) { + /* + * Event buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + } /* set head and tail to zero manually */ writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); @@ -882,14 +908,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); + iommu_free_pages(iommu->evt_buf); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); - iommu_free_pages(iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log); + iommu_free_pages(iommu->ga_log_tail); #endif } @@ -934,11 +960,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8); if (!iommu->ga_log_tail) goto err_out; @@ -952,14 +978,129 @@ err_out: static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); + if (!iommu->cmd_sem) + return -ENOMEM; + iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem); + return 0; +} + +static int __init remap_event_buffer(struct amd_iommu *iommu) +{ + u64 paddr; + + pr_info_once("Re-using event buffer from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; + iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE); + + return iommu->evt_buf ? 0 : -ENOMEM; +} + +static int __init remap_command_buffer(struct amd_iommu *iommu) +{ + u64 paddr; + + pr_info_once("Re-using command buffer from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK; + iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE); + + return iommu->cmd_buf ? 0 : -ENOMEM; +} + +static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu) +{ + u64 paddr; + + if (check_feature(FEATURE_SNP)) { + /* + * When SNP is enabled, the exclusion base register is used for the + * completion wait buffer (CWB) address. Read and re-use it. + */ + pr_info_once("Re-using CWB buffers from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK; + iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE); + if (!iommu->cmd_sem) + return -ENOMEM; + iommu->cmd_sem_paddr = paddr; + } else { + return alloc_cwwb_sem(iommu); + } + + return 0; +} + +static int __init alloc_iommu_buffers(struct amd_iommu *iommu) +{ + int ret; + + /* + * Reuse/Remap the previous kernel's allocated completion wait + * command and event buffers for kdump boot. + */ + if (is_kdump_kernel()) { + ret = remap_or_alloc_cwwb_sem(iommu); + if (ret) + return ret; + + ret = remap_command_buffer(iommu); + if (ret) + return ret; - return iommu->cmd_sem ? 0 : -ENOMEM; + ret = remap_event_buffer(iommu); + if (ret) + return ret; + } else { + ret = alloc_cwwb_sem(iommu); + if (ret) + return ret; + + ret = alloc_command_buffer(iommu); + if (ret) + return ret; + + ret = alloc_event_buffer(iommu); + if (ret) + return ret; + } + + return 0; } static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - iommu_free_page((void *)iommu->cmd_sem); + iommu_free_pages((void *)iommu->cmd_sem); +} +static void __init unmap_cwwb_sem(struct amd_iommu *iommu) +{ + if (iommu->cmd_sem) { + if (check_feature(FEATURE_SNP)) + memunmap((void *)iommu->cmd_sem); + else + iommu_free_pages((void *)iommu->cmd_sem); + } +} + +static void __init unmap_command_buffer(struct amd_iommu *iommu) +{ + memunmap((void *)iommu->cmd_buf); +} + +static void __init unmap_event_buffer(struct amd_iommu *iommu) +{ + memunmap(iommu->evt_buf); +} + +static void __init free_iommu_buffers(struct amd_iommu *iommu) +{ + if (is_kdump_kernel()) { + unmap_cwwb_sem(iommu); + unmap_command_buffer(iommu); + unmap_event_buffer(iommu); + } else { + free_cwwb_sem(iommu); + free_command_buffer(iommu); + free_event_buffer(iommu); + } } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -984,47 +1125,20 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. */ -static void __set_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) +static void set_dte_bit(struct dev_table_entry *dte, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - dev_table[devid].data[i] |= (1UL << _bit); + dte->data[i] |= (1UL << _bit); } -static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +static bool __reuse_device_table(struct amd_iommu *iommu) { - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __set_dev_entry_bit(dev_table, devid, bit); -} - -static int __get_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) -{ - int i = (bit >> 6) & 0x03; - int _bit = bit & 0x3f; - - return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; -} - -static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __get_dev_entry_bit(dev_table, devid, bit); -} - -static bool __copy_device_table(struct amd_iommu *iommu) -{ - u64 int_ctl, int_tab_len, entry = 0; struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - struct dev_table_entry *old_devtb = NULL; - u32 lo, hi, devid, old_devtb_size; + u32 lo, hi, old_devtb_size; phys_addr_t old_devtb_phys; - u16 dom_id, dte_v, irq_v; - u64 tmp; + u64 entry; /* Each IOMMU use separate device table with the same size */ lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); @@ -1049,67 +1163,20 @@ static bool __copy_device_table(struct amd_iommu *iommu) pr_err("The address of old device table is above 4G, not trustworthy!\n"); return false; } - old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) - ? (__force void *)ioremap_encrypted(old_devtb_phys, - pci_seg->dev_table_size) - : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); - - if (!old_devtb) - return false; - pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + /* + * Re-use the previous kernel's device table for kdump. + */ + pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size); if (pci_seg->old_dev_tbl_cpy == NULL) { - pr_err("Failed to allocate memory for copying old device table!\n"); - memunmap(old_devtb); + pr_err("Failed to remap memory for reusing old device table!\n"); return false; } - for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; - dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; - dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; - - if (dte_v && dom_id) { - pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; - pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; - /* Reserve the Domain IDs used by previous kernel */ - if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) { - pr_err("Failed to reserve domain ID 0x%x\n", dom_id); - memunmap(old_devtb); - return false; - } - /* If gcr3 table existed, mask it out */ - if (old_devtb[devid].data[0] & DTE_FLAG_GV) { - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; - tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; - tmp |= DTE_FLAG_GV; - pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; - } - } - - irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; - int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; - int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; - if (irq_v && (int_ctl || int_tab_len)) { - if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_INTTABLEN)) { - pr_err("Wrong old irq remapping flag: %#x\n", devid); - memunmap(old_devtb); - return false; - } - - pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; - } - } - memunmap(old_devtb); - return true; } -static bool copy_device_table(void) +static bool reuse_device_table(void) { struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg; @@ -1117,17 +1184,17 @@ static bool copy_device_table(void) if (!amd_iommu_pre_enabled) return false; - pr_warn("Translation is already enabled - trying to copy translation structures\n"); + pr_warn("Translation is already enabled - trying to reuse translation structures\n"); /* * All IOMMUs within PCI segment shares common device table. - * Hence copy device table only once per PCI segment. + * Hence reuse device table only once per PCI segment. */ for_each_pci_segment(pci_seg) { for_each_iommu(iommu) { if (pci_seg->id != iommu->pci_seg->id) continue; - if (!__copy_device_table(iommu)) + if (!__reuse_device_table(iommu)) return false; break; } @@ -1136,42 +1203,107 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) { - int sysmgt; + struct ivhd_dte_flags *e; + unsigned int best_len = UINT_MAX; + struct dev_table_entry *dte = NULL; + + for_each_ivhd_dte_flags(e) { + /* + * Need to go through the whole list to find the smallest range, + * which contains the devid. + */ + if ((e->segid == segid) && + (e->devid_first <= devid) && (devid <= e->devid_last)) { + unsigned int len = e->devid_last - e->devid_first; + + if (len < best_len) { + dte = &(e->dte); + best_len = len; + } + } + } + return dte; +} - sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); +static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) +{ + struct ivhd_dte_flags *e; - if (sysmgt == 0x01) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); + for_each_ivhd_dte_flags(e) { + if ((e->segid == segid) && + (e->devid_first == first) && + (e->devid_last == last)) + return true; + } + return false; } /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information */ -static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, - u16 devid, u32 flags, u32 ext_flags) +static void __init +set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, + u32 flags, u32 ext_flags) { - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); + int i; + struct dev_table_entry dte = {}; - amd_iommu_apply_erratum_63(iommu, devid); + /* Parse IVHD DTE setting flags and store information */ + if (flags) { + struct ivhd_dte_flags *d; + + if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) + return; + + d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); + if (!d) + return; - amd_iommu_set_rlookup_table(iommu, devid); + pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); + + if (flags & ACPI_DEVFLAG_INITPASS) + set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); + + /* Apply erratum 63, which needs info in initial_dte */ + if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) + dte.data[0] |= DTE_FLAG_IW; + + memcpy(&d->dte, &dte, sizeof(dte)); + d->segid = iommu->pci_seg->id; + d->devid_first = first; + d->devid_last = last; + list_add_tail(&d->list, &amd_ivhd_dev_flags_list); + } + + for (i = first; i <= last; i++) { + if (flags) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + + memcpy(&dev_table[i], &dte, sizeof(dte)); + } + amd_iommu_set_rlookup_table(iommu, i); + } +} + +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) +{ + set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); } int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) @@ -1239,7 +1371,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, entry->cmd_line = cmd_line; entry->root_devid = (entry->devid & (~0x7)); - pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", + pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", entry->cmd_line ? "cmd" : "ivrs", entry->hid, entry->uid, entry->root_devid); @@ -1331,15 +1463,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - - for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); + DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); + set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x\n", + DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1350,8 +1479,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_SELECT_RANGE_START: - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %04x:%02x:%02x.%x flags: %02x\n", + DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1364,8 +1492,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1382,9 +1509,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS_RANGE: - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %04x:%02x:%02x.%x flags: %02x " - "devid_to: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1401,8 +1526,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1414,8 +1538,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT_RANGE: - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1428,21 +1551,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); devid = e->devid; - for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) { + if (alias) { + for (dev_i = devid_start; dev_i <= devid; ++dev_i) pci_seg->alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); + set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); } + set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); break; case IVHD_DEV_SPECIAL: { u8 handle, type; @@ -1461,11 +1581,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", var, (int)handle, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); ret = add_special_device(type, handle, &devid, false); if (ret) @@ -1525,11 +1646,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, } devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); flags = e->flags; @@ -1578,9 +1700,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); - pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->dev_table_size = + max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), + SZ_4K); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -1588,13 +1710,22 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); if (alloc_dev_table(pci_seg)) - return NULL; + goto err_free_pci_seg; if (alloc_alias_table(pci_seg)) - return NULL; + goto err_free_dev_table; if (alloc_rlookup_table(pci_seg)) - return NULL; + goto err_free_alias_table; return pci_seg; + +err_free_alias_table: + free_alias_table(pci_seg); +err_free_dev_table: + free_dev_table(pci_seg); +err_free_pci_seg: + list_del(&pci_seg->list); + kfree(pci_seg); + return NULL; } static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, @@ -1635,9 +1766,7 @@ static void __init free_sysfs(struct amd_iommu *iommu) static void __init free_iommu_one(struct amd_iommu *iommu) { free_sysfs(iommu); - free_cwwb_sem(iommu); - free_command_buffer(iommu); - free_event_buffer(iommu); + free_iommu_buffers(iommu); amd_iommu_free_ppr_log(iommu); free_ga_log(iommu); iommu_unmap_mmio_space(iommu); @@ -1757,13 +1886,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling it. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + /* GAM requires GA mode. */ + if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1773,13 +1897,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * XT, GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling them. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { + /* XT and GAM require GA mode. */ + if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; } @@ -1787,6 +1906,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { + pr_warn_once("Host Address Translation is not supported.\n"); + amd_iommu_hatdis = true; + } + early_iommu_features_init(iommu, h); break; @@ -1806,14 +1930,9 @@ static int __init init_iommu_one_late(struct amd_iommu *iommu) { int ret; - if (alloc_cwwb_sem(iommu)) - return -ENOMEM; - - if (alloc_command_buffer(iommu)) - return -ENOMEM; - - if (alloc_event_buffer(iommu)) - return -ENOMEM; + ret = alloc_iommu_buffers(iommu); + if (ret) + return ret; iommu->int_enabled = false; @@ -2019,9 +2138,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!iommu->dev) return -ENODEV; - /* Prevent binding other PCI device drivers to IOMMU devices */ - iommu->dev->match_driver = false; - /* ACPI _PRT won't have an IRQ for IOMMU */ iommu->dev->irq_managed = 1; @@ -2110,7 +2226,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) return ret; } - iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + if (ret || amd_iommu_pgtable == PD_MODE_NONE) { + /* + * Remove sysfs if DMA translation is not supported by the + * IOMMU. Do not return an error to enable IRQ remapping + * in state_next(), DTE[V, TV] must eventually be set to 0. + */ + iommu_device_sysfs_remove(&iommu->iommu); + } return pci_enable_device(iommu->dev); } @@ -2137,6 +2261,9 @@ static void print_iommu_info(void) if (check_feature(FEATURE_SNP)) pr_cont(" SNP"); + if (check_feature2(FEATURE_SEVSNPIO_SUP)) + pr_cont(" SEV-TIO"); + pr_cont("\n"); } @@ -2145,7 +2272,7 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (amd_iommu_pgtable == PD_MODE_V2) { pr_info("V2 page table enabled (Paging mode : %d level)\n", amd_iommu_gpt_level); } @@ -2332,7 +2459,7 @@ static struct irq_chip intcapxt_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = intcapxt_set_affinity, .irq_set_wake = intcapxt_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, }; static const struct irq_domain_ops intcapxt_domain_ops = { @@ -2571,13 +2698,13 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) u32 devid; struct dev_table_entry *dev_table = pci_seg->dev_table; - if (dev_table == NULL) + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); if (!amd_iommu_snp_en) - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); + set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); } } @@ -2605,8 +2732,7 @@ static void init_device_table(void) for_each_pci_segment(pci_seg) { for (devid = 0; devid <= pci_seg->last_bdf; ++devid) - __set_dev_entry_bit(pci_seg->dev_table, - devid, DEV_ENTRY_IRQ_TBL_EN); + set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); } } @@ -2634,7 +2760,11 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); /* Set IOTLB invalidation timeout to 1s */ - iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); + + /* Enable Enhanced Peripheral Page Request Handling */ + if (check_feature(FEATURE_EPHSUP)) + iommu_feature_enable(iommu, CONTROL_EPH_EN); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) @@ -2723,6 +2853,17 @@ static void iommu_enable_irtcachedis(struct amd_iommu *iommu) iommu->irtcachedis_enabled ? "disabled" : "enabled"); } +static void iommu_enable_2k_int(struct amd_iommu *iommu) +{ + if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + return; + + iommu_feature_set(iommu, + CONTROL_NUM_INT_REMAP_MODE_2K, + CONTROL_NUM_INT_REMAP_MODE_MASK, + CONTROL_NUM_INT_REMAP_MODE); +} + static void early_enable_iommu(struct amd_iommu *iommu) { iommu_disable(iommu); @@ -2735,6 +2876,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_enable(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2743,8 +2885,8 @@ static void early_enable_iommu(struct amd_iommu *iommu) * This function finally enables all IOMMUs found in the system after * they have been initialized. * - * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy - * the old content of device table entries. Not this case or copy failed, + * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse + * the old content of device table entries. Not this case or reuse failed, * just continue as normal kernel does. */ static void early_enable_iommus(void) @@ -2752,19 +2894,25 @@ static void early_enable_iommus(void) struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg; - if (!copy_device_table()) { + if (!reuse_device_table()) { /* - * If come here because of failure in copying device table from old + * If come here because of failure in reusing device table from old * kernel with all IOMMUs enabled, print error message and try to * free allocated old_dev_tbl_cpy. */ - if (amd_iommu_pre_enabled) - pr_err("Failed to copy DEV table from previous kernel.\n"); + if (amd_iommu_pre_enabled) { + pr_err("Failed to reuse DEV table from previous kernel.\n"); + /* + * Bail out early if unable to remap/reuse DEV table from + * previous kernel if SNP enabled as IOMMU commands will + * time out without DEV table and cause kdump boot panic. + */ + BUG_ON(check_feature(FEATURE_SNP)); + } for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - iommu_free_pages(pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + memunmap((void *)pci_seg->old_dev_tbl_cpy); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2774,11 +2922,10 @@ static void early_enable_iommus(void) early_enable_iommu(iommu); } } else { - pr_info("Copied DEV table from previous kernel.\n"); + pr_info("Reused DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } @@ -2791,6 +2938,7 @@ static void early_enable_iommus(void) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_set_device_table(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2888,7 +3036,7 @@ static void disable_iommus(void) * disable suspend until real resume implemented */ -static void amd_iommu_resume(void) +static void amd_iommu_resume(void *data) { struct amd_iommu *iommu; @@ -2902,7 +3050,7 @@ static void amd_iommu_resume(void) amd_iommu_enable_interrupts(); } -static int amd_iommu_suspend(void) +static int amd_iommu_suspend(void *data) { /* disable IOMMUs to go out of the way for BIOS */ disable_iommus(); @@ -2910,16 +3058,17 @@ static int amd_iommu_suspend(void) return 0; } -static struct syscore_ops amd_iommu_syscore_ops = { +static const struct syscore_ops amd_iommu_syscore_ops = { .suspend = amd_iommu_suspend, .resume = amd_iommu_resume, }; +static struct syscore amd_iommu_syscore = { + .ops = &amd_iommu_syscore_ops, +}; + static void __init free_iommu_resources(void) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - free_iommu_all(); free_pci_segments(); } @@ -3018,8 +3167,9 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int remap_cache_sz, ret; + int ret; acpi_status status; + u8 efr_hats; if (!amd_iommu_detected) return -ENODEV; @@ -3033,6 +3183,12 @@ static int __init early_amd_iommu_init(void) return -EINVAL; } + if (!boot_cpu_has(X86_FEATURE_CX16)) { + pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n"); + ret = -EINVAL; + goto out; + } + /* * Validate checksum here so we don't need to do it when * we actually parse the table @@ -3059,13 +3215,37 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); + if (efr_hats != 0x3) { + /* + * efr[HATS] bits specify the maximum host translation level + * supported, with LEVEL 4 being initial max level. + */ + amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; + } else { + pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", + efr_hats); + amd_iommu_hatdis = true; + } + + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } } + if (amd_iommu_hatdis) { + /* + * Host (v1) page table is not available. Attempt to use + * Guest (v2) page table. + */ + if (amd_iommu_v2_pgtbl_supported()) + amd_iommu_pgtable = PD_MODE_V2; + else + amd_iommu_pgtable = PD_MODE_NONE; + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -3075,22 +3255,7 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_irq_remap) { struct amd_iommu_pci_seg *pci_seg; - /* - * Interrupt remapping enabled, create kmem_cache for the - * remapping tables. - */ ret = -ENOMEM; - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); - else - remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); - amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", - remap_cache_sz, - DTE_INTTAB_ALIGNMENT, - 0, NULL); - if (!amd_iommu_irq_cache) - goto out; - for_each_pci_segment(pci_seg) { if (alloc_irq_lookup_table(pci_seg)) goto out; @@ -3171,7 +3336,7 @@ out: return true; } -static void iommu_snp_enable(void) +static __init void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) @@ -3185,7 +3350,7 @@ static void iommu_snp_enable(void) goto disable_snp; } - if (amd_iommu_pgtable != AMD_IOMMU_V1) { + if (amd_iommu_pgtable != PD_MODE_V1) { pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); goto disable_snp; } @@ -3196,6 +3361,14 @@ static void iommu_snp_enable(void) goto disable_snp; } + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + pr_info("IOMMU SNP support enabled.\n"); return; @@ -3238,7 +3411,7 @@ static int __init state_next(void) init_state = IOMMU_ENABLED; break; case IOMMU_ENABLED: - register_syscore_ops(&amd_iommu_syscore_ops); + register_syscore(&amd_iommu_syscore); iommu_snp_enable(); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; @@ -3295,6 +3468,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + /* + * SNP platform initilazation requires IOMMUs to be fully configured. + * If the SNP support on IOMMUs has NOT been checked, simply mark SNP + * as unsupported. If the SNP support on IOMMUs has been checked and + * host SNP support enabled but RMP enforcement has not been enabled + * in IOMMUs, then the system is in a half-baked state, but can limp + * along as all memory should be Hypervisor-Owned in the RMP. WARN, + * but leave SNP as "supported" to avoid confusing the kernel. + */ + if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !WARN_ON_ONCE(amd_iommu_snp_en)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3328,12 +3514,12 @@ int __init amd_iommu_enable(void) void amd_iommu_disable(void) { - amd_iommu_suspend(); + amd_iommu_suspend(NULL); } int amd_iommu_reenable(int mode) { - amd_iommu_resume(); + amd_iommu_resume(NULL); return 0; } @@ -3352,7 +3538,6 @@ int amd_iommu_enable_faulting(unsigned int cpu) */ static int __init amd_iommu_init(void) { - struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -3366,8 +3551,8 @@ static int __init amd_iommu_init(void) } #endif - for_each_iommu(iommu) - amd_iommu_debugfs_setup(iommu); + if (!ret) + amd_iommu_debugfs_setup(); return ret; } @@ -3398,25 +3583,28 @@ static bool amd_iommu_sme_check(void) * IOMMUs * ****************************************************************************/ -int __init amd_iommu_detect(void) +void __init amd_iommu_detect(void) { int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; + goto disable_snp; if (!amd_iommu_sme_check()) - return -ENODEV; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return ret; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; - return 1; +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /**************************************************************************** @@ -3464,9 +3652,9 @@ static int __init parse_amd_iommu_options(char *str) } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; } else if (strncmp(str, "pgtbl_v1", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } else if (strncmp(str, "pgtbl_v2", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V2; + amd_iommu_pgtable = PD_MODE_V2; } else if (strncmp(str, "irtcachedis", 11) == 0) { amd_iommu_irtcachedis = true; } else if (strncmp(str, "nohugepages", 11) == 0) { @@ -3569,7 +3757,7 @@ static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIID_LEN] = {0}; + char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ int i; addr = strchr(str, '@'); @@ -3595,7 +3783,7 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; - if (strlen(str) > ACPIID_LEN + 1) + if (strlen(str) > ACPIID_LEN) goto not_found; if (sscanf(str, "=%s", acpiid) != 1) @@ -3626,6 +3814,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); @@ -3835,4 +4031,10 @@ int amd_iommu_snp_disable(void) return 0; } EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); + +bool amd_iommu_sev_tio_supported(void) +{ + return check_feature2(FEATURE_SEVSNPIO_SUP); +} +EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported); #endif |
