diff options
Diffstat (limited to 'drivers/iommu/amd/init.c')
| -rw-r--r-- | drivers/iommu/amd/init.c | 1382 |
1 files changed, 866 insertions, 516 deletions
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 467b194975b3..4b2953418977 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include <linux/acpi.h> #include <linux/list.h> #include <linux/bitmap.h> -#include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/msi.h> @@ -30,11 +29,13 @@ #include <asm/io_apic.h> #include <asm/irq_remapping.h> #include <asm/set_memory.h> +#include <asm/sev.h> #include <linux/crash_dump.h> #include "amd_iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" /* * definitions for the ACPI scanning code @@ -83,8 +84,6 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 2000000 - #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) @@ -152,7 +151,11 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; -enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; +enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; +/* Host page table level */ +u8 amd_iommu_hpt_level; +/* Guest page table level */ +int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; @@ -160,22 +163,23 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; static bool amd_iommu_disabled __initdata; static bool amd_iommu_force_enable __initdata; +static bool amd_iommu_irtcachedis; static int amd_iommu_target_ivhd_type; /* Global EFR and EFR2 registers */ u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* Host (v1) page table is not supported*/ +bool amd_iommu_hatdis; + /* SNP is enabled on the system? */ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ - -/* Array to assign indices to IOMMUs*/ -struct amd_iommu *amd_iommus[MAX_IOMMUS]; +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */ /* Number of IOMMUs present in the system */ static int amd_iommus_present; @@ -184,19 +188,12 @@ static int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasid __read_mostly = ~0; - -bool amd_iommu_v2_present __read_mostly; static bool amd_iommu_pc_present __read_mostly; bool amdr_ivrs_remap_support __read_mostly; bool amd_iommu_force_isolation __read_mostly; -/* - * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap - * to know which ones are already in use. - */ -unsigned long *amd_iommu_pd_alloc_bitmap; +unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES; enum iommu_init_state { IOMMU_START_STATE, @@ -226,7 +223,6 @@ static bool __initdata cmdline_maps; static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); -static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -252,24 +248,21 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline unsigned long tbl_size(int entry_size, int last_bdf) +int amd_iommu_get_num_iommus(void) { - unsigned shift = PAGE_SHIFT + - get_order((last_bdf + 1) * entry_size); - - return 1UL << shift; + return amd_iommus_present; } -int amd_iommu_get_num_iommus(void) +bool amd_iommu_ht_range_ignore(void) { - return amd_iommus_present; + return check_feature2(FEATURE_HT_RANGE_IGNORE); } /* * Iterate through all the IOMMUs to get common EFR * masks among all IOMMUs and warn if found inconsistency. */ -static void get_global_efr(void) +static __init void get_global_efr(void) { struct amd_iommu *iommu; @@ -301,11 +294,6 @@ static void get_global_efr(void) pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); } -static bool check_feature_on_all_iommus(u64 mask) -{ - return !!(amd_iommu_efr & mask); -} - /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -391,7 +379,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!check_feature_on_all_iommus(FEATURE_SNP)) + if (!check_feature(FEATURE_SNP)) return; /* Note: @@ -418,39 +406,35 @@ static void iommu_set_device_table(struct amd_iommu *iommu) BUG_ON(iommu->mmio_base == NULL); + if (is_kdump_kernel()) + return; + entry = iommu_virt_to_phys(dev_table); entry |= (dev_table_size >> 12) - 1; memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, &entry, sizeof(entry)); } -/* Generic functions to enable/disable certain features of the IOMMU. */ -static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) { u64 ctrl; ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1ULL << bit); + mask <<= shift; + ctrl &= ~mask; + ctrl |= (val << shift) & mask; writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +/* Generic functions to enable/disable certain features of the IOMMU. */ +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1ULL << bit); - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 1ULL, 1ULL, bit); } -static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~CTRL_INV_TO_MASK; - ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 0ULL, 1ULL, bit); } /* Function to enable the hardware */ @@ -475,8 +459,15 @@ static void iommu_disable(struct amd_iommu *iommu) iommu_feature_disable(iommu, CONTROL_GALOG_EN); iommu_feature_disable(iommu, CONTROL_GAINT_EN); + /* Disable IOMMU PPR logging */ + iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); + iommu_feature_disable(iommu, CONTROL_PPRINT_EN); + /* Disable IOMMU hardware itself */ iommu_feature_disable(iommu, CONTROL_IOMMU_EN); + + /* Clear IRTE cache disabling bit */ + iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); } /* @@ -648,8 +639,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, + pci_seg->dev_table_size); if (!pci_seg->dev_table) return -ENOMEM; @@ -658,17 +649,19 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + if (is_kdump_kernel()) + memunmap((void *)pci_seg->dev_table); + else + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->rlookup_table), + GFP_KERNEL); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -677,18 +670,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->rlookup_table); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); - kmemleak_alloc(pci_seg->irq_lookup_table, - pci_seg->rlookup_table_size, 1, GFP_KERNEL); + pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->irq_lookup_table), + GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) return -ENOMEM; @@ -697,9 +687,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - kmemleak_free(pci_seg->irq_lookup_table); - free_pages((unsigned long)pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->irq_lookup_table); pci_seg->irq_lookup_table = NULL; } @@ -707,8 +695,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1, + sizeof(*pci_seg->alias_table), + GFP_KERNEL); if (!pci_seg->alias_table) return -ENOMEM; @@ -723,11 +712,30 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + kvfree(pci_seg->alias_table); pci_seg->alias_table = NULL; } +static inline void *iommu_memremap(unsigned long paddr, size_t size) +{ + phys_addr_t phys; + + if (!paddr) + return NULL; + + /* + * Obtain true physical address in kdump kernel when SME is enabled. + * Currently, previous kernel with SME enabled and kdump kernel + * with SME support disabled is not supported. + */ + phys = __sme_clr(paddr); + + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return (__force void *)ioremap_encrypted(phys, size); + else + return memremap(phys, size, MEMREMAP_WB); +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -735,20 +743,56 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); return iommu->cmd_buf ? 0 : -ENOMEM; } /* + * Interrupt handler has processed all pending events and adjusted head + * and tail pointer. Reset overflow mask and restart logging again. + */ +void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask) +{ + u32 status; + + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & status_run_mask) + return; + + pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); + + iommu_feature_disable(iommu, cntrl_log); + iommu_feature_disable(iommu, cntrl_intr); + + writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); + + iommu_feature_enable(iommu, cntrl_intr); + iommu_feature_enable(iommu, cntrl_log); +} + +/* * This function restarts event logging in case the IOMMU experienced * an event log buffer overflow. */ void amd_iommu_restart_event_logging(struct amd_iommu *iommu) { - iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, + CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, + MMIO_STATUS_EVT_OVERFLOW_MASK); +} + +/* + * This function restarts event logging in case the IOMMU experienced + * GA log overflow. + */ +void amd_iommu_restart_ga_log(struct amd_iommu *iommu) +{ + amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, + CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, + MMIO_STATUS_GALOG_OVERFLOW_MASK); } /* @@ -777,11 +821,16 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) BUG_ON(iommu->cmd_buf == NULL); - entry = iommu_virt_to_phys(iommu->cmd_buf); - entry |= MMIO_CMD_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); + if (!is_kdump_kernel()) { + /* + * Command buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + } amd_iommu_reset_cmd_buffer(iommu); } @@ -796,20 +845,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf); } -static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, - gfp_t gfp, size_t size) +void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, + size_t size) { - int order = get_order(size); - void *buf = (void *)__get_free_pages(gfp, order); + void *buf; - if (buf && - check_feature_on_all_iommus(FEATURE_SNP) && - set_memory_4k((unsigned long)buf, (1 << order))) { - free_pages((unsigned long)buf, order); - buf = NULL; + size = PAGE_ALIGN(size); + buf = iommu_alloc_pages_sz(gfp, size); + if (!buf) + return NULL; + if (check_feature(FEATURE_SNP) && + set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) { + iommu_free_pages(buf); + return NULL; } return buf; @@ -818,7 +869,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; @@ -830,10 +881,15 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) BUG_ON(iommu->evt_buf == NULL); - entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); + if (!is_kdump_kernel()) { + /* + * Event buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + } /* set head and tail to zero manually */ writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); @@ -852,48 +908,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); -} - -/* allocates the memory where the IOMMU will log its events to */ -static int __init alloc_ppr_log(struct amd_iommu *iommu) -{ - iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, - PPR_LOG_SIZE); - - return iommu->ppr_log ? 0 : -ENOMEM; -} - -static void iommu_enable_ppr_log(struct amd_iommu *iommu) -{ - u64 entry; - - if (iommu->ppr_log == NULL) - return; - - entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, - &entry, sizeof(entry)); - - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); - iommu_feature_enable(iommu, CONTROL_PPR_EN); -} - -static void __init free_ppr_log(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); + iommu_free_pages(iommu->evt_buf); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); - free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log); + iommu_free_pages(iommu->ga_log_tail); #endif } @@ -920,14 +942,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return -EINVAL; return 0; @@ -938,13 +960,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8); if (!iommu->ga_log_tail) goto err_out; @@ -957,15 +977,130 @@ err_out: static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); + iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); + if (!iommu->cmd_sem) + return -ENOMEM; + iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem); + return 0; +} + +static int __init remap_event_buffer(struct amd_iommu *iommu) +{ + u64 paddr; + + pr_info_once("Re-using event buffer from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; + iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE); + + return iommu->evt_buf ? 0 : -ENOMEM; +} + +static int __init remap_command_buffer(struct amd_iommu *iommu) +{ + u64 paddr; + + pr_info_once("Re-using command buffer from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK; + iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE); - return iommu->cmd_sem ? 0 : -ENOMEM; + return iommu->cmd_buf ? 0 : -ENOMEM; +} + +static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu) +{ + u64 paddr; + + if (check_feature(FEATURE_SNP)) { + /* + * When SNP is enabled, the exclusion base register is used for the + * completion wait buffer (CWB) address. Read and re-use it. + */ + pr_info_once("Re-using CWB buffers from the previous kernel\n"); + paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK; + iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE); + if (!iommu->cmd_sem) + return -ENOMEM; + iommu->cmd_sem_paddr = paddr; + } else { + return alloc_cwwb_sem(iommu); + } + + return 0; +} + +static int __init alloc_iommu_buffers(struct amd_iommu *iommu) +{ + int ret; + + /* + * Reuse/Remap the previous kernel's allocated completion wait + * command and event buffers for kdump boot. + */ + if (is_kdump_kernel()) { + ret = remap_or_alloc_cwwb_sem(iommu); + if (ret) + return ret; + + ret = remap_command_buffer(iommu); + if (ret) + return ret; + + ret = remap_event_buffer(iommu); + if (ret) + return ret; + } else { + ret = alloc_cwwb_sem(iommu); + if (ret) + return ret; + + ret = alloc_command_buffer(iommu); + if (ret) + return ret; + + ret = alloc_event_buffer(iommu); + if (ret) + return ret; + } + + return 0; } static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - free_page((unsigned long)iommu->cmd_sem); + iommu_free_pages((void *)iommu->cmd_sem); +} +static void __init unmap_cwwb_sem(struct amd_iommu *iommu) +{ + if (iommu->cmd_sem) { + if (check_feature(FEATURE_SNP)) + memunmap((void *)iommu->cmd_sem); + else + iommu_free_pages((void *)iommu->cmd_sem); + } +} + +static void __init unmap_command_buffer(struct amd_iommu *iommu) +{ + memunmap((void *)iommu->cmd_buf); +} + +static void __init unmap_event_buffer(struct amd_iommu *iommu) +{ + memunmap(iommu->evt_buf); +} + +static void __init free_iommu_buffers(struct amd_iommu *iommu) +{ + if (is_kdump_kernel()) { + unmap_cwwb_sem(iommu); + unmap_command_buffer(iommu); + unmap_event_buffer(iommu); + } else { + free_cwwb_sem(iommu); + free_command_buffer(iommu); + free_event_buffer(iommu); + } } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -983,55 +1118,27 @@ static void iommu_enable_xt(struct amd_iommu *iommu) static void iommu_enable_gt(struct amd_iommu *iommu) { - if (!iommu_feature(iommu, FEATURE_GT)) + if (!check_feature(FEATURE_GT)) return; iommu_feature_enable(iommu, CONTROL_GT_EN); } /* sets a specific bit in the device table entry. */ -static void __set_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) -{ - int i = (bit >> 6) & 0x03; - int _bit = bit & 0x3f; - - dev_table[devid].data[i] |= (1UL << _bit); -} - -static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __set_dev_entry_bit(dev_table, devid, bit); -} - -static int __get_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) +static void set_dte_bit(struct dev_table_entry *dte, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; + dte->data[i] |= (1UL << _bit); } -static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +static bool __reuse_device_table(struct amd_iommu *iommu) { - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __get_dev_entry_bit(dev_table, devid, bit); -} - -static bool __copy_device_table(struct amd_iommu *iommu) -{ - u64 int_ctl, int_tab_len, entry = 0; struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - struct dev_table_entry *old_devtb = NULL; - u32 lo, hi, devid, old_devtb_size; + u32 lo, hi, old_devtb_size; phys_addr_t old_devtb_phys; - u16 dom_id, dte_v, irq_v; - gfp_t gfp_flag; - u64 tmp; + u64 entry; /* Each IOMMU use separate device table with the same size */ lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); @@ -1056,63 +1163,20 @@ static bool __copy_device_table(struct amd_iommu *iommu) pr_err("The address of old device table is above 4G, not trustworthy!\n"); return false; } - old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) - ? (__force void *)ioremap_encrypted(old_devtb_phys, - pci_seg->dev_table_size) - : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); - - if (!old_devtb) - return false; - gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(pci_seg->dev_table_size)); + /* + * Re-use the previous kernel's device table for kdump. + */ + pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size); if (pci_seg->old_dev_tbl_cpy == NULL) { - pr_err("Failed to allocate memory for copying old device table!\n"); - memunmap(old_devtb); + pr_err("Failed to remap memory for reusing old device table!\n"); return false; } - for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; - dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; - dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; - - if (dte_v && dom_id) { - pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; - pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; - __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); - /* If gcr3 table existed, mask it out */ - if (old_devtb[devid].data[0] & DTE_FLAG_GV) { - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; - tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; - tmp |= DTE_FLAG_GV; - pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; - } - } - - irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; - int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; - int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; - if (irq_v && (int_ctl || int_tab_len)) { - if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_INTTABLEN)) { - pr_err("Wrong old irq remapping flag: %#x\n", devid); - memunmap(old_devtb); - return false; - } - - pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; - } - } - memunmap(old_devtb); - return true; } -static bool copy_device_table(void) +static bool reuse_device_table(void) { struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg; @@ -1120,17 +1184,17 @@ static bool copy_device_table(void) if (!amd_iommu_pre_enabled) return false; - pr_warn("Translation is already enabled - trying to copy translation structures\n"); + pr_warn("Translation is already enabled - trying to reuse translation structures\n"); /* * All IOMMUs within PCI segment shares common device table. - * Hence copy device table only once per PCI segment. + * Hence reuse device table only once per PCI segment. */ for_each_pci_segment(pci_seg) { for_each_iommu(iommu) { if (pci_seg->id != iommu->pci_seg->id) continue; - if (!__copy_device_table(iommu)) + if (!__reuse_device_table(iommu)) return false; break; } @@ -1139,42 +1203,107 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) { - int sysmgt; + struct ivhd_dte_flags *e; + unsigned int best_len = UINT_MAX; + struct dev_table_entry *dte = NULL; + + for_each_ivhd_dte_flags(e) { + /* + * Need to go through the whole list to find the smallest range, + * which contains the devid. + */ + if ((e->segid == segid) && + (e->devid_first <= devid) && (devid <= e->devid_last)) { + unsigned int len = e->devid_last - e->devid_first; + + if (len < best_len) { + dte = &(e->dte); + best_len = len; + } + } + } + return dte; +} - sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); +static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) +{ + struct ivhd_dte_flags *e; - if (sysmgt == 0x01) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); + for_each_ivhd_dte_flags(e) { + if ((e->segid == segid) && + (e->devid_first == first) && + (e->devid_last == last)) + return true; + } + return false; } /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information */ -static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, - u16 devid, u32 flags, u32 ext_flags) +static void __init +set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, + u32 flags, u32 ext_flags) { - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); + int i; + struct dev_table_entry dte = {}; + + /* Parse IVHD DTE setting flags and store information */ + if (flags) { + struct ivhd_dte_flags *d; + + if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) + return; + + d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); + if (!d) + return; - amd_iommu_apply_erratum_63(iommu, devid); + pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); + + if (flags & ACPI_DEVFLAG_INITPASS) + set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); + + /* Apply erratum 63, which needs info in initial_dte */ + if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) + dte.data[0] |= DTE_FLAG_IW; + + memcpy(&d->dte, &dte, sizeof(dte)); + d->segid = iommu->pci_seg->id; + d->devid_first = first; + d->devid_last = last; + list_add_tail(&d->list, &amd_ivhd_dev_flags_list); + } + + for (i = first; i <= last; i++) { + if (flags) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + + memcpy(&dev_table[i], &dte, sizeof(dte)); + } + amd_iommu_set_rlookup_table(iommu, i); + } +} - amd_iommu_set_rlookup_table(iommu, devid); +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) +{ + set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); } int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) @@ -1242,7 +1371,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, entry->cmd_line = cmd_line; entry->root_devid = (entry->devid & (~0x7)); - pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", + pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", entry->cmd_line ? "cmd" : "ivrs", entry->hid, entry->uid, entry->root_devid); @@ -1334,15 +1463,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - - for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); + DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); + set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x\n", + DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1353,8 +1479,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_SELECT_RANGE_START: - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %04x:%02x:%02x.%x flags: %02x\n", + DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1367,8 +1492,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1385,9 +1509,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS_RANGE: - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %04x:%02x:%02x.%x flags: %02x " - "devid_to: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1404,8 +1526,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1417,8 +1538,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT_RANGE: - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1431,21 +1551,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); devid = e->devid; - for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) { + if (alias) { + for (dev_i = devid_start; dev_i <= devid; ++dev_i) pci_seg->alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); + set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); } + set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); break; case IVHD_DEV_SPECIAL: { u8 handle, type; @@ -1464,11 +1581,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", var, (int)handle, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); ret = add_special_device(type, handle, &devid, false); if (ret) @@ -1528,11 +1646,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, } devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); flags = e->flags; @@ -1581,9 +1700,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); - pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->dev_table_size = + max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), + SZ_4K); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -1591,13 +1710,22 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); if (alloc_dev_table(pci_seg)) - return NULL; + goto err_free_pci_seg; if (alloc_alias_table(pci_seg)) - return NULL; + goto err_free_dev_table; if (alloc_rlookup_table(pci_seg)) - return NULL; + goto err_free_alias_table; return pci_seg; + +err_free_alias_table: + free_alias_table(pci_seg); +err_free_dev_table: + free_dev_table(pci_seg); +err_free_pci_seg: + list_del(&pci_seg->list); + kfree(pci_seg); + return NULL; } static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, @@ -1627,14 +1755,22 @@ static void __init free_pci_segments(void) } } +static void __init free_sysfs(struct amd_iommu *iommu) +{ + if (iommu->iommu.dev) { + iommu_device_unregister(&iommu->iommu); + iommu_device_sysfs_remove(&iommu->iommu); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { - free_cwwb_sem(iommu); - free_command_buffer(iommu); - free_event_buffer(iommu); - free_ppr_log(iommu); + free_sysfs(iommu); + free_iommu_buffers(iommu); + amd_iommu_free_ppr_log(iommu); free_ga_log(iommu); iommu_unmap_mmio_space(iommu); + amd_iommu_iopf_uninit(iommu); } static void __init free_iommu_all(void) @@ -1722,7 +1858,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, iommu->pci_seg = pci_seg; raw_spin_lock_init(&iommu->lock); - iommu->cmd_sem_val = 0; + atomic64_set(&iommu->cmd_sem_val, 0); /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); @@ -1733,9 +1869,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, return -ENOSYS; } - /* Index is fine - add IOMMU to the array */ - amd_iommus[iommu->index] = iommu; - /* * Copy data from ACPI table entry to the iommu struct */ @@ -1753,13 +1886,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling it. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + /* GAM requires GA mode. */ + if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1769,13 +1897,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * XT, GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling them. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { + /* XT and GAM require GA mode. */ + if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; } @@ -1783,6 +1906,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { + pr_warn_once("Host Address Translation is not supported.\n"); + amd_iommu_hatdis = true; + } + early_iommu_features_init(iommu, h); break; @@ -1802,14 +1930,9 @@ static int __init init_iommu_one_late(struct amd_iommu *iommu) { int ret; - if (alloc_cwwb_sem(iommu)) - return -ENOMEM; - - if (alloc_command_buffer(iommu)) - return -ENOMEM; - - if (alloc_event_buffer(iommu)) - return -ENOMEM; + ret = alloc_iommu_buffers(iommu); + if (ret) + return ret; iommu->int_enabled = false; @@ -1922,7 +2045,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) u64 val; struct pci_dev *pdev = iommu->dev; - if (!iommu_feature(iommu, FEATURE_PC)) + if (!check_feature(FEATURE_PC)) return; amd_iommu_pc_present = true; @@ -1941,7 +2064,7 @@ static ssize_t amd_iommu_show_cap(struct device *dev, char *buf) { struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%x\n", iommu->cap); + return sysfs_emit(buf, "%x\n", iommu->cap); } static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); @@ -1949,8 +2072,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features); + return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -1986,9 +2108,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); - if (!iommu->features) { - iommu->features = features; - iommu->features2 = features2; + if (!amd_iommu_efr) { + amd_iommu_efr = features; + amd_iommu_efr2 = features2; return; } @@ -1996,12 +2118,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features || - features2 != iommu->features2) { + if (features != amd_iommu_efr || + features2 != amd_iommu_efr2) { pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", - features, iommu->features, - features2, iommu->features2); + features, amd_iommu_efr, + features2, amd_iommu_efr2); } } @@ -2016,8 +2138,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!iommu->dev) return -ENODEV; - /* Prevent binding other PCI device drivers to IOMMU devices */ - iommu->dev->match_driver = false; + /* ACPI _PRT won't have an IRQ for IOMMU */ + iommu->dev->irq_managed = 1; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); @@ -2027,35 +2149,26 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) late_iommu_features_init(iommu); - if (iommu_feature(iommu, FEATURE_GT)) { + if (check_feature(FEATURE_GT)) { int glxval; - u32 max_pasid; u64 pasmax; - pasmax = iommu->features & FEATURE_PASID_MASK; - pasmax >>= FEATURE_PASID_SHIFT; - max_pasid = (1 << (pasmax + 1)) - 1; - - amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr); + iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; - BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); + BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); - glxval = iommu->features & FEATURE_GLXVAL_MASK; - glxval >>= FEATURE_GLXVAL_SHIFT; + glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr); if (amd_iommu_max_glx_val == -1) amd_iommu_max_glx_val = glxval; else amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); - } - if (iommu_feature(iommu, FEATURE_GT) && - iommu_feature(iommu, FEATURE_PPR)) { - iommu->is_iommu_v2 = true; - amd_iommu_v2_present = true; + iommu_enable_gt(iommu); } - if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2066,17 +2179,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); - if (amd_iommu_pgtable == AMD_IOMMU_V2) { - if (!iommu_feature(iommu, FEATURE_GIOSUP) || - !iommu_feature(iommu, FEATURE_GT)) { - pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; - } else if (iommu_default_passthrough()) { - pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; - } - } - if (is_rd890_iommu(iommu->dev)) { int i, j; @@ -2114,49 +2216,66 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (ret) return ret; - iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + /* + * Allocate per IOMMU IOPF queue here so that in attach device path, + * PRI capable device can be added to IOPF queue + */ + if (amd_iommu_gt_ppr_supported()) { + ret = amd_iommu_iopf_init(iommu); + if (ret) + return ret; + } + + ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + if (ret || amd_iommu_pgtable == PD_MODE_NONE) { + /* + * Remove sysfs if DMA translation is not supported by the + * IOMMU. Do not return an error to enable IRQ remapping + * in state_next(), DTE[V, TV] must eventually be set to 0. + */ + iommu_device_sysfs_remove(&iommu->iommu); + } return pci_enable_device(iommu->dev); } static void print_iommu_info(void) { + int i; static const char * const feat_str[] = { "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", "IA", "GA", "HE", "PC" }; - struct amd_iommu *iommu; - for_each_iommu(iommu) { - struct pci_dev *pdev = iommu->dev; - int i; + if (amd_iommu_efr) { + pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); - pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); - - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (check_feature(1ULL << i)) + pr_cont(" %s", feat_str[i]); + } - for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { - if (iommu_feature(iommu, (1ULL << i))) - pr_cont(" %s", feat_str[i]); - } + if (check_feature(FEATURE_GAM_VAPIC)) + pr_cont(" GA_vAPIC"); - if (iommu->features & FEATURE_GAM_VAPIC) - pr_cont(" GA_vAPIC"); + if (check_feature(FEATURE_SNP)) + pr_cont(" SNP"); - if (iommu->features & FEATURE_SNP) - pr_cont(" SNP"); + if (check_feature2(FEATURE_SEVSNPIO_SUP)) + pr_cont(" SEV-TIO"); - pr_cont("\n"); - } + pr_cont("\n"); } + if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } - if (amd_iommu_pgtable == AMD_IOMMU_V2) - pr_info("V2 page table enabled\n"); + if (amd_iommu_pgtable == PD_MODE_V2) { + pr_info("V2 page table enabled (Paging mode : %d level)\n", + amd_iommu_gpt_level); + } } static int __init amd_iommu_init_pci(void) @@ -2165,6 +2284,9 @@ static int __init amd_iommu_init_pci(void) struct amd_iommu_pci_seg *pci_seg; int ret; + /* Init global identity domain before registering IOMMU */ + amd_iommu_init_identity_domain(); + for_each_iommu(iommu) { ret = iommu_init_pci(iommu); if (ret) { @@ -2190,7 +2312,7 @@ static int __init amd_iommu_init_pci(void) init_device_table_dma(pci_seg); for_each_iommu(iommu) - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); print_iommu_info(); @@ -2274,6 +2396,7 @@ static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq struct irq_data *irqd = irq_domain_get_irq_data(domain, i); irqd->chip = &intcapxt_controller; + irqd->hwirq = info->hwirq; irqd->chip_data = info->data; __irq_set_handler(i, handle_edge_irq, 0, "edge"); } @@ -2300,22 +2423,14 @@ static void intcapxt_unmask_irq(struct irq_data *irqd) xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); xt.destid_24_31 = cfg->dest_apicid >> 24; - /** - * Current IOMMU implementation uses the same IRQ for all - * 3 IOMMU interrupts. - */ - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); } static void intcapxt_mask_irq(struct irq_data *irqd) { struct amd_iommu *iommu = irqd->chip_data; - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(0, iommu->mmio_base + irqd->hwirq); } @@ -2344,7 +2459,7 @@ static struct irq_chip intcapxt_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = intcapxt_set_affinity, .irq_set_wake = intcapxt_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, }; static const struct irq_domain_ops intcapxt_domain_ops = { @@ -2378,11 +2493,13 @@ static struct irq_domain *iommu_get_irqdomain(void) return iommu_irqdomain; } -static int iommu_setup_intcapxt(struct amd_iommu *iommu) +static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, + int hwirq, irq_handler_t thread_fn) { struct irq_domain *domain; struct irq_alloc_info info; int irq, ret; + int node = dev_to_node(&iommu->dev->dev); domain = iommu_get_irqdomain(); if (!domain) @@ -2391,15 +2508,16 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_AMDVI; info.data = iommu; + info.hwirq = hwirq; - irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); + irq = irq_domain_alloc_irqs(domain, 1, node, &info); if (irq < 0) { irq_domain_remove(domain); return irq; } ret = request_threaded_irq(irq, amd_iommu_int_handler, - amd_iommu_int_thread, 0, "AMD-Vi", iommu); + thread_fn, 0, devname, iommu); if (ret) { irq_domain_free_irqs(irq, 1); irq_domain_remove(domain); @@ -2409,6 +2527,37 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) return 0; } +static int iommu_setup_intcapxt(struct amd_iommu *iommu) +{ + int ret; + + snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), + "AMD-Vi%d-Evt", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, + MMIO_INTCAPXT_EVT_OFFSET, + amd_iommu_int_thread_evtlog); + if (ret) + return ret; + + snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), + "AMD-Vi%d-PPR", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, + MMIO_INTCAPXT_PPR_OFFSET, + amd_iommu_int_thread_pprlog); + if (ret) + return ret; + +#ifdef CONFIG_IRQ_REMAP + snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), + "AMD-Vi%d-GA", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, + MMIO_INTCAPXT_GALOG_OFFSET, + amd_iommu_int_thread_galog); +#endif + + return ret; +} + static int iommu_init_irq(struct amd_iommu *iommu) { int ret; @@ -2434,8 +2583,6 @@ enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPRINT_EN); return 0; } @@ -2551,13 +2698,13 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) u32 devid; struct dev_table_entry *dev_table = pci_seg->dev_table; - if (dev_table == NULL) + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); if (!amd_iommu_snp_en) - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); + set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); } } @@ -2585,8 +2732,7 @@ static void init_device_table(void) for_each_pci_segment(pci_seg) { for (devid = 0; devid <= pci_seg->last_bdf; ++devid) - __set_dev_entry_bit(pci_seg->dev_table, - devid, DEV_ENTRY_IRQ_TBL_EN); + set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); } } @@ -2614,7 +2760,11 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); /* Set IOTLB invalidation timeout to 1s */ - iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); + + /* Enable Enhanced Peripheral Page Request Handling */ + if (check_feature(FEATURE_EPHSUP)) + iommu_feature_enable(iommu, CONTROL_EPH_EN); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) @@ -2676,6 +2826,44 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #endif } +static void iommu_disable_irtcachedis(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); +} + +static void iommu_enable_irtcachedis(struct amd_iommu *iommu) +{ + u64 ctrl; + + if (!amd_iommu_irtcachedis) + return; + + /* + * Note: + * The support for IRTCacheDis feature is dertermined by + * checking if the bit is writable. + */ + iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= (1ULL << CONTROL_IRTCACHEDIS); + if (ctrl) + iommu->irtcachedis_enabled = true; + pr_info("iommu%d (%#06x) : IRT cache is %s\n", + iommu->index, iommu->devid, + iommu->irtcachedis_enabled ? "disabled" : "enabled"); +} + +static void iommu_enable_2k_int(struct amd_iommu *iommu) +{ + if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + return; + + iommu_feature_set(iommu, + CONTROL_NUM_INT_REMAP_MODE_2K, + CONTROL_NUM_INT_REMAP_MODE_MASK, + CONTROL_NUM_INT_REMAP_MODE); +} + static void early_enable_iommu(struct amd_iommu *iommu) { iommu_disable(iommu); @@ -2684,18 +2872,21 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); + iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_enable(iommu); - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } /* * This function finally enables all IOMMUs found in the system after * they have been initialized. * - * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy - * the old content of device table entries. Not this case or copy failed, + * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse + * the old content of device table entries. Not this case or reuse failed, * just continue as normal kernel does. */ static void early_enable_iommus(void) @@ -2703,19 +2894,25 @@ static void early_enable_iommus(void) struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg; - if (!copy_device_table()) { + if (!reuse_device_table()) { /* - * If come here because of failure in copying device table from old + * If come here because of failure in reusing device table from old * kernel with all IOMMUs enabled, print error message and try to * free allocated old_dev_tbl_cpy. */ - if (amd_iommu_pre_enabled) - pr_err("Failed to copy DEV table from previous kernel.\n"); + if (amd_iommu_pre_enabled) { + pr_err("Failed to reuse DEV table from previous kernel.\n"); + /* + * Bail out early if unable to remap/reuse DEV table from + * previous kernel if SNP enabled as IOMMU commands will + * time out without DEV table and cause kdump boot panic. + */ + BUG_ON(check_feature(FEATURE_SNP)); + } for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + memunmap((void *)pci_seg->old_dev_tbl_cpy); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2725,35 +2922,38 @@ static void early_enable_iommus(void) early_enable_iommu(iommu); } } else { - pr_info("Copied DEV table from previous kernel.\n"); + pr_info("Reused DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } for_each_iommu(iommu) { iommu_disable_command_buffer(iommu); iommu_disable_event_buffer(iommu); + iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); + iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_set_device_table(iommu); - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } } } -static void enable_iommus_v2(void) +static void enable_iommus_ppr(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { - iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); - } + if (!amd_iommu_gt_ppr_supported()) + return; + + for_each_iommu(iommu) + amd_iommu_enable_ppr_log(iommu); } static void enable_iommus_vapic(void) @@ -2778,19 +2978,19 @@ static void enable_iommus_vapic(void) * Need to set and poll check the GALOGRun bit to zero before * we can set/ modify GA Log registers safely. */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return; } if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + !check_feature(FEATURE_GAM_VAPIC)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return; } @@ -2818,13 +3018,6 @@ static void enable_iommus_vapic(void) #endif } -static void enable_iommus(void) -{ - early_enable_iommus(); - enable_iommus_vapic(); - enable_iommus_v2(); -} - static void disable_iommus(void) { struct amd_iommu *iommu; @@ -2843,7 +3036,7 @@ static void disable_iommus(void) * disable suspend until real resume implemented */ -static void amd_iommu_resume(void) +static void amd_iommu_resume(void *data) { struct amd_iommu *iommu; @@ -2851,12 +3044,13 @@ static void amd_iommu_resume(void) iommu_apply_resume_quirks(iommu); /* re-load the hardware */ - enable_iommus(); + for_each_iommu(iommu) + early_enable_iommu(iommu); amd_iommu_enable_interrupts(); } -static int amd_iommu_suspend(void) +static int amd_iommu_suspend(void *data) { /* disable IOMMUs to go out of the way for BIOS */ disable_iommus(); @@ -2864,16 +3058,17 @@ static int amd_iommu_suspend(void) return 0; } -static struct syscore_ops amd_iommu_syscore_ops = { +static const struct syscore_ops amd_iommu_syscore_ops = { .suspend = amd_iommu_suspend, .resume = amd_iommu_resume, }; +static struct syscore amd_iommu_syscore = { + .ops = &amd_iommu_syscore_ops, +}; + static void __init free_iommu_resources(void) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - free_iommu_all(); free_pci_segments(); } @@ -2932,9 +3127,7 @@ static bool __init check_ioapic_information(void) static void __init free_dma_resources(void) { - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); - amd_iommu_pd_alloc_bitmap = NULL; + ida_destroy(&pdom_ids); free_unity_maps(); } @@ -2974,8 +3167,9 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int remap_cache_sz, ret; + int ret; acpi_status status; + u8 efr_hats; if (!amd_iommu_detected) return -ENODEV; @@ -2989,6 +3183,12 @@ static int __init early_amd_iommu_init(void) return -EINVAL; } + if (!boot_cpu_has(X86_FEATURE_CX16)) { + pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n"); + ret = -EINVAL; + goto out; + } + /* * Validate checksum here so we don't need to do it when * we actually parse the table @@ -3002,21 +3202,6 @@ static int __init early_amd_iommu_init(void) amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); - /* Device table - directly used by all IOMMUs */ - ret = -ENOMEM; - - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_ID/8)); - if (amd_iommu_pd_alloc_bitmap == NULL) - goto out; - - /* - * never allocate domain 0 because its used as the non-allocated and - * error value placeholder - */ - __set_bit(0, amd_iommu_pd_alloc_bitmap); - /* * now the data structures are allocated and basically initialized * start the real acpi table scan @@ -3025,6 +3210,42 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* 5 level guest page table */ + if (cpu_feature_enabled(X86_FEATURE_LA57) && + FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) + amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; + + efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); + if (efr_hats != 0x3) { + /* + * efr[HATS] bits specify the maximum host translation level + * supported, with LEVEL 4 being initial max level. + */ + amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; + } else { + pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", + efr_hats); + amd_iommu_hatdis = true; + } + + if (amd_iommu_pgtable == PD_MODE_V2) { + if (!amd_iommu_v2_pgtbl_supported()) { + pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); + amd_iommu_pgtable = PD_MODE_V1; + } + } + + if (amd_iommu_hatdis) { + /* + * Host (v1) page table is not available. Attempt to use + * Guest (v2) page table. + */ + if (amd_iommu_v2_pgtbl_supported()) + amd_iommu_pgtable = PD_MODE_V2; + else + amd_iommu_pgtable = PD_MODE_NONE; + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -3034,22 +3255,7 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_irq_remap) { struct amd_iommu_pci_seg *pci_seg; - /* - * Interrupt remapping enabled, create kmem_cache for the - * remapping tables. - */ ret = -ENOMEM; - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); - else - remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); - amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", - remap_cache_sz, - DTE_INTTAB_ALIGNMENT, - 0, NULL); - if (!amd_iommu_irq_cache) - goto out; - for_each_pci_segment(pci_seg) { if (alloc_irq_lookup_table(pci_seg)) goto out; @@ -3081,6 +3287,13 @@ static int amd_iommu_enable_interrupts(void) goto out; } + /* + * Interrupt handler is ready to process interrupts. Enable + * PPR and GA log interrupt for all IOMMUs. + */ + enable_iommus_vapic(); + enable_iommus_ppr(); + out: return ret; } @@ -3123,6 +3336,47 @@ out: return true; } +static __init void iommu_snp_enable(void) +{ +#ifdef CONFIG_KVM_AMD_SEV + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return; + /* + * The SNP support requires that IOMMU must be enabled, and is + * configured with V1 page table (DTE[Mode] = 0 is not supported). + */ + if (no_iommu || iommu_default_passthrough()) { + pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + if (amd_iommu_pgtable != PD_MODE_V1) { + pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + amd_iommu_snp_en = check_feature(FEATURE_SNP); + if (!amd_iommu_snp_en) { + pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); + goto disable_snp; + } + + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + + pr_info("IOMMU SNP support enabled.\n"); + return; + +disable_snp: + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); +#endif +} + /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -3157,11 +3411,10 @@ static int __init state_next(void) init_state = IOMMU_ENABLED; break; case IOMMU_ENABLED: - register_syscore_ops(&amd_iommu_syscore_ops); + register_syscore(&amd_iommu_syscore); + iommu_snp_enable(); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; - enable_iommus_vapic(); - enable_iommus_v2(); break; case IOMMU_PCI_INIT: ret = amd_iommu_enable_interrupts(); @@ -3197,7 +3450,7 @@ static int __init state_next(void) uninit_device_table_dma(pci_seg); for_each_iommu(iommu) - iommu_flush_all_caches(iommu); + amd_iommu_flush_all_caches(iommu); } } return ret; @@ -3215,6 +3468,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + /* + * SNP platform initilazation requires IOMMUs to be fully configured. + * If the SNP support on IOMMUs has NOT been checked, simply mark SNP + * as unsupported. If the SNP support on IOMMUs has been checked and + * host SNP support enabled but RMP enforcement has not been enabled + * in IOMMUs, then the system is in a half-baked state, but can limp + * along as all memory should be Hypervisor-Owned in the RMP. WARN, + * but leave SNP as "supported" to avoid confusing the kernel. + */ + if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !WARN_ON_ONCE(amd_iommu_snp_en)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3248,17 +3514,17 @@ int __init amd_iommu_enable(void) void amd_iommu_disable(void) { - amd_iommu_suspend(); + amd_iommu_suspend(NULL); } int amd_iommu_reenable(int mode) { - amd_iommu_resume(); + amd_iommu_resume(NULL); return 0; } -int __init amd_iommu_enable_faulting(void) +int amd_iommu_enable_faulting(unsigned int cpu) { /* We enable MSI later when PCI is initialized */ return 0; @@ -3272,7 +3538,6 @@ int __init amd_iommu_enable_faulting(void) */ static int __init amd_iommu_init(void) { - struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -3286,8 +3551,8 @@ static int __init amd_iommu_init(void) } #endif - for_each_iommu(iommu) - amd_iommu_debugfs_setup(iommu); + if (!ret) + amd_iommu_debugfs_setup(); return ret; } @@ -3318,25 +3583,28 @@ static bool amd_iommu_sme_check(void) * IOMMUs * ****************************************************************************/ -int __init amd_iommu_detect(void) +void __init amd_iommu_detect(void) { int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; + goto disable_snp; if (!amd_iommu_sme_check()) - return -ENODEV; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return ret; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; - return 1; +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /**************************************************************************** @@ -3384,9 +3652,17 @@ static int __init parse_amd_iommu_options(char *str) } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; } else if (strncmp(str, "pgtbl_v1", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } else if (strncmp(str, "pgtbl_v2", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V2; + amd_iommu_pgtable = PD_MODE_V2; + } else if (strncmp(str, "irtcachedis", 11) == 0) { + amd_iommu_irtcachedis = true; + } else if (strncmp(str, "nohugepages", 11) == 0) { + pr_info("Restricting V1 page-sizes to 4KiB"); + amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K; + } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) { + pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB"); + amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; } else { pr_notice("Unknown option - '%s'\n", str); } @@ -3475,15 +3751,26 @@ found: return 1; } +#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) + static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; + char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ int i; addr = strchr(str, '@'); if (!addr) { + addr = strchr(str, '='); + if (!addr) + goto not_found; + + ++addr; + + if (strlen(addr) > ACPIID_LEN) + goto not_found; + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", @@ -3496,6 +3783,9 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; + if (strlen(str) > ACPIID_LEN) + goto not_found; + if (sscanf(str, "=%s", acpiid) != 1) goto not_found; @@ -3524,6 +3814,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); @@ -3540,16 +3838,20 @@ __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); __setup("ivrs_acpihid", parse_ivrs_acpihid); -bool amd_iommu_v2_supported(void) +bool amd_iommu_pasid_supported(void) { + /* CPU page table size should match IOMMU guest page table size */ + if (cpu_feature_enabled(X86_FEATURE_LA57) && + amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) + return false; + /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without * setting up IOMMUv1 page table. */ - return amd_iommu_v2_present && !amd_iommu_snp_en; + return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; } -EXPORT_SYMBOL(amd_iommu_v2_supported); struct amd_iommu *get_amd_iommu(unsigned int idx) { @@ -3578,13 +3880,11 @@ u8 amd_iommu_pc_get_max_banks(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); bool amd_iommu_pc_supported(void) { return amd_iommu_pc_present; } -EXPORT_SYMBOL(amd_iommu_pc_supported); u8 amd_iommu_pc_get_max_counters(unsigned int idx) { @@ -3595,7 +3895,6 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) @@ -3651,40 +3950,91 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void) +#ifdef CONFIG_KVM_AMD_SEV +static int iommu_page_make_shared(void *page) { - /* - * The SNP support requires that IOMMU must be enabled, and is - * not configured in the passthrough mode. - */ - if (no_iommu || iommu_default_passthrough()) { - pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); - return -EINVAL; + unsigned long paddr, pfn; + + paddr = iommu_virt_to_phys(page); + /* Cbit maybe set in the paddr */ + pfn = __sme_clr(paddr) >> PAGE_SHIFT; + + if (!(pfn % PTRS_PER_PMD)) { + int ret, level; + bool assigned; + + ret = snp_lookup_rmpentry(pfn, &assigned, &level); + if (ret) { + pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); + return ret; + } + + if (!assigned) { + pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); + return -EINVAL; + } + + if (level > PG_LEVEL_4K) { + ret = psmash(pfn); + if (!ret) + goto done; + + pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", + pfn, ret, level); + return ret; + } } - /* - * Prevent enabling SNP after IOMMU_ENABLED state because this process - * affect how IOMMU driver sets up data structures and configures - * IOMMU hardware. - */ - if (init_state > IOMMU_ENABLED) { - pr_err("SNP: Too late to enable SNP for IOMMU.\n"); - return -EINVAL; +done: + return rmp_make_shared(pfn, PG_LEVEL_4K); +} + +static int iommu_make_shared(void *va, size_t size) +{ + void *page; + int ret; + + if (!va) + return 0; + + for (page = va; page < (va + size); page += PAGE_SIZE) { + ret = iommu_page_make_shared(page); + if (ret) + return ret; } - amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + return 0; +} + +int amd_iommu_snp_disable(void) +{ + struct amd_iommu *iommu; + int ret; + if (!amd_iommu_snp_en) - return -EINVAL; + return 0; - pr_info("SNP enabled\n"); + for_each_iommu(iommu) { + ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); + if (ret) + return ret; - /* Enforce IOMMU v1 pagetable when SNP is enabled. */ - if (amd_iommu_pgtable != AMD_IOMMU_V1) { - pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); + if (ret) + return ret; + + ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); + if (ret) + return ret; } return 0; } +EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); + +bool amd_iommu_sev_tio_supported(void) +{ + return check_feature2(FEATURE_SEVSNPIO_SUP); +} +EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported); #endif |
