diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.h')
-rw-r--r-- | drivers/iommu/intel/iommu.h | 307 |
1 files changed, 257 insertions, 50 deletions
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 4145c04cb1c6..6ea7bbe26b19 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -22,6 +22,7 @@ #include <linux/bitfield.h> #include <linux/xarray.h> #include <linux/perf_event.h> +#include <linux/pci.h> #include <asm/cacheflush.h> #include <asm/iommu.h> @@ -35,6 +36,8 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) + #define VTD_STRIDE_SHIFT (9) #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) @@ -47,7 +50,6 @@ #define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_ACCESS BIT_ULL(5) #define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) #define DMA_SL_PTE_DIRTY_BIT 9 #define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) @@ -455,7 +457,6 @@ enum { /* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) -#define QI_PGRP_PDP(p) (((u64)(p)) << 5) #define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) #define QI_PGRP_DID(rid) (((u64)(rid)) << 16) #define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) @@ -584,11 +585,23 @@ struct iommu_domain_info { * to VT-d spec, section 9.3 */ }; +/* + * We start simply by using a fixed size for the batched descriptors. This + * size is currently sufficient for our needs. Future improvements could + * involve dynamically allocating the batch buffer based on actual demand, + * allowing us to adjust the batch size for optimal performance in different + * scenarios. + */ +#define QI_MAX_BATCHED_DESC_COUNT 16 +struct qi_batch { + struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT]; + unsigned int index; +}; + struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ - u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; @@ -607,6 +620,10 @@ struct dmar_domain { struct list_head devices; /* all devices' list */ struct list_head dev_pasids; /* all attached pasids */ + spinlock_t cache_lock; /* Protect the cache tag list */ + struct list_head cache_tags; /* Cache tag list */ + struct qi_batch *qi_batch; /* Batched QI descriptors */ + int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ @@ -637,13 +654,16 @@ struct dmar_domain { struct { /* parent page table which the user domain is nested on */ struct dmar_domain *s2_domain; - /* user page table pointer (in GPA) */ - unsigned long s1_pgtbl; /* page table attributes */ struct iommu_hwpt_vtd_s1 s1_cfg; /* link to parent domain siblings */ struct list_head s2_link; }; + + /* SVA domain */ + struct { + struct mmu_notifier notifier; + }; }; struct iommu_domain domain; /* generic domain data structure for @@ -679,8 +699,6 @@ struct iommu_pmu { DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; unsigned char irq_name[16]; - struct hlist_node cpuhp_node; - int cpu; }; #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) @@ -701,7 +719,7 @@ struct intel_iommu { int msagaw; /* max sagaw of this iommu */ unsigned int irq, pr_irq, perf_irq; u16 segment; /* PCI segment# */ - unsigned char name[13]; /* Device Name */ + unsigned char name[16]; /* Device Name */ #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ @@ -711,17 +729,22 @@ struct intel_iommu { struct iommu_flush flush; #endif -#ifdef CONFIG_INTEL_IOMMU_SVM struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ unsigned long prq_seq_number; struct completion prq_complete; -#endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; + /* Synchronization between fault report and iommu device release. */ + struct mutex iopf_lock; struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ + /* rb tree for all probed devices */ + struct rb_root device_rbtree; + /* protect the device_rbtree */ + spinlock_t device_rbtree_lock; + #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ struct irq_domain *ir_domain; @@ -755,6 +778,8 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ + /* device tracking node(lookup by PCI RID) */ + struct rb_node node; #ifdef CONFIG_INTEL_IOMMU_DEBUGFS struct dentry *debugfs_dentry; /* pointer to device directory dentry */ #endif @@ -782,6 +807,13 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) return container_of(dom, struct dmar_domain, domain); } +/* + * Domain ID reserved for pasid entries programmed for first-level + * only and pass-through transfer modes. + */ +#define FLPT_DEFAULT_DID 1 +#define NUM_RESERVED_DID 2 + /* Retrieve the domain ID which has allocated to the domain */ static inline u16 domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) @@ -792,6 +824,21 @@ domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) return info->did; } +static inline u16 +iommu_domain_did(struct iommu_domain *domain, struct intel_iommu *iommu) +{ + if (domain->type == IOMMU_DOMAIN_SVA || + domain->type == IOMMU_DOMAIN_IDENTITY) + return FLPT_DEFAULT_DID; + return domain_id_iommu(to_dmar_domain(domain), iommu); +} + +static inline bool dev_is_real_dma_subdevice(struct device *dev) +{ + return dev && dev_is_pci(dev) && + pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); +} + /* * 0: readable * 1: writable @@ -813,11 +860,10 @@ static inline void dma_clear_pte(struct dma_pte *pte) static inline u64 dma_pte_addr(struct dma_pte *pte) { #ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return pte->val & VTD_PAGE_MASK; #else /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; #endif } @@ -1029,6 +1075,137 @@ static inline void context_set_sm_pre(struct context_entry *context) context->lo |= BIT_ULL(4); } +/* + * Clear the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_clear_sm_pre(struct context_entry *context) +{ + context->lo &= ~BIT_ULL(4); +} + +/* Returns a number of VTD pages, but aligned to MM page size */ +static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) +{ + host_addr &= ~PAGE_MASK; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; +} + +/* Return a size from number of VTD pages. */ +static inline unsigned long nrpages_to_size(unsigned long npages) +{ + return npages << VTD_PAGE_SHIFT; +} + +static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + struct qi_desc *desc) +{ + u8 dw = 0, dr = 0; + int ih = 0; + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr, + unsigned int mask, struct qi_desc *desc) +{ + if (mask) { + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else { + desc->qw1 = QI_DEV_IOTLB_ADDR(addr); + } + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc->qw2 = 0; + desc->qw3 = 0; +} + +static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr, + unsigned long npages, bool ih, + struct qi_desc *desc) +{ + if (npages == -1) { + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(npages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); + + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); + + desc->qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc->qw1 = QI_EIOTLB_ADDR(addr) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); + } +} + +static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid, + u16 qdep, u64 addr, + unsigned int size_order, + struct qi_desc *desc) +{ + unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); + + desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(pfsid); + + /* + * If S bit is 0, we only flush a single page. If S bit is set, + * The least significant zero bit indicates the invalidation address + * range. VT-d spec 6.5.2.6. + * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. + * size order = 0 is PAGE_SIZE 4KB + * Max Invs Pending (MIP) is set to 0 for now until we have DIT in + * ECAP. + */ + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) + pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", + addr, size_order); + + /* Take page address */ + desc->qw1 = QI_DEV_EIOTLB_ADDR(addr); + + if (size_order) { + /* + * Existing 0s in address below size_order may be the least + * significant bit, we must set them to 1s to avoid having + * smaller size than desired. + */ + desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, + VTD_PAGE_SHIFT); + /* Clear size_order bit to indicate size */ + desc->qw1 &= ~mask; + /* Set the S bit to indicate flushing more than 1 page */ + desc->qw1 |= QI_DEV_EIOTLB_SIZE; + } +} + /* Convert value to context PASID directory size field coding. */ #define context_pdts(pds) (((pds) & 0x7) << 9) @@ -1060,63 +1237,93 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options); + +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); /* * Options used in qi_submit_sync: * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. */ #define QI_OPT_WAIT_DRAIN BIT(0) -void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); -int prepare_domain_attach_device(struct iommu_domain *domain, - struct device *dev); -void domain_update_iommu_cap(struct dmar_domain *domain); +int paging_domain_compatible(struct iommu_domain *domain, struct device *dev); + +struct dev_pasid_info * +domain_add_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void domain_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); + +int __domain_setup_first_level(struct intel_iommu *iommu, + struct device *dev, ioasid_t pasid, + u16 did, pgd_t *pgd, int flags, + struct iommu_domain *old); int dmar_ir_support(void); -void *alloc_pgtable_page(int node, gfp_t gfp); -void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); -struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, - const struct iommu_user_data *user_data); +struct iommu_domain * +intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, + u32 flags, + const struct iommu_user_data *user_data); +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid); + +enum cache_tag_type { + CACHE_TAG_IOTLB, + CACHE_TAG_DEVTLB, + CACHE_TAG_NESTING_IOTLB, + CACHE_TAG_NESTING_DEVTLB, +}; -#ifdef CONFIG_INTEL_IOMMU_SVM -void intel_svm_check(struct intel_iommu *iommu); -int intel_svm_enable_prq(struct intel_iommu *iommu); -int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, - struct iommu_page_response *msg); -struct iommu_domain *intel_svm_domain_alloc(void); -void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); -void intel_drain_pasid_prq(struct device *dev, u32 pasid); - -struct intel_svm_dev { - struct list_head list; - struct rcu_head rcu; - struct device *dev; +struct cache_tag { + struct list_head node; + enum cache_tag_type type; struct intel_iommu *iommu; - u16 did; - u16 sid, qdep; + /* + * The @dev field represents the location of the cache. For IOTLB, it + * resides on the IOMMU hardware. @dev stores the device pointer to + * the IOMMU hardware. For DevTLB, it locates in the PCIe endpoint. + * @dev stores the device pointer to that endpoint. + */ + struct device *dev; + u16 domain_id; + ioasid_t pasid; + unsigned int users; }; -struct intel_svm { - struct mmu_notifier notifier; - struct mm_struct *mm; - u32 pasid; - struct list_head devs; -}; +int cache_tag_assign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid); +void cache_tag_unassign_domain(struct dmar_domain *domain, + struct device *dev, ioasid_t pasid); +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, + unsigned long end, int ih); +void cache_tag_flush_all(struct dmar_domain *domain); +void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, + unsigned long end); + +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + u16 did, bool affect_domains); + +int intel_iommu_enable_prq(struct intel_iommu *iommu); +int intel_iommu_finish_prq(struct intel_iommu *iommu); +void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); +void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); + +#ifdef CONFIG_INTEL_IOMMU_SVM +void intel_svm_check(struct intel_iommu *iommu); +struct iommu_domain *intel_svm_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} -static inline struct iommu_domain *intel_svm_domain_alloc(void) -{ - return NULL; -} - -static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev, + struct mm_struct *mm) { + return ERR_PTR(-ENODEV); } #endif |