From fdbe574eb69312a7fbe09674d69c01b80e4ed9dc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 19 Jan 2017 20:57:46 +0000 Subject: iommu/dma: Allow MSI-only cookies IOMMU domain users such as VFIO face a similar problem to DMA API ops with regard to mapping MSI messages in systems where the MSI write is subject to IOMMU translation. With the relevant infrastructure now in place for managed DMA domains, it's actually really simple for other users to piggyback off that and reap the benefits without giving up their own IOVA management, and without having to reinvent their own wheel in the MSI layer. Allow such users to opt into automatic MSI remapping by dedicating a region of their IOVA space to a managed cookie, and extend the mapping routine to implement a trivial linear allocator in such cases, to avoid the needless overhead of a full-blown IOVA domain. Signed-off-by: Robin Murphy Reviewed-by: Tomasz Nowicki Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 119 +++++++++++++++++++++++++++++++++++++--------- include/linux/dma-iommu.h | 6 +++ 2 files changed, 102 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2db0d641cf45..de41ead6542a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -37,15 +37,50 @@ struct iommu_dma_msi_page { phys_addr_t phys; }; +enum iommu_dma_cookie_type { + IOMMU_DMA_IOVA_COOKIE, + IOMMU_DMA_MSI_COOKIE, +}; + struct iommu_dma_cookie { - struct iova_domain iovad; - struct list_head msi_page_list; - spinlock_t msi_lock; + enum iommu_dma_cookie_type type; + union { + /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ + struct iova_domain iovad; + /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ + dma_addr_t msi_iova; + }; + struct list_head msi_page_list; + spinlock_t msi_lock; }; +static inline size_t cookie_msi_granule(struct iommu_dma_cookie 
*cookie) +{ + if (cookie->type == IOMMU_DMA_IOVA_COOKIE) + return cookie->iovad.granule; + return PAGE_SIZE; +} + static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain) { - return &((struct iommu_dma_cookie *)domain->iova_cookie)->iovad; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + + if (cookie->type == IOMMU_DMA_IOVA_COOKIE) + return &cookie->iovad; + return NULL; +} + +static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) +{ + struct iommu_dma_cookie *cookie; + + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); + if (cookie) { + spin_lock_init(&cookie->msi_lock); + INIT_LIST_HEAD(&cookie->msi_page_list); + cookie->type = type; + } + return cookie; } int iommu_dma_init(void) @@ -61,26 +96,54 @@ int iommu_dma_init(void) * callback when domain->type == IOMMU_DOMAIN_DMA. */ int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + if (domain->iova_cookie) + return -EEXIST; + + domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); + if (!domain->iova_cookie) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(iommu_get_dma_cookie); + +/** + * iommu_get_msi_cookie - Acquire just MSI remapping resources + * @domain: IOMMU domain to prepare + * @base: Start address of IOVA region for MSI mappings + * + * Users who manage their own IOVA allocation and do not want DMA API support, + * but would still like to take advantage of automatic MSI remapping, can use + * this to initialise their own domain appropriately. Users should reserve a + * contiguous IOVA region, starting at @base, large enough to accommodate the + * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address + * used by the devices attached to @domain. 
+ */ +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { struct iommu_dma_cookie *cookie; + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + if (domain->iova_cookie) return -EEXIST; - cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); + cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); if (!cookie) return -ENOMEM; - spin_lock_init(&cookie->msi_lock); - INIT_LIST_HEAD(&cookie->msi_page_list); + cookie->msi_iova = base; domain->iova_cookie = cookie; return 0; } -EXPORT_SYMBOL(iommu_get_dma_cookie); +EXPORT_SYMBOL(iommu_get_msi_cookie); /** * iommu_put_dma_cookie - Release a domain's DMA mapping resources - * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or + * iommu_get_msi_cookie() * * IOMMU drivers should normally call this from their domain_free callback. */ @@ -92,7 +155,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!cookie) return; - if (cookie->iovad.granule) + if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) put_iova_domain(&cookie->iovad); list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { @@ -137,11 +200,12 @@ static void iova_reserve_pci_windows(struct pci_dev *dev, int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size, struct device *dev) { - struct iova_domain *iovad = cookie_iovad(domain); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; unsigned long order, base_pfn, end_pfn; - if (!iovad) - return -ENODEV; + if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) + return -EINVAL; /* Use the smallest supported page size for IOVA granularity */ order = __ffs(domain->pgsize_bitmap); @@ -662,11 +726,12 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi_page; - struct iova_domain *iovad = 
&cookie->iovad; + struct iova_domain *iovad = cookie_iovad(domain); struct iova *iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; + size_t size = cookie_msi_granule(cookie); - msi_addr &= ~(phys_addr_t)iova_mask(iovad); + msi_addr &= ~(phys_addr_t)(size - 1); list_for_each_entry(msi_page, &cookie->msi_page_list, list) if (msi_page->phys == msi_addr) return msi_page; @@ -675,13 +740,18 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - iova = __alloc_iova(domain, iovad->granule, dma_get_mask(dev)); - if (!iova) - goto out_free_page; - msi_page->phys = msi_addr; - msi_page->iova = iova_dma_addr(iovad, iova); - if (iommu_map(domain, msi_page->iova, msi_addr, iovad->granule, prot)) + if (iovad) { + iova = __alloc_iova(domain, size, dma_get_mask(dev)); + if (!iova) + goto out_free_page; + msi_page->iova = iova_dma_addr(iovad, iova); + } else { + msi_page->iova = cookie->msi_iova; + cookie->msi_iova += size; + } + + if (iommu_map(domain, msi_page->iova, msi_addr, size, prot)) goto out_free_iova; INIT_LIST_HEAD(&msi_page->list); @@ -689,7 +759,10 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return msi_page; out_free_iova: - __free_iova(iovad, iova); + if (iovad) + __free_iova(iovad, iova); + else + cookie->msi_iova -= size; out_free_page: kfree(msi_page); return NULL; @@ -730,7 +803,7 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) msg->data = ~0U; } else { msg->address_hi = upper_32_bits(msi_page->iova); - msg->address_lo &= iova_mask(&cookie->iovad); + msg->address_lo &= cookie_msi_granule(cookie) - 1; msg->address_lo += lower_32_bits(msi_page->iova); } } diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 7f7e9a7e3839..28df844a23b6 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -27,6 +27,7 @@ int iommu_dma_init(void); /* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct 
iommu_domain *domain); +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ @@ -86,6 +87,11 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain) return -ENODEV; } +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } -- cgit From e5b5234a36ca283158721d3d2e0cddfa324abdf9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:47 +0000 Subject: iommu: Rename iommu_dm_regions into iommu_resv_regions We want to extend the callbacks used for dm regions and use them for reserved regions. Reserved regions can be - directly mapped regions - regions that cannot be iommu mapped (PCI host bridge windows, ...) - MSI regions (because they belong to another address space or because they are not translated by the IOMMU and need special handling) So let's rename the struct and also the callbacks. 
Signed-off-by: Eric Auger Acked-by: Robin Murphy Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/amd_iommu.c | 20 ++++++++++---------- drivers/iommu/iommu.c | 22 +++++++++++----------- include/linux/iommu.h | 29 +++++++++++++++-------------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 3ef0f42984f2..f7a024f1e1dc 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3161,8 +3161,8 @@ static bool amd_iommu_capable(enum iommu_cap cap) return false; } -static void amd_iommu_get_dm_regions(struct device *dev, - struct list_head *head) +static void amd_iommu_get_resv_regions(struct device *dev, + struct list_head *head) { struct unity_map_entry *entry; int devid; @@ -3172,7 +3172,7 @@ static void amd_iommu_get_dm_regions(struct device *dev, return; list_for_each_entry(entry, &amd_iommu_unity_map, list) { - struct iommu_dm_region *region; + struct iommu_resv_region *region; if (devid < entry->devid_start || devid > entry->devid_end) continue; @@ -3195,18 +3195,18 @@ static void amd_iommu_get_dm_regions(struct device *dev, } } -static void amd_iommu_put_dm_regions(struct device *dev, +static void amd_iommu_put_resv_regions(struct device *dev, struct list_head *head) { - struct iommu_dm_region *entry, *next; + struct iommu_resv_region *entry, *next; list_for_each_entry_safe(entry, next, head, list) kfree(entry); } -static void amd_iommu_apply_dm_region(struct device *dev, +static void amd_iommu_apply_resv_region(struct device *dev, struct iommu_domain *domain, - struct iommu_dm_region *region) + struct iommu_resv_region *region) { struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain)); unsigned long start, end; @@ -3230,9 +3230,9 @@ static const struct iommu_ops amd_iommu_ops = { .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, .device_group = 
amd_iommu_device_group, - .get_dm_regions = amd_iommu_get_dm_regions, - .put_dm_regions = amd_iommu_put_dm_regions, - .apply_dm_region = amd_iommu_apply_dm_region, + .get_resv_regions = amd_iommu_get_resv_regions, + .put_resv_regions = amd_iommu_put_resv_regions, + .apply_resv_region = amd_iommu_apply_resv_region, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index dbe7f653bb7c..1cee5c361c21 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -318,7 +318,7 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, struct device *dev) { struct iommu_domain *domain = group->default_domain; - struct iommu_dm_region *entry; + struct iommu_resv_region *entry; struct list_head mappings; unsigned long pg_size; int ret = 0; @@ -331,14 +331,14 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, pg_size = 1UL << __ffs(domain->pgsize_bitmap); INIT_LIST_HEAD(&mappings); - iommu_get_dm_regions(dev, &mappings); + iommu_get_resv_regions(dev, &mappings); /* We need to consider overlapping regions for different devices */ list_for_each_entry(entry, &mappings, list) { dma_addr_t start, end, addr; - if (domain->ops->apply_dm_region) - domain->ops->apply_dm_region(dev, domain, entry); + if (domain->ops->apply_resv_region) + domain->ops->apply_resv_region(dev, domain, entry); start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); @@ -358,7 +358,7 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, } out: - iommu_put_dm_regions(dev, &mappings); + iommu_put_resv_regions(dev, &mappings); return ret; } @@ -1559,20 +1559,20 @@ int iommu_domain_set_attr(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_domain_set_attr); -void iommu_get_dm_regions(struct device *dev, struct list_head *list) +void iommu_get_resv_regions(struct device *dev, struct list_head *list) { const struct iommu_ops *ops = dev->bus->iommu_ops; - if (ops 
&& ops->get_dm_regions) - ops->get_dm_regions(dev, list); + if (ops && ops->get_resv_regions) + ops->get_resv_regions(dev, list); } -void iommu_put_dm_regions(struct device *dev, struct list_head *list) +void iommu_put_resv_regions(struct device *dev, struct list_head *list) { const struct iommu_ops *ops = dev->bus->iommu_ops; - if (ops && ops->put_dm_regions) - ops->put_dm_regions(dev, list); + if (ops && ops->put_resv_regions) + ops->put_resv_regions(dev, list); } /* Request that a device is direct mapped by the IOMMU */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0ff5111f6959..bfecb8b74078 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -118,13 +118,13 @@ enum iommu_attr { }; /** - * struct iommu_dm_region - descriptor for a direct mapped memory region + * struct iommu_resv_region - descriptor for a reserved memory region * @list: Linked list pointers * @start: System physical start address of the region * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) 
*/ -struct iommu_dm_region { +struct iommu_resv_region { struct list_head list; phys_addr_t start; size_t length; @@ -150,9 +150,9 @@ struct iommu_dm_region { * @device_group: find iommu group for a particular device * @domain_get_attr: Query domain attributes * @domain_set_attr: Change domain attributes - * @get_dm_regions: Request list of direct mapping requirements for a device - * @put_dm_regions: Free list of direct mapping requirements for a device - * @apply_dm_region: Temporary helper call-back for iova reserved ranges + * @get_resv_regions: Request list of reserved regions for a device + * @put_resv_regions: Free list of reserved regions for a device + * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain * @domain_set_windows: Set the number of windows for a domain @@ -184,11 +184,12 @@ struct iommu_ops { int (*domain_set_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); - /* Request/Free a list of direct mapping requirements for a device */ - void (*get_dm_regions)(struct device *dev, struct list_head *list); - void (*put_dm_regions)(struct device *dev, struct list_head *list); - void (*apply_dm_region)(struct device *dev, struct iommu_domain *domain, - struct iommu_dm_region *region); + /* Request/Free a list of reserved regions for a device */ + void (*get_resv_regions)(struct device *dev, struct list_head *list); + void (*put_resv_regions)(struct device *dev, struct list_head *list); + void (*apply_resv_region)(struct device *dev, + struct iommu_domain *domain, + struct iommu_resv_region *region); /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, @@ -233,8 +234,8 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io extern void iommu_set_fault_handler(struct iommu_domain *domain, 
iommu_fault_handler_t handler, void *token); -extern void iommu_get_dm_regions(struct device *dev, struct list_head *list); -extern void iommu_put_dm_regions(struct device *dev, struct list_head *list); +extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); +extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern int iommu_attach_group(struct iommu_domain *domain, @@ -443,12 +444,12 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -static inline void iommu_get_dm_regions(struct device *dev, +static inline void iommu_get_resv_regions(struct device *dev, struct list_head *list) { } -static inline void iommu_put_dm_regions(struct device *dev, +static inline void iommu_put_resv_regions(struct device *dev, struct list_head *list) { } -- cgit From d30ddcaa7b028049cdfee3a40248002d07b2bbf3 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:48 +0000 Subject: iommu: Add a new type field in iommu_resv_region We introduce a new field to differentiate the reserved region types and specialize the apply_resv_region implementation. Legacy direct mapped regions have IOMMU_RESV_DIRECT type. We introduce 2 new reserved memory types: - IOMMU_RESV_MSI will characterize MSI regions that are mapped - IOMMU_RESV_RESERVED characterizes regions that cannot be mapped. 
Signed-off-by: Eric Auger Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/amd_iommu.c | 1 + include/linux/iommu.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f7a024f1e1dc..5f7ea4faa505 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3186,6 +3186,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, region->start = entry->address_start; region->length = entry->address_end - entry->address_start; + region->type = IOMMU_RESV_DIRECT; if (entry->prot & IOMMU_PROT_IR) region->prot |= IOMMU_READ; if (entry->prot & IOMMU_PROT_IW) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bfecb8b74078..233a6bf093bf 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -117,18 +117,25 @@ enum iommu_attr { DOMAIN_ATTR_MAX, }; +/* These are the possible reserved region types */ +#define IOMMU_RESV_DIRECT (1 << 0) +#define IOMMU_RESV_RESERVED (1 << 1) +#define IOMMU_RESV_MSI (1 << 2) + /** * struct iommu_resv_region - descriptor for a reserved memory region * @list: Linked list pointers * @start: System physical start address of the region * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) + * @type: Type of the reserved region */ struct iommu_resv_region { struct list_head list; phys_addr_t start; size_t length; int prot; + int type; }; #ifdef CONFIG_IOMMU_API -- cgit From 2b20cbba3390a55c511acba2f0f517dd27a528b2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:49 +0000 Subject: iommu: iommu_alloc_resv_region Introduce a new helper serving the purpose to allocate a reserved region. This will be used in iommu driver implementing reserved region callbacks. 
Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 18 ++++++++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1cee5c361c21..927878d0a612 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1575,6 +1575,24 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) ops->put_resv_regions(dev, list); } +struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, + size_t length, + int prot, int type) +{ + struct iommu_resv_region *region; + + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return NULL; + + INIT_LIST_HEAD(&region->list); + region->start = start; + region->length = length; + region->prot = prot; + region->type = type; + return region; +} + /* Request that a device is direct mapped by the IOMMU */ int iommu_request_dm_for_dev(struct device *dev) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 233a6bf093bf..f6bb55d3e606 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -244,6 +244,8 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern struct iommu_resv_region * +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); -- cgit From 544a25d904cab2ae68bd71b334603ec3a49b60dd Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:50 +0000 Subject: iommu: Only map direct mapped regions As we introduced new reserved region types which do not require mapping, let's make sure we only map direct mapped regions. 
Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 927878d0a612..41c190695749 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -343,6 +343,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); + if (entry->type != IOMMU_RESV_DIRECT) + continue; + for (addr = start; addr < end; addr += pg_size) { phys_addr_t phys_addr; -- cgit From 6c65fb318e8bbf21e939e651028b955324f1d873 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:51 +0000 Subject: iommu: iommu_get_group_resv_regions Introduce iommu_get_group_resv_regions whose role consists in enumerating all devices from the group and collecting their reserved regions. The list is sorted and overlaps between regions of the same type are handled by merging the regions. Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 8 +++++ 2 files changed, 106 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 41c190695749..640056ba46c2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -133,6 +133,104 @@ static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) return sprintf(buf, "%s\n", group->name); } +/** + * iommu_insert_resv_region - Insert a new region in the + * list of reserved regions. + * @new: new region to insert + * @regions: list of regions + * + * The new element is sorted by address with respect to the other + * regions of the same type. In case it overlaps with another + * region of the same type, regions are merged. 
In case it + * overlaps with another region of different type, regions are + * not merged. + */ +static int iommu_insert_resv_region(struct iommu_resv_region *new, + struct list_head *regions) +{ + struct iommu_resv_region *region; + phys_addr_t start = new->start; + phys_addr_t end = new->start + new->length - 1; + struct list_head *pos = regions->next; + + while (pos != regions) { + struct iommu_resv_region *entry = + list_entry(pos, struct iommu_resv_region, list); + phys_addr_t a = entry->start; + phys_addr_t b = entry->start + entry->length - 1; + int type = entry->type; + + if (end < a) { + goto insert; + } else if (start > b) { + pos = pos->next; + } else if ((start >= a) && (end <= b)) { + if (new->type == type) + goto done; + else + pos = pos->next; + } else { + if (new->type == type) { + phys_addr_t new_start = min(a, start); + phys_addr_t new_end = max(b, end); + + list_del(&entry->list); + entry->start = new_start; + entry->length = new_end - new_start + 1; + iommu_insert_resv_region(entry, regions); + } else { + pos = pos->next; + } + } + } +insert: + region = iommu_alloc_resv_region(new->start, new->length, + new->prot, new->type); + if (!region) + return -ENOMEM; + + list_add_tail(&region->list, pos); +done: + return 0; +} + +static int +iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, + struct list_head *group_resv_regions) +{ + struct iommu_resv_region *entry; + int ret; + + list_for_each_entry(entry, dev_resv_regions, list) { + ret = iommu_insert_resv_region(entry, group_resv_regions); + if (ret) + break; + } + return ret; +} + +int iommu_get_group_resv_regions(struct iommu_group *group, + struct list_head *head) +{ + struct iommu_device *device; + int ret = 0; + + mutex_lock(&group->mutex); + list_for_each_entry(device, &group->devices, list) { + struct list_head dev_resv_regions; + + INIT_LIST_HEAD(&dev_resv_regions); + iommu_get_resv_regions(device->dev, &dev_resv_regions); + ret = 
iommu_insert_device_resv_regions(&dev_resv_regions, head); + iommu_put_resv_regions(device->dev, &dev_resv_regions); + if (ret) + break; + } + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); + static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); static void iommu_group_release(struct kobject *kobj) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f6bb55d3e606..bec3730dc009 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -246,6 +246,8 @@ extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +extern int iommu_get_group_resv_regions(struct iommu_group *group, + struct list_head *head); extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); @@ -463,6 +465,12 @@ static inline void iommu_put_resv_regions(struct device *dev, { } +static inline int iommu_get_group_resv_regions(struct iommu_group *group, + struct list_head *head) +{ + return -ENODEV; +} + static inline int iommu_request_dm_for_dev(struct device *dev) { return -ENODEV; -- cgit From bc7d12b91bd35477fd650c4d72b61239de9d9066 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:52 +0000 Subject: iommu: Implement reserved_regions iommu-group sysfs file A new iommu-group sysfs attribute file is introduced. It contains the list of reserved regions for the iommu-group. Each reserved region is described on a separate line: - first field is the start IOVA address, - second is the end IOVA address, - third is the type. 
Signed-off-by: Eric Auger Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- .../ABI/testing/sysfs-kernel-iommu_groups | 12 ++++++++ drivers/iommu/iommu.c | 36 ++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index 9b31556cfdda..35c64e00b35c 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -12,3 +12,15 @@ Description: /sys/kernel/iommu_groups/ contains a number of sub- file if the IOMMU driver has chosen to register a more common name for the group. Users: + +What: /sys/kernel/iommu_groups/reserved_regions +Date: January 2017 +KernelVersion: v4.11 +Contact: Eric Auger +Description: /sys/kernel/iommu_groups/reserved_regions list IOVA + regions that are reserved. Not necessarily all + reserved regions are listed. This is typically used to + output direct-mapped, MSI, non mappable regions. Each + region is described on a single line: the 1st field is + the base IOVA, the second is the end IOVA and the third + field describes the type of the region. 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 640056ba46c2..f4a176e56e39 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -68,6 +68,12 @@ struct iommu_group_attribute { const char *buf, size_t count); }; +static const char * const iommu_group_resv_type_string[] = { + [IOMMU_RESV_DIRECT] = "direct", + [IOMMU_RESV_RESERVED] = "reserved", + [IOMMU_RESV_MSI] = "msi", +}; + #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ __ATTR(_name, _mode, _show, _store) @@ -231,8 +237,33 @@ int iommu_get_group_resv_regions(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); +static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, + char *buf) +{ + struct iommu_resv_region *region, *next; + struct list_head group_resv_regions; + char *str = buf; + + INIT_LIST_HEAD(&group_resv_regions); + iommu_get_group_resv_regions(group, &group_resv_regions); + + list_for_each_entry_safe(region, next, &group_resv_regions, list) { + str += sprintf(str, "0x%016llx 0x%016llx %s\n", + (long long int)region->start, + (long long int)(region->start + + region->length - 1), + iommu_group_resv_type_string[region->type]); + kfree(region); + } + + return (str - buf); +} + static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); +static IOMMU_GROUP_ATTR(reserved_regions, 0444, + iommu_group_show_resv_regions, NULL); + static void iommu_group_release(struct kobject *kobj) { struct iommu_group *group = to_iommu_group(kobj); @@ -310,6 +341,11 @@ struct iommu_group *iommu_group_alloc(void) */ kobject_put(&group->kobj); + ret = iommu_group_create_file(group, + &iommu_group_attr_reserved_regions); + if (ret) + return ERR_PTR(ret); + pr_debug("Allocated group %d\n", group->id); return group; -- cgit From 0659b8dc45a6b13a4fec73b2ebb51c96b41974c4 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:53 +0000 Subject: iommu/vt-d: Implement reserved 
region get/put callbacks This patch registers the [FEE0_0000h - FEF0_0000h] 1MB MSI range as a reserved region and RMRR regions as direct regions. This allows those reserved regions to be reported in the iommu-group sysfs. Signed-off-by: Eric Auger Signed-off-by: Will Deacon --- drivers/iommu/intel-iommu.c | 92 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 8a185250ae5a..bce59a53c2a6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -440,6 +440,7 @@ struct dmar_rmrr_unit { u64 end_address; /* reserved end address */ struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ + struct iommu_resv_region *resv; /* reserved region handle */ }; struct dmar_atsr_unit { @@ -4246,27 +4247,40 @@ static inline void init_iommu_pm_ops(void) {} int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; + int prot = DMA_PTE_READ|DMA_PTE_WRITE; struct dmar_rmrr_unit *rmrru; + size_t length; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) - return -ENOMEM; + goto out; rmrru->hdr = header; rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; + + length = rmrr->end_address - rmrr->base_address + 1; + rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot, + IOMMU_RESV_DIRECT); + if (!rmrru->resv) + goto free_rmrru; + rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt); - if (rmrru->devices_cnt && rmrru->devices == NULL) { - kfree(rmrru); - return -ENOMEM; - } + if (rmrru->devices_cnt && rmrru->devices == NULL) + goto free_all; list_add(&rmrru->list, &dmar_rmrr_units); return 0; +free_all: + kfree(rmrru->resv); +free_rmrru: + kfree(rmrru); +out: + return -ENOMEM; } static 
struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) @@ -4480,6 +4494,7 @@ static void intel_iommu_free_dmars(void) list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); + kfree(rmrru->resv); kfree(rmrru); } @@ -5203,6 +5218,45 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +static void intel_iommu_get_resv_regions(struct device *device, + struct list_head *head) +{ + struct iommu_resv_region *reg; + struct dmar_rmrr_unit *rmrr; + struct device *i_dev; + int i; + + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, i_dev) { + if (i_dev != device) + continue; + + list_add_tail(&rmrr->resv->list, head); + } + } + rcu_read_unlock(); + + reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, + IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, + 0, IOMMU_RESV_RESERVED); + if (!reg) + return; + list_add_tail(&reg->list, head); +} + +static void intel_iommu_put_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) { + if (entry->type == IOMMU_RESV_RESERVED) + kfree(entry); + } +} + #ifdef CONFIG_INTEL_IOMMU_SVM #define MAX_NR_PASID_BITS (20) static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu) @@ -5333,19 +5387,21 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) #endif /* CONFIG_INTEL_IOMMU_SVM */ static const struct iommu_ops intel_iommu_ops = { - .capable = intel_iommu_capable, - .domain_alloc = intel_iommu_domain_alloc, - .domain_free = intel_iommu_domain_free, - .attach_dev = intel_iommu_attach_device, - .detach_dev = intel_iommu_detach_device, - .map = intel_iommu_map, - .unmap = intel_iommu_unmap, - .map_sg = default_iommu_map_sg, - .iova_to_phys = intel_iommu_iova_to_phys, - .add_device = 
intel_iommu_add_device, - .remove_device = intel_iommu_remove_device, - .device_group = pci_device_group, - .pgsize_bitmap = INTEL_IOMMU_PGSIZES, + .capable = intel_iommu_capable, + .domain_alloc = intel_iommu_domain_alloc, + .domain_free = intel_iommu_domain_free, + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map = intel_iommu_map, + .unmap = intel_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = intel_iommu_iova_to_phys, + .add_device = intel_iommu_add_device, + .remove_device = intel_iommu_remove_device, + .get_resv_regions = intel_iommu_get_resv_regions, + .put_resv_regions = intel_iommu_put_resv_regions, + .device_group = pci_device_group, + .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; static void quirk_iommu_g4x_gfx(struct pci_dev *dev) -- cgit From 4397f32c03a757acb3e44d268c20233fa1758ed9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:54 +0000 Subject: iommu/amd: Declare MSI and HT regions as reserved IOVA regions This patch registers the MSI and HT regions as non mappable reserved regions. They will be exposed in the iommu-group sysfs. For direct-mapped regions let's also use iommu_alloc_resv_region(). 
Signed-off-by: Eric Auger Signed-off-by: Will Deacon --- drivers/iommu/amd_iommu.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5f7ea4faa505..d109e41204e8 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3164,6 +3164,7 @@ static bool amd_iommu_capable(enum iommu_cap cap) static void amd_iommu_get_resv_regions(struct device *dev, struct list_head *head) { + struct iommu_resv_region *region; struct unity_map_entry *entry; int devid; @@ -3172,28 +3173,42 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_for_each_entry(entry, &amd_iommu_unity_map, list) { - struct iommu_resv_region *region; + size_t length; + int prot = 0; if (devid < entry->devid_start || devid > entry->devid_end) continue; - region = kzalloc(sizeof(*region), GFP_KERNEL); + length = entry->address_end - entry->address_start; + if (entry->prot & IOMMU_PROT_IR) + prot |= IOMMU_READ; + if (entry->prot & IOMMU_PROT_IW) + prot |= IOMMU_WRITE; + + region = iommu_alloc_resv_region(entry->address_start, + length, prot, + IOMMU_RESV_DIRECT); if (!region) { pr_err("Out of memory allocating dm-regions for %s\n", dev_name(dev)); return; } - - region->start = entry->address_start; - region->length = entry->address_end - entry->address_start; - region->type = IOMMU_RESV_DIRECT; - if (entry->prot & IOMMU_PROT_IR) - region->prot |= IOMMU_READ; - if (entry->prot & IOMMU_PROT_IW) - region->prot |= IOMMU_WRITE; - list_add_tail(&region->list, head); } + + region = iommu_alloc_resv_region(MSI_RANGE_START, + MSI_RANGE_END - MSI_RANGE_START + 1, + 0, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(&region->list, head); + + region = iommu_alloc_resv_region(HT_RANGE_START, + HT_RANGE_END - HT_RANGE_START + 1, + 0, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(&region->list, head); } static void amd_iommu_put_resv_regions(struct device 
*dev, -- cgit From f3ebee80b3131d4cdb49d39cf07b32afe40b811c Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:55 +0000 Subject: iommu/arm-smmu: Implement reserved region get/put callbacks The get() populates the list with the MSI IOVA reserved window. At the moment an arbitrary MSI IOVA window is set at 0x8000000 of size 1MB. This will allow this information to be reported in the iommu-group sysfs. Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Acked-by: Will Deacon Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a60cded8a6ed..a354572491c8 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -281,6 +281,9 @@ enum arm_smmu_s2cr_privcfg { #define FSYNR0_WNR (1 << 4) +#define MSI_IOVA_BASE 0x8000000 +#define MSI_IOVA_LENGTH 0x100000 + static int force_stage; module_param(force_stage, int, S_IRUGO); MODULE_PARM_DESC(force_stage, @@ -1549,6 +1552,29 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, &fwid, 1); } +static void arm_smmu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; + + region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, + prot, IOMMU_RESV_MSI); + if (!region) + return; + + list_add_tail(&region->list, head); +} + +static void arm_smmu_put_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -1564,6 +1590,8 @@ static struct iommu_ops arm_smmu_ops = { .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = 
arm_smmu_domain_set_attr, .of_xlate = arm_smmu_of_xlate, + .get_resv_regions = arm_smmu_get_resv_regions, + .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; -- cgit From 50019f09a4baa0bd6635d4933c1bfed22d66b640 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:56 +0000 Subject: iommu/arm-smmu-v3: Implement reserved region get/put callbacks The get() populates the list with the MSI IOVA reserved window. At the moment an arbitrary MSI IOVA window is set at 0x8000000 of size 1MB. This will allow this information to be reported in the iommu-group sysfs. Signed-off-by: Eric Auger Acked-by: Will Deacon Reviewed-by: Tomasz Nowicki Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4d6ec444a9d6..6c4111cb19ff 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -412,6 +412,9 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 +#define MSI_IOVA_BASE 0x8000000 +#define MSI_IOVA_LENGTH 0x100000 + static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -1883,6 +1886,29 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, args->args, 1); } +static void arm_smmu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; + + region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, + prot, IOMMU_RESV_MSI); + if (!region) + return; + + list_add_tail(&region->list, head); +} + +static void arm_smmu_put_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); +} + static 
struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -1898,6 +1924,8 @@ static struct iommu_ops arm_smmu_ops = { .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .of_xlate = arm_smmu_of_xlate, + .get_resv_regions = arm_smmu_get_resv_regions, + .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; -- cgit From 631a9639ac413da6242cb15558ebd661cf633622 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:57 +0000 Subject: irqdomain: Add irq domain MSI and MSI_REMAP flags We introduce two new enum values for the irq domain flag: - IRQ_DOMAIN_FLAG_MSI indicates the irq domain corresponds to an MSI domain - IRQ_DOMAIN_FLAG_MSI_REMAP indicates the irq domain has MSI remapping capabilities. Those values will be useful to check all MSI irq domains have MSI remapping support when assessing the safety of IRQ assignment to a guest. irq_domain_hierarchical_is_msi_remap() allows to check if an irq domain or any parent implements MSI remapping. 
Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/irqdomain.h | 35 +++++++++++++++++++++++++++++++++++ kernel/irq/irqdomain.c | 14 ++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ffb84604c1de..bc2f5719dace 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -183,6 +183,12 @@ enum { /* Irq domain is an IPI domain with single virq */ IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), + /* Irq domain implements MSIs */ + IRQ_DOMAIN_FLAG_MSI = (1 << 4), + + /* Irq domain implements MSI remapping */ + IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -446,6 +452,19 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; } + +static inline bool irq_domain_is_msi(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI; +} + +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP; +} + +extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline void irq_domain_activate_irq(struct irq_data *data) { } static inline void irq_domain_deactivate_irq(struct irq_data *data) { } @@ -477,6 +496,22 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_msi(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) +{ + return false; +} + +static inline bool +irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) +{ + return false; +} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* 
CONFIG_IRQ_DOMAIN */ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c0a0ae43521..876e13172dc8 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1392,6 +1392,20 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain) if (domain->ops->alloc) domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; } + +/** + * irq_domain_hierarchical_is_msi_remap - Check if the domain or any + * parent has MSI remapping support + * @domain: domain pointer + */ +bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) +{ + for (; domain; domain = domain->parent) { + if (irq_domain_is_msi_remap(domain)) + return true; + } + return false; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain -- cgit From 88156f00904183d99e19269fbdb5cb56dc1522c3 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:58 +0000 Subject: genirq/msi: Set IRQ_DOMAIN_FLAG_MSI on MSI domain creation Now we have a flag value indicating an IRQ domain implements MSI, let's set it on msi_create_irq_domain(). 
Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- kernel/irq/msi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ee230063f033..ddc2f5427f75 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -270,8 +270,8 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) msi_domain_update_chip_ops(info); - return irq_domain_create_hierarchy(parent, 0, 0, fwnode, - &msi_domain_ops, info); + return irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, + fwnode, &msi_domain_ops, info); } int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, -- cgit From c7b41f0af38f53e46050b56a5b0e96710097b83c Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:59 +0000 Subject: irqdomain: irq_domain_check_msi_remap This new function checks whether all MSI irq domains implement IRQ remapping. This is useful to understand whether VFIO passthrough is safe with respect to interrupts. On ARM typically an MSI controller can sit downstream of the IOMMU without preventing VFIO passthrough. As such any assigned device can write into the MSI doorbell. In case the MSI controller implements IRQ remapping, assigned devices will not be able to trigger interrupts towards the host. Otherwise, the assignment must be flagged as unsafe with respect to interrupts. 
Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/irqdomain.h | 1 + kernel/irq/irqdomain.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bc2f5719dace..188eced6813e 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -222,6 +222,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); +extern bool irq_domain_check_msi_remap(void); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 876e13172dc8..80c4f9312187 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -277,6 +277,31 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, } EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); +/** + * irq_domain_check_msi_remap - Check whether all MSI irq domains implement + * IRQ remapping + * + * Return: false if any MSI irq domain does not support IRQ remapping, + * true otherwise (including if there is no MSI irq domain) + */ +bool irq_domain_check_msi_remap(void) +{ + struct irq_domain *h; + bool ret = true; + + mutex_lock(&irq_domain_mutex); + list_for_each_entry(h, &irq_domain_list, link) { + if (irq_domain_is_msi(h) && + !irq_domain_hierarchical_is_msi_remap(h)) { + ret = false; + break; + } + } + mutex_unlock(&irq_domain_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap); + /** * irq_set_default_host() - Set a "default" irq domain * @domain: default domain pointer -- cgit From 59768527db3de41afb0586e677e322012e6a392b Mon Sep 17 00:00:00 2001 From: Eric Auger Date: 
Thu, 19 Jan 2017 20:58:00 +0000 Subject: irqchip/gicv3-its: Sets IRQ_DOMAIN_FLAG_MSI_REMAP The GICv3 ITS is MSI remapping capable. Let's advertise this property so that VFIO passthrough can assess IRQ safety. Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/irqchip/irq-gic-v3-its.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 69b040f47d56..9d4fefc59827 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1642,6 +1642,7 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) inner_domain->parent = its_parent; inner_domain->bus_token = DOMAIN_BUS_NEXUS; + inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_REMAP; info->ops = &its_msi_domain_ops; info->data = its; inner_domain->host_data = info; -- cgit From 5d704992189fe8683a23f583a2f4f137a9b9d28b Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:58:01 +0000 Subject: vfio/type1: Allow transparent MSI IOVA allocation When attaching a group to the container, check the group's reserved regions and test whether the IOMMU translates MSI transactions. If yes, we initialize an IOVA allocator through the iommu_get_msi_cookie API. This will allow the MSI IOVAs to be transparently allocated on MSI controller's compose(). 
Signed-off-by: Eric Auger Acked-by: Alex Williamson Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/vfio/vfio_iommu_type1.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9266271a787a..5651fafe8cfb 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -39,6 +39,7 @@ #include #include #include +#include #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson " @@ -1181,6 +1182,28 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } +static bool vfio_iommu_has_resv_msi(struct iommu_group *group, + phys_addr_t *base) +{ + struct list_head group_resv_regions; + struct iommu_resv_region *region, *next; + bool ret = false; + + INIT_LIST_HEAD(&group_resv_regions); + iommu_get_group_resv_regions(group, &group_resv_regions); + list_for_each_entry(region, &group_resv_regions, list) { + if (region->type & IOMMU_RESV_MSI) { + *base = region->start; + ret = true; + goto out; + } + } +out: + list_for_each_entry_safe(region, next, &group_resv_regions, list) + kfree(region); + return ret; +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -1189,6 +1212,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct vfio_domain *domain, *d; struct bus_type *bus = NULL, *mdev_bus; int ret; + bool resv_msi; + phys_addr_t resv_msi_base; mutex_lock(&iommu->lock); @@ -1258,6 +1283,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; + resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base); + INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); @@ -1304,6 +1331,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_detach; + if (resv_msi && iommu_get_msi_cookie(domain->domain, 
resv_msi_base)) + goto out_detach; + list_add(&domain->next, &iommu->domain_list); mutex_unlock(&iommu->lock); -- cgit From 9d72f87babf144ff3ca5d85655c710de05110038 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:58:02 +0000 Subject: vfio/type1: Check MSI remapping at irq domain level In case the IOMMU translates MSI transactions (typical case on ARM), we check MSI remapping capability at IRQ domain level. Otherwise it is checked at IOMMU level. At this stage the arm-smmu-(v3) still advertise the IOMMU_CAP_INTR_REMAP capability at IOMMU level. This will be removed in subsequent patches. Signed-off-by: Eric Auger Acked-by: Alex Williamson Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/vfio/vfio_iommu_type1.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5651fafe8cfb..ec903a005fae 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -40,6 +40,7 @@ #include #include #include +#include #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson " @@ -1212,7 +1213,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct vfio_domain *domain, *d; struct bus_type *bus = NULL, *mdev_bus; int ret; - bool resv_msi; + bool resv_msi, msi_remap; phys_addr_t resv_msi_base; mutex_lock(&iommu->lock); @@ -1288,8 +1289,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); - if (!allow_unsafe_interrupts && - !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) { + msi_remap = resv_msi ? irq_domain_check_msi_remap() : + iommu_capable(bus, IOMMU_CAP_INTR_REMAP); + + if (!allow_unsafe_interrupts && !msi_remap) { pr_warn("%s: No interrupt remapping support. 
Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", __func__); ret = -EPERM; -- cgit From 5018c8d5ef0c172592eb98cf10e253d47b544ba8 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:58:03 +0000 Subject: iommu/arm-smmu: Do not advertise IOMMU_CAP_INTR_REMAP anymore IOMMU_CAP_INTR_REMAP has been advertised in arm-smmu(-v3) although on ARM this property is not attached to the IOMMU but rather is implemented in the MSI controller (GICv3 ITS). Now vfio_iommu_type1 checks MSI remapping capability at MSI controller level, let's correct this. Signed-off-by: Eric Auger Acked-by: Will Deacon Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 -- drivers/iommu/arm-smmu.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 6c4111cb19ff..d9cf6cb8c6cc 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1375,8 +1375,6 @@ static bool arm_smmu_capable(enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; - case IOMMU_CAP_INTR_REMAP: - return true; /* MSIs are just memory writes */ case IOMMU_CAP_NOEXEC: return true; default: diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a354572491c8..13d26009b8e0 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1374,8 +1374,6 @@ static bool arm_smmu_capable(enum iommu_cap cap) * requests. */ return true; - case IOMMU_CAP_INTR_REMAP: - return true; /* MSIs are just memory writes */ case IOMMU_CAP_NOEXEC: return true; default: -- cgit