diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 4 | ||||
-rw-r--r-- | drivers/iommu/Makefile | 3 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 12 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_quirks.c | 13 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 4 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-impl.c | 5 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-qcom.c | 51 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 10 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 222 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.h | 16 | ||||
-rw-r--r-- | drivers/iommu/dmar.c | 5 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 71 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm-v7s.c | 15 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 180 | ||||
-rw-r--r-- | drivers/iommu/ioasid.c | 422 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 30 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 228 | ||||
-rw-r--r-- | drivers/iommu/mtk_iommu.c | 88 | ||||
-rw-r--r-- | drivers/iommu/mtk_iommu.h | 2 | ||||
-rw-r--r-- | drivers/iommu/qcom_iommu.c | 8 | ||||
-rw-r--r-- | drivers/iommu/rockchip-iommu.c | 28 | ||||
-rw-r--r-- | drivers/iommu/tegra-smmu.c | 36 | ||||
-rw-r--r-- | drivers/iommu/virtio-iommu.c | 3 |
23 files changed, 1020 insertions, 436 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 384c1a183cb5..f2b7a7132ce4 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -3,6 +3,10 @@ config IOMMU_IOVA tristate +# The IOASID library may also be used by non-IOMMU_API users +config IOASID + tristate + # IOMMU_API always gets selected by whoever wants it. config IOMMU_API bool diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 4f405f926e73..97814cc861ea 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -7,13 +7,14 @@ obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o +obj-$(CONFIG_IOASID) += ioasid.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o -obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o +obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index be2894bf92fc..a2668f94a8d6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -547,7 +547,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - pasid = PPR_PASID(*(u64 *)&event[0]); + pasid = (event[0] & EVENT_DOMID_MASK_HI) | + (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; @@ -580,7 +581,7 @@ retry: address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; @@ -1436,6 +1437,7 @@ static void free_pagetable(struct protection_domain *domain) * to 64 bits. */ static bool increase_address_space(struct protection_domain *domain, + unsigned long address, gfp_t gfp) { unsigned long flags; @@ -1444,8 +1446,8 @@ static bool increase_address_space(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); - if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) - /* address space already 64 bit large */ + if (address <= PM_LEVEL_SIZE(domain->mode) || + WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); @@ -1478,7 +1480,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, gfp) || *updated; + *updated = increase_address_space(domain, address, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c index c235f79b7a20..5120ce4fdce3 100644 --- a/drivers/iommu/amd_iommu_quirks.c +++ b/drivers/iommu/amd_iommu_quirks.c @@ -74,6 +74,19 @@ static const struct dmi_system_id ivrs_quirks[] __initconst = { .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495], }, { + /* + * Acer Aspire A315-41 requires the very same workaround as + * Dell Latitude 5495 + */ + .callback = ivrs_ioapic_quirk_cb, + .ident = "Acer Aspire A315-41", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-41"), + }, + .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495], + }, + { .callback = ivrs_ioapic_quirk_cb, .ident = "Lenovo ideapad 330S-15ARR", .matches = { diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index b611459f369a..f52f59d5c6bd 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -130,8 +130,8 @@ #define EVENT_TYPE_INV_PPR_REQ 0x9 #define EVENT_DEVID_MASK 0xffff #define EVENT_DEVID_SHIFT 0 -#define EVENT_DOMID_MASK 0xffff -#define EVENT_DOMID_SHIFT 0 +#define EVENT_DOMID_MASK_LO 0xffff +#define EVENT_DOMID_MASK_HI 0xf0000 #define EVENT_FLAGS_MASK 0xfff #define EVENT_FLAGS_SHIFT 0x10 diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 5c87a38620c4..b2fe72a8f019 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -109,7 +109,7 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10) #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) -static int arm_mmu500_reset(struct arm_smmu_device *smmu) +int arm_mmu500_reset(struct arm_smmu_device *smmu) { u32 reg, major; int i; @@ -170,5 +170,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; + if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500")) + return qcom_smmu_impl_init(smmu); + return smmu; } diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c new file mode 100644 index 000000000000..24c071c1d8b0 --- /dev/null +++ b/drivers/iommu/arm-smmu-qcom.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + */ + +#include <linux/qcom_scm.h> + +#include "arm-smmu.h" + +struct qcom_smmu { + struct arm_smmu_device smmu; +}; + +static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) +{ + int ret; + + arm_mmu500_reset(smmu); + + /* + * To address performance degradation in non-real time clients, + * such as USB and UFS, turn off wait-for-safe on sdm845 based boards, + * such as MTP and db845, whose firmwares implement secure monitor + * call handlers to turn on/off the wait-for-safe logic. + */ + ret = qcom_scm_qsmmu500_wait_safe_toggle(0); + if (ret) + dev_warn(smmu->dev, "Failed to turn off SAFE logic\n"); + + return ret; +} + +static const struct arm_smmu_impl qcom_smmu_impl = { + .reset = qcom_sdm845_smmu500_reset, +}; + +struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) +{ + struct qcom_smmu *qsmmu; + + qsmmu = devm_kzalloc(smmu->dev, sizeof(*qsmmu), GFP_KERNEL); + if (!qsmmu) + return ERR_PTR(-ENOMEM); + + qsmmu->smmu = *smmu; + + qsmmu->smmu.impl = &qcom_smmu_impl; + devm_kfree(smmu->dev, smmu); + + return &qsmmu->smmu; +} diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f1bdaaa8c5de..effe72eb89e7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2172,7 +2172,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; + cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; return 0; out_free_asid: @@ -3611,19 +3611,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Interrupt lines */ - irq = platform_get_irq_byname(pdev, "combined"); + irq = platform_get_irq_byname_optional(pdev, "combined"); if (irq > 0) smmu->combined_irq = irq; else { - irq = platform_get_irq_byname(pdev, "eventq"); + irq = platform_get_irq_byname_optional(pdev, "eventq"); if (irq > 0) smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "priq"); + irq = platform_get_irq_byname_optional(pdev, "priq"); if (irq > 0) smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); + irq = platform_get_irq_byname_optional(pdev, "gerror"); if (irq > 0) smmu->gerr_irq = irq; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e85bc0e8d564..4f1a350d9529 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/amba/bus.h> @@ -122,7 +123,7 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) { if (pm_runtime_enabled(smmu->dev)) - pm_runtime_put(smmu->dev); + pm_runtime_put_autosuspend(smmu->dev); } static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -244,6 +245,9 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, unsigned int spin_cnt, delay; u32 reg; + if (smmu->impl && unlikely(smmu->impl->tlb_sync)) + return smmu->impl->tlb_sync(smmu, page, sync, status); + arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { @@ -268,9 +272,8 @@ static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) spin_unlock_irqrestore(&smmu->global_sync_lock, flags); } -static void arm_smmu_tlb_sync_context(void *cookie) +static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; unsigned long flags; @@ -280,13 +283,6 @@ static void arm_smmu_tlb_sync_context(void *cookie) spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); } -static void arm_smmu_tlb_sync_vmid(void *cookie) -{ - struct arm_smmu_domain *smmu_domain = cookie; - - arm_smmu_tlb_sync_global(smmu_domain->smmu); -} - static void arm_smmu_tlb_inv_context_s1(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; @@ -297,7 +293,7 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie) wmb(); arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx, ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid); - arm_smmu_tlb_sync_context(cookie); + arm_smmu_tlb_sync_context(smmu_domain); } static void arm_smmu_tlb_inv_context_s2(void *cookie) @@ -312,18 +308,16 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) } static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) + size_t granule, void *cookie, int reg) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int reg, idx = cfg->cbndx; + int idx = cfg->cbndx; if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) wmb(); - reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova = (iova >> 12) << 12; iova |= cfg->asid; @@ -342,16 +336,15 @@ static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, } static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) + size_t granule, void *cookie, int reg) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; - int reg, idx = smmu_domain->cfg.cbndx; + int idx = smmu_domain->cfg.cbndx; if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) wmb(); - reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; do { if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -362,85 +355,98 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, } while (size -= granule); } -/* - * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears - * almost negligible, but the benefit of getting the first one in as far ahead - * of the sync as possible is significant, hence we don't just make this a - * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think. - */ -static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size, + size_t granule, void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - struct arm_smmu_device *smmu = smmu_domain->smmu; - - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - wmb(); + arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, + ARM_SMMU_CB_S1_TLBIVA); + arm_smmu_tlb_sync_context(cookie); +} - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid); +static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, + ARM_SMMU_CB_S1_TLBIVAL); + arm_smmu_tlb_sync_context(cookie); } -static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, - size_t granule, void *cookie) +static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie, + ARM_SMMU_CB_S1_TLBIVAL); +} - ops->tlb_inv_range(iova, size, granule, false, cookie); - ops->tlb_sync(cookie); +static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2); + arm_smmu_tlb_sync_context(cookie); } -static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, - size_t granule, void *cookie) +static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size, + size_t granule, void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2L); + arm_smmu_tlb_sync_context(cookie); +} - ops->tlb_inv_range(iova, size, granule, true, cookie); - ops->tlb_sync(cookie); +static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ + arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2L); } -static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) +static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_context_s2(cookie); +} +/* + * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears + * almost negligible, but the benefit of getting the first one in as far ahead + * of the sync as possible is significant, hence we don't just make this a + * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might + * think. + */ +static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); - ops->tlb_inv_range(iova, granule, granule, true, cookie); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid); } -static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s1, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_range_s1, - .tlb_sync = arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s1, + .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1, + .tlb_add_page = arm_smmu_tlb_add_page_s1, }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_range_s2, - .tlb_sync = arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2, + .tlb_add_page = arm_smmu_tlb_add_page_s2, }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync, - .tlb_sync = arm_smmu_tlb_sync_vmid, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1, + .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1, + .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -472,6 +478,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) { u32 gfsr, gfsynr0, gfsynr1, gfsynr2; struct arm_smmu_device *smmu = dev; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR); gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0); @@ -481,11 +489,19 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) if (!gfsr) return IRQ_NONE; - dev_err_ratelimited(smmu->dev, - "Unexpected global fault, this could be serious\n"); - dev_err_ratelimited(smmu->dev, - "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", - gfsr, gfsynr0, gfsynr1, gfsynr2); + if (__ratelimit(&rs)) { + if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && + (gfsr & sGFSR_USF)) + dev_err(smmu->dev, + "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", + (u16)gfsynr1); + else + dev_err(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + } arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr); return IRQ_HANDLED; @@ -536,8 +552,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr; cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr; } else { - cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; - cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; + cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair; + cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32; } } } @@ -770,7 +786,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .ias = ias, .oas = oas, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, - .tlb = &smmu_domain->flush_ops->tlb, + .tlb = smmu_domain->flush_ops, .iommu_dev = smmu->dev, }; @@ -812,6 +828,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return 0; out_clear_smmu: + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); smmu_domain->smmu = NULL; out_unlock: mutex_unlock(&smmu_domain->init_mutex); @@ -1038,8 +1055,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev) } group = iommu_group_get_for_dev(dev); - if (!group) - group = ERR_PTR(-ENOMEM); if (IS_ERR(group)) { ret = PTR_ERR(group); goto out_err; @@ -1153,6 +1168,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Looks ok, so add the device to the domain */ ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + /* + * Setup an autosuspend delay to avoid bouncing runpm state. + * Otherwise, if a driver for a suspended consumer device + * unmaps buffers, it will runpm resume/suspend for each one. + * + * For example, when used by a GPU device, when an application + * or game exits, it can trigger unmapping 100s or 1000s of + * buffers. With a runpm cycle for each buffer, that adds up + * to 5-10sec worth of reprogramming the context bank, while + * the system appears to be locked up to the user. + */ + pm_runtime_set_autosuspend_delay(smmu->dev, 20); + pm_runtime_use_autosuspend(smmu->dev); + rpm_put: arm_smmu_rpm_put(smmu); return ret; @@ -1199,7 +1228,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) if (smmu_domain->flush_ops) { arm_smmu_rpm_get(smmu); - smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain); + smmu_domain->flush_ops->tlb_flush_all(smmu_domain); arm_smmu_rpm_put(smmu); } } @@ -1210,11 +1239,16 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->flush_ops) { - arm_smmu_rpm_get(smmu); - smmu_domain->flush_ops->tlb_sync(smmu_domain); - arm_smmu_rpm_put(smmu); - } + if (!smmu) + return; + + arm_smmu_rpm_get(smmu); + if (smmu->version == ARM_SMMU_V2 || + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + arm_smmu_tlb_sync_context(smmu_domain); + else + arm_smmu_tlb_sync_global(smmu); + arm_smmu_rpm_put(smmu); } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -2061,10 +2095,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) for (i = 0; i < num_irqs; ++i) { int irq = platform_get_irq(pdev, i); - if (irq < 0) { - dev_err(dev, "failed to get irq index %d\n", i); + if (irq < 0) return -ENODEV; - } smmu->irqs[i] = irq; } diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index b19b6cae9b5e..62b9f0cec49b 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -79,6 +79,8 @@ #define ID7_MINOR GENMASK(3, 0) #define ARM_SMMU_GR0_sGFSR 0x48 +#define sGFSR_USF BIT(1) + #define ARM_SMMU_GR0_sGFSYNR0 0x50 #define ARM_SMMU_GR0_sGFSYNR1 0x54 #define ARM_SMMU_GR0_sGFSYNR2 0x58 @@ -304,17 +306,10 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_BYPASS, }; -struct arm_smmu_flush_ops { - struct iommu_flush_ops tlb; - void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule, - bool leaf, void *cookie); - void (*tlb_sync)(void *cookie); -}; - struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - const struct arm_smmu_flush_ops *flush_ops; + const struct iommu_flush_ops *flush_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; bool non_strict; @@ -335,6 +330,8 @@ struct arm_smmu_impl { int (*cfg_probe)(struct arm_smmu_device *smmu); int (*reset)(struct arm_smmu_device *smmu); int (*init_context)(struct arm_smmu_domain *smmu_domain); + void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync, + int status); }; static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n) @@ -398,5 +395,8 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page, arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v)) struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu); +struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); + +int arm_mmu500_reset(struct arm_smmu_device *smmu); #endif /* _ARM_SMMU_H */ diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index eecd6a421667..3acfa6a25fa2 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -895,8 +895,11 @@ int __init detect_intel_iommu(void) } #ifdef CONFIG_X86 - if (!ret) + if (!ret) { x86_init.iommu.iommu_init = intel_iommu_init; + x86_platform.iommu_shutdown = intel_iommu_shutdown; + } + #endif if (dmar_tbl) { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 615495aa613e..0c8d81f56a30 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2420,14 +2420,24 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); } -/* - * find_domain - * Note: we use struct device->archdata.iommu stores the info - */ static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; + if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO || + dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)) + return NULL; + + /* No lock here, assumes no domain exit in normal case */ + info = dev->archdata.iommu; + if (likely(info)) + return info->domain; + + return NULL; +} + +static struct dmar_domain *deferred_attach_domain(struct device *dev) +{ if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) { struct iommu_domain *domain; @@ -2437,12 +2447,7 @@ static struct dmar_domain *find_domain(struct device *dev) intel_iommu_attach_device(domain, dev); } - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; - - if (likely(info)) - return info->domain; - return NULL; + return find_domain(dev); } static inline struct device_domain_info * @@ -2794,7 +2799,7 @@ static int identity_mapping(struct device *dev) struct device_domain_info *info; info = dev->archdata.iommu; - if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); return 0; @@ -3471,7 +3476,7 @@ static bool iommu_need_mapping(struct device *dev) if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) dma_mask = dev->coherent_dma_mask; - if (dma_mask >= dma_get_required_mask(dev)) + if (dma_mask >= dma_direct_get_required_mask(dev)) return false; /* @@ -3512,7 +3517,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = find_domain(dev); + domain = deferred_attach_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3732,7 +3737,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = find_domain(dev); + domain = deferred_attach_domain(dev); if (!domain) return 0; @@ -3775,6 +3780,13 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return nelems; } +static u64 intel_get_required_mask(struct device *dev) +{ + if (!iommu_need_mapping(dev)) + return dma_direct_get_required_mask(dev); + return DMA_BIT_MASK(32); +} + static const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, @@ -3787,6 +3799,7 @@ static const struct dma_map_ops intel_dma_ops = { .dma_supported = dma_direct_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .get_required_mask = intel_get_required_mask, }; static void @@ -3819,7 +3832,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, int prot = 0; int ret; - domain = find_domain(dev); + domain = deferred_attach_domain(dev); if (WARN_ON(dir == DMA_NONE || !domain)) return DMA_MAPPING_ERROR; @@ -4306,13 +4319,19 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; + int ret; + + rmrr = (struct acpi_dmar_reserved_memory *)header; + ret = arch_rmrr_sanity_check(rmrr); + if (ret) + return ret; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) goto out; rmrru->hdr = header; - rmrr = (struct acpi_dmar_reserved_memory *)header; + rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; @@ -4751,6 +4770,26 @@ static void intel_disable_iommus(void) iommu_disable_translation(iommu); } +void intel_iommu_shutdown(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + + if (no_iommu || dmar_disabled) + return; + + down_write(&dmar_global_lock); + + /* Disable PMRs explicitly here. */ + for_each_iommu(iommu, drhd) + iommu_disable_protect_mem_regions(iommu); + + /* Make sure the IOMMUs are switched off */ + intel_disable_iommus(); + + up_write(&dmar_global_lock); +} + static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { struct iommu_device *iommu_dev = dev_to_iommu_device(dev); diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 4cb394937700..7c3bd2c3cdca 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -846,27 +846,28 @@ struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = { #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST -static struct io_pgtable_cfg *cfg_cookie; +static struct io_pgtable_cfg *cfg_cookie __initdata; -static void dummy_tlb_flush_all(void *cookie) +static void __init dummy_tlb_flush_all(void *cookie) { WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, - void *cookie) +static void __init dummy_tlb_flush(unsigned long iova, size_t size, + size_t granule, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, void *cookie) +static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } -static const struct iommu_flush_ops dummy_tlb_ops = { +static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_flush_walk = dummy_tlb_flush, .tlb_flush_leaf = dummy_tlb_flush, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4c91359057c5..bdf47f745268 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -32,39 +32,31 @@ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) /* - * For consistency with the architecture, we always consider - * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0 - */ -#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels) - -/* * Calculate the right shift amount to get to the portion describing level l * in a virtual address mapped by the pagetable in d. */ #define ARM_LPAE_LVL_SHIFT(l,d) \ - ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ - * (d)->bits_per_level) + (d)->pg_shift) - -#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift) + (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \ + ilog2(sizeof(arm_lpae_iopte))) -#define ARM_LPAE_PAGES_PER_PGD(d) \ - DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d)) +#define ARM_LPAE_GRANULE(d) \ + (sizeof(arm_lpae_iopte) << (d)->bits_per_level) +#define ARM_LPAE_PGD_SIZE(d) \ + (sizeof(arm_lpae_iopte) << (d)->pgd_bits) /* * Calculate the index at level l used to map virtual address a using the * pagetable in d. */ #define ARM_LPAE_PGD_IDX(l,d) \ - ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) + ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0) #define ARM_LPAE_LVL_IDX(a,l,d) \ (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) /* Calculate the block/page mapping size at level l for pagetable in d. */ -#define ARM_LPAE_BLOCK_SIZE(l,d) \ - (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \ - ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) +#define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d)) /* Page table bits */ #define ARM_LPAE_PTE_TYPE_SHIFT 0 @@ -166,6 +158,9 @@ #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) +#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL +#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -177,10 +172,9 @@ struct arm_lpae_io_pgtable { struct io_pgtable iop; - int levels; - size_t pgd_size; - unsigned long pg_shift; - unsigned long bits_per_level; + int pgd_bits; + int start_level; + int bits_per_level; void *pgd; }; @@ -210,7 +204,7 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte, { u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK; - if (data->pg_shift < 16) + if (ARM_LPAE_GRANULE(data) < SZ_64K) return paddr; /* Rotate the packed high-order bits back to the top */ @@ -389,7 +383,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); /* If we can install a leaf entry at this level, then do so */ - if (size == block_size && (size & cfg->pgsize_bitmap)) + if (size == block_size) return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); /* We can't allocate tables at the final level */ @@ -461,7 +455,7 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); - else if (prot & IOMMU_QCOM_SYS_CACHE) + else if (prot & IOMMU_SYS_CACHE_ONLY) pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); } @@ -476,16 +470,19 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int iommu_prot) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; - int ret, lvl = ARM_LPAE_START_LVL(data); + int ret, lvl = data->start_level; arm_lpae_iopte prot; /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) return 0; - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || - paddr >= (1ULL << data->iop.cfg.oas))) + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return -EINVAL; + + if (WARN_ON(iova >> data->iop.cfg.ias || paddr >> data->iop.cfg.oas)) return -ERANGE; prot = arm_lpae_prot_to_pte(data, iommu_prot); @@ -505,8 +502,8 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - if (lvl == ARM_LPAE_START_LVL(data)) - table_size = data->pgd_size; + if (lvl == data->start_level) + table_size = ARM_LPAE_PGD_SIZE(data); else table_size = ARM_LPAE_GRANULE(data); @@ -534,7 +531,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop) { struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); - __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd); + __arm_lpae_free_pgtable(data, data->start_level, data->pgd); kfree(data); } @@ -649,13 +646,16 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size, struct iommu_iotlb_gather *gather) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; - int lvl = ARM_LPAE_START_LVL(data); - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) + if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) + return 0; + + if (WARN_ON(iova >> data->iop.cfg.ias)) return 0; - return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep); + return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep); } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, @@ -663,7 +663,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte pte, *ptep = data->pgd; - int lvl = ARM_LPAE_START_LVL(data); + int lvl = data->start_level; do { /* Valid IOPTE pointer? */ @@ -740,8 +740,8 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) static struct arm_lpae_io_pgtable * arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) { - unsigned long va_bits, pgd_bits; struct arm_lpae_io_pgtable *data; + int levels, va_bits, pg_shift; arm_lpae_restrict_pgsizes(cfg); @@ -763,15 +763,15 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (!data) return NULL; - data->pg_shift = __ffs(cfg->pgsize_bitmap); - data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte)); + pg_shift = __ffs(cfg->pgsize_bitmap); + data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte)); - va_bits = cfg->ias - data->pg_shift; - data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + va_bits = cfg->ias - pg_shift; + levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + data->start_level = ARM_LPAE_MAX_LEVELS - levels; /* Calculate the actual size of our pgd (without concatenation) */ - pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1)); - data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte))); + data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); data->iop.ops = (struct io_pgtable_ops) { .map = arm_lpae_map, @@ -861,11 +861,11 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_MAIR_ATTR_INC_OWBRWA << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE)); - cfg->arm_lpae_s1_cfg.mair[0] = reg; - cfg->arm_lpae_s1_cfg.mair[1] = 0; + cfg->arm_lpae_s1_cfg.mair = reg; /* Looking good; allocate a pgd */ - data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), + GFP_KERNEL, cfg); if (!data->pgd) goto out_free_data; @@ -900,13 +900,13 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) * Concatenate PGDs at level 1 if possible in order to reduce * the depth of the stage-2 walk. */ - if (data->levels == ARM_LPAE_MAX_LEVELS) { + if (data->start_level == 0) { unsigned long pgd_pages; - pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte)); + pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { - data->pgd_size = pgd_pages << data->pg_shift; - data->levels--; + data->pgd_bits += data->bits_per_level; + data->start_level++; } } @@ -916,7 +916,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); - sl = ARM_LPAE_START_LVL(data); + sl = data->start_level; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: @@ -962,7 +962,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) cfg->arm_lpae_s2_cfg.vtcr = reg; /* Allocate pgd pages */ - data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), + GFP_KERNEL, cfg); if (!data->pgd) goto out_free_data; @@ -1015,27 +1016,57 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) static struct io_pgtable * arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; + struct arm_lpae_io_pgtable *data; + + /* No quirks for Mali (hopefully) */ + if (cfg->quirks) + return NULL; - if (cfg->ias != 48 || cfg->oas > 40) + if (cfg->ias > 48 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - u64 mair, ttbr; - /* Copy values as union fields overlap */ - mair = cfg->arm_lpae_s1_cfg.mair[0]; - ttbr = cfg->arm_lpae_s1_cfg.ttbr[0]; + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; - cfg->arm_mali_lpae_cfg.memattr = mair; - cfg->arm_mali_lpae_cfg.transtab = ttbr | - ARM_MALI_LPAE_TTBR_READ_INNER | - ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + /* Mali seems to need a full 4-level table regardless of IAS */ + if (data->start_level > 0) { + data->start_level = 0; + data->pgd_bits = 0; } + /* + * MEMATTR: Mali has no actual notion of a non-cacheable type, so the + * best we can do is mimic the out-of-tree driver and hope that the + * "implementation-defined caching policy" is good enough. Similarly, + * we'll use it for the sake of a valid attribute for our 'device' + * index, although callers should never request that in practice. + */ + cfg->arm_mali_lpae_cfg.memattr = + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, + cfg); + if (!data->pgd) + goto out_free_data; - return iop; + /* Ensure the empty pgd is visible before TRANSTAB can be written */ + wmb(); + + cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | + ARM_MALI_LPAE_TTBR_READ_INNER | + ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + return &data->iop; + +out_free_data: + kfree(data); + return NULL; } struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { @@ -1065,22 +1096,23 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST -static struct io_pgtable_cfg *cfg_cookie; +static struct io_pgtable_cfg *cfg_cookie __initdata; -static void dummy_tlb_flush_all(void *cookie) +static void __init dummy_tlb_flush_all(void *cookie) { WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, - void *cookie) +static void __init dummy_tlb_flush(unsigned long iova, size_t size, + size_t granule, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, void *cookie) +static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } @@ -1099,9 +1131,9 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n", cfg->pgsize_bitmap, cfg->ias); - pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n", - data->levels, data->pgd_size, data->pg_shift, - data->bits_per_level, data->pgd); + pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n", + ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data), + ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd); } #define __FAIL(ops, i) ({ \ @@ -1113,7 +1145,7 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) { - static const enum io_pgtable_fmt fmts[] = { + static const enum io_pgtable_fmt fmts[] __initconst = { ARM_64_LPAE_S1, ARM_64_LPAE_S2, }; @@ -1212,13 +1244,13 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) static int __init arm_lpae_do_selftests(void) { - static const unsigned long pgsize[] = { + static const unsigned long pgsize[] __initconst = { SZ_4K | SZ_2M | SZ_1G, SZ_16K | SZ_32M, SZ_64K | SZ_512M, }; - static const unsigned int ias[] = { + static const unsigned int ias[] __initconst = { 32, 36, 40, 42, 44, 48, }; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c new file mode 100644 index 000000000000..0f8dd377aada --- /dev/null +++ b/drivers/iommu/ioasid.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * I/O Address Space ID allocator. There is one global IOASID space, split into + * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and + * free IOASIDs with ioasid_alloc and ioasid_free. + */ +#include <linux/ioasid.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/xarray.h> + +struct ioasid_data { + ioasid_t id; + struct ioasid_set *set; + void *private; + struct rcu_head rcu; +}; + +/* + * struct ioasid_allocator_data - Internal data structure to hold information + * about an allocator. There are two types of allocators: + * + * - Default allocator always has its own XArray to track the IOASIDs allocated. + * - Custom allocators may share allocation helpers with different private data. + * Custom allocators that share the same helper functions also share the same + * XArray. + * Rules: + * 1. Default allocator is always available, not dynamically registered. This is + * to prevent race conditions with early boot code that want to register + * custom allocators or allocate IOASIDs. + * 2. Custom allocators take precedence over the default allocator. + * 3. When all custom allocators sharing the same helper functions are + * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be + * freed. Otherwise, outstanding IOASIDs will be lost and orphaned. + * 4. When switching between custom allocators sharing the same helper + * functions, outstanding IOASIDs are preserved. + * 5. When switching between custom allocator and default allocator, all IOASIDs + * must be freed to ensure unadulterated space for the new allocator. + * + * @ops: allocator helper functions and its data + * @list: registered custom allocators + * @slist: allocators share the same ops but different data + * @flags: attributes of the allocator + * @xa: xarray holds the IOASID space + * @rcu: used for kfree_rcu when unregistering allocator + */ +struct ioasid_allocator_data { + struct ioasid_allocator_ops *ops; + struct list_head list; + struct list_head slist; +#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */ + unsigned long flags; + struct xarray xa; + struct rcu_head rcu; +}; + +static DEFINE_SPINLOCK(ioasid_allocator_lock); +static LIST_HEAD(allocators_list); + +static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque); +static void default_free(ioasid_t ioasid, void *opaque); + +static struct ioasid_allocator_ops default_ops = { + .alloc = default_alloc, + .free = default_free, +}; + +static struct ioasid_allocator_data default_allocator = { + .ops = &default_ops, + .flags = 0, + .xa = XARRAY_INIT(ioasid_xa, XA_FLAGS_ALLOC), +}; + +static struct ioasid_allocator_data *active_allocator = &default_allocator; + +static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque) +{ + ioasid_t id; + + if (xa_alloc(&default_allocator.xa, &id, opaque, XA_LIMIT(min, max), GFP_ATOMIC)) { + pr_err("Failed to alloc ioasid from %d to %d\n", min, max); + return INVALID_IOASID; + } + + return id; +} + +static void default_free(ioasid_t ioasid, void *opaque) +{ + struct ioasid_data *ioasid_data; + + ioasid_data = xa_erase(&default_allocator.xa, ioasid); + kfree_rcu(ioasid_data, rcu); +} + +/* Allocate and initialize a new custom allocator with its helper functions */ +static struct ioasid_allocator_data *ioasid_alloc_allocator(struct ioasid_allocator_ops *ops) +{ + struct ioasid_allocator_data *ia_data; + + ia_data = kzalloc(sizeof(*ia_data), GFP_ATOMIC); + if (!ia_data) + return NULL; + + xa_init_flags(&ia_data->xa, XA_FLAGS_ALLOC); + INIT_LIST_HEAD(&ia_data->slist); + ia_data->flags |= IOASID_ALLOCATOR_CUSTOM; + ia_data->ops = ops; + + /* For tracking custom allocators that share the same ops */ + list_add_tail(&ops->list, &ia_data->slist); + + return ia_data; +} + +static bool use_same_ops(struct ioasid_allocator_ops *a, struct ioasid_allocator_ops *b) +{ + return (a->free == b->free) && (a->alloc == b->alloc); +} + +/** + * ioasid_register_allocator - register a custom allocator + * @ops: the custom allocator ops to be registered + * + * Custom allocators take precedence over the default xarray based allocator. + * Private data associated with the IOASID allocated by the custom allocators + * are managed by IOASID framework similar to data stored in xa by default + * allocator. + * + * There can be multiple allocators registered but only one is active. In case + * of runtime removal of a custom allocator, the next one is activated based + * on the registration ordering. + * + * Multiple allocators can share the same alloc() function, in this case the + * IOASID space is shared. + */ +int ioasid_register_allocator(struct ioasid_allocator_ops *ops) +{ + struct ioasid_allocator_data *ia_data; + struct ioasid_allocator_data *pallocator; + int ret = 0; + + spin_lock(&ioasid_allocator_lock); + + ia_data = ioasid_alloc_allocator(ops); + if (!ia_data) { + ret = -ENOMEM; + goto out_unlock; + } + + /* + * No particular preference, we activate the first one and keep + * the later registered allocators in a list in case the first one gets + * removed due to hotplug. + */ + if (list_empty(&allocators_list)) { + WARN_ON(active_allocator != &default_allocator); + /* Use this new allocator if default is not active */ + if (xa_empty(&active_allocator->xa)) { + rcu_assign_pointer(active_allocator, ia_data); + list_add_tail(&ia_data->list, &allocators_list); + goto out_unlock; + } + pr_warn("Default allocator active with outstanding IOASID\n"); + ret = -EAGAIN; + goto out_free; + } + + /* Check if the allocator is already registered */ + list_for_each_entry(pallocator, &allocators_list, list) { + if (pallocator->ops == ops) { + pr_err("IOASID allocator already registered\n"); + ret = -EEXIST; + goto out_free; + } else if (use_same_ops(pallocator->ops, ops)) { + /* + * If the new allocator shares the same ops, + * then they will share the same IOASID space. + * We should put them under the same xarray. + */ + list_add_tail(&ops->list, &pallocator->slist); + goto out_free; + } + } + list_add_tail(&ia_data->list, &allocators_list); + + spin_unlock(&ioasid_allocator_lock); + return 0; +out_free: + kfree(ia_data); +out_unlock: + spin_unlock(&ioasid_allocator_lock); + return ret; +} +EXPORT_SYMBOL_GPL(ioasid_register_allocator); + +/** + * ioasid_unregister_allocator - Remove a custom IOASID allocator ops + * @ops: the custom allocator to be removed + * + * Remove an allocator from the list, activate the next allocator in + * the order it was registered. Or revert to default allocator if all + * custom allocators are unregistered without outstanding IOASIDs. + */ +void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops) +{ + struct ioasid_allocator_data *pallocator; + struct ioasid_allocator_ops *sops; + + spin_lock(&ioasid_allocator_lock); + if (list_empty(&allocators_list)) { + pr_warn("No custom IOASID allocators active!\n"); + goto exit_unlock; + } + + list_for_each_entry(pallocator, &allocators_list, list) { + if (!use_same_ops(pallocator->ops, ops)) + continue; + + if (list_is_singular(&pallocator->slist)) { + /* No shared helper functions */ + list_del(&pallocator->list); + /* + * All IOASIDs should have been freed before + * the last allocator that shares the same ops + * is unregistered. + */ + WARN_ON(!xa_empty(&pallocator->xa)); + if (list_empty(&allocators_list)) { + pr_info("No custom IOASID allocators, switch to default.\n"); + rcu_assign_pointer(active_allocator, &default_allocator); + } else if (pallocator == active_allocator) { + rcu_assign_pointer(active_allocator, + list_first_entry(&allocators_list, + struct ioasid_allocator_data, list)); + pr_info("IOASID allocator changed"); + } + kfree_rcu(pallocator, rcu); + break; + } + /* + * Find the matching shared ops to delete, + * but keep outstanding IOASIDs + */ + list_for_each_entry(sops, &pallocator->slist, list) { + if (sops == ops) { + list_del(&ops->list); + break; + } + } + break; + } + +exit_unlock: + spin_unlock(&ioasid_allocator_lock); +} +EXPORT_SYMBOL_GPL(ioasid_unregister_allocator); + +/** + * ioasid_set_data - Set private data for an allocated ioasid + * @ioasid: the ID to set data + * @data: the private data + * + * For IOASID that is already allocated, private data can be set + * via this API. Future lookup can be done via ioasid_find. + */ +int ioasid_set_data(ioasid_t ioasid, void *data) +{ + struct ioasid_data *ioasid_data; + int ret = 0; + + spin_lock(&ioasid_allocator_lock); + ioasid_data = xa_load(&active_allocator->xa, ioasid); + if (ioasid_data) + rcu_assign_pointer(ioasid_data->private, data); + else + ret = -ENOENT; + spin_unlock(&ioasid_allocator_lock); + + /* + * Wait for readers to stop accessing the old private data, so the + * caller can free it. + */ + if (!ret) + synchronize_rcu(); + + return ret; +} +EXPORT_SYMBOL_GPL(ioasid_set_data); + +/** + * ioasid_alloc - Allocate an IOASID + * @set: the IOASID set + * @min: the minimum ID (inclusive) + * @max: the maximum ID (inclusive) + * @private: data private to the caller + * + * Allocate an ID between @min and @max. The @private pointer is stored + * internally and can be retrieved with ioasid_find(). + * + * Return: the allocated ID on success, or %INVALID_IOASID on failure. + */ +ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, + void *private) +{ + struct ioasid_data *data; + void *adata; + ioasid_t id; + + data = kzalloc(sizeof(*data), GFP_ATOMIC); + if (!data) + return INVALID_IOASID; + + data->set = set; + data->private = private; + + /* + * Custom allocator needs allocator data to perform platform specific + * operations. + */ + spin_lock(&ioasid_allocator_lock); + adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data; + id = active_allocator->ops->alloc(min, max, adata); + if (id == INVALID_IOASID) { + pr_err("Failed ASID allocation %lu\n", active_allocator->flags); + goto exit_free; + } + + if ((active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) && + xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(id, id), GFP_ATOMIC)) { + /* Custom allocator needs framework to store and track allocation results */ + pr_err("Failed to alloc ioasid from %d\n", id); + active_allocator->ops->free(id, active_allocator->ops->pdata); + goto exit_free; + } + data->id = id; + + spin_unlock(&ioasid_allocator_lock); + return id; +exit_free: + spin_unlock(&ioasid_allocator_lock); + kfree(data); + return INVALID_IOASID; +} +EXPORT_SYMBOL_GPL(ioasid_alloc); + +/** + * ioasid_free - Free an IOASID + * @ioasid: the ID to remove + */ +void ioasid_free(ioasid_t ioasid) +{ + struct ioasid_data *ioasid_data; + + spin_lock(&ioasid_allocator_lock); + ioasid_data = xa_load(&active_allocator->xa, ioasid); + if (!ioasid_data) { + pr_err("Trying to free unknown IOASID %u\n", ioasid); + goto exit_unlock; + } + + active_allocator->ops->free(ioasid, active_allocator->ops->pdata); + /* Custom allocator needs additional steps to free the xa element */ + if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { + ioasid_data = xa_erase(&active_allocator->xa, ioasid); + kfree_rcu(ioasid_data, rcu); + } + +exit_unlock: + spin_unlock(&ioasid_allocator_lock); +} +EXPORT_SYMBOL_GPL(ioasid_free); + +/** + * ioasid_find - Find IOASID data + * @set: the IOASID set + * @ioasid: the IOASID to find + * @getter: function to call on the found object + * + * The optional getter function allows to take a reference to the found object + * under the rcu lock. The function can also check if the object is still valid: + * if @getter returns false, then the object is invalid and NULL is returned. + * + * If the IOASID exists, return the private pointer passed to ioasid_alloc. + * Private data can be NULL if not set. Return an error if the IOASID is not + * found, or if @set is not NULL and the IOASID does not belong to the set. + */ +void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, + bool (*getter)(void *)) +{ + void *priv; + struct ioasid_data *ioasid_data; + struct ioasid_allocator_data *idata; + + rcu_read_lock(); + idata = rcu_dereference(active_allocator); + ioasid_data = xa_load(&idata->xa, ioasid); + if (!ioasid_data) { + priv = ERR_PTR(-ENOENT); + goto unlock; + } + if (set && ioasid_data->set != set) { + /* data found but does not belong to the set */ + priv = ERR_PTR(-EACCES); + goto unlock; + } + /* Now IOASID and its set is verified, we can return the private data */ + priv = rcu_dereference(ioasid_data->private); + if (getter && !getter(priv)) + priv = NULL; +unlock: + rcu_read_unlock(); + + return priv; +} +EXPORT_SYMBOL_GPL(ioasid_find); + +MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); +MODULE_DESCRIPTION("IO Address Space ID (IOASID) allocator"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f8853dbf1c4e..db7bfd4f2d20 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1665,6 +1665,36 @@ out_unlock: } EXPORT_SYMBOL_GPL(iommu_attach_device); +int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + if (unlikely(!domain->ops->cache_invalidate)) + return -ENODEV; + + return domain->ops->cache_invalidate(domain, dev, inv_info); +} +EXPORT_SYMBOL_GPL(iommu_cache_invalidate); + +int iommu_sva_bind_gpasid(struct iommu_domain *domain, + struct device *dev, struct iommu_gpasid_bind_data *data) +{ + if (unlikely(!domain->ops->sva_bind_gpasid)) + return -ENODEV; + + return domain->ops->sva_bind_gpasid(domain, dev, data); +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid); + +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid) +{ + if (unlikely(!domain->ops->sva_unbind_gpasid)) + return -ENODEV; + + return domain->ops->sva_unbind_gpasid(dev, pasid); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); + static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 35f3edaba0e2..d02edd2751f3 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -50,6 +50,9 @@ struct ipmmu_features { bool twobit_imttbcr_sl0; bool reserved_context; bool cache_snoop; + unsigned int ctx_offset_base; + unsigned int ctx_offset_stride; + unsigned int utlb_offset_base; }; struct ipmmu_vmsa_device { @@ -99,125 +102,49 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IM_NS_ALIAS_OFFSET 0x800 -#define IM_CTX_SIZE 0x40 - -#define IMCTR 0x0000 -#define IMCTR_TRE (1 << 17) -#define IMCTR_AFE (1 << 16) -#define IMCTR_RTSEL_MASK (3 << 4) -#define IMCTR_RTSEL_SHIFT 4 -#define IMCTR_TREN (1 << 3) -#define IMCTR_INTEN (1 << 2) -#define IMCTR_FLUSH (1 << 1) -#define IMCTR_MMUEN (1 << 0) - -#define IMCAAR 0x0004 - -#define IMTTBCR 0x0008 -#define IMTTBCR_EAE (1 << 31) -#define IMTTBCR_PMB (1 << 30) -#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28) /* R-Car Gen2 only */ -#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28) /* R-Car Gen2 only */ -#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28) /* R-Car Gen2 only */ -#define IMTTBCR_SH1_MASK (3 << 28) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN1_NC (0 << 26) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN1_WB_WA (1 << 26) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN1_WT (2 << 26) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN1_WB (3 << 26) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN1_MASK (3 << 26) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN1_NC (0 << 24) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN1_WB_WA (1 << 24) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN1_WT (2 << 24) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN1_WB (3 << 24) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN1_MASK (3 << 24) /* R-Car Gen2 only */ -#define IMTTBCR_TSZ1_MASK (7 << 16) -#define IMTTBCR_TSZ1_SHIFT 16 -#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12) /* R-Car Gen2 only */ -#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12) /* R-Car Gen2 only */ +/* MMU "context" registers */ +#define IMCTR 0x0000 /* R-Car Gen2/3 */ +#define IMCTR_INTEN (1 << 2) /* R-Car Gen2/3 */ +#define IMCTR_FLUSH (1 << 1) /* R-Car Gen2/3 */ +#define IMCTR_MMUEN (1 << 0) /* R-Car Gen2/3 */ + +#define IMTTBCR 0x0008 /* R-Car Gen2/3 */ +#define IMTTBCR_EAE (1 << 31) /* R-Car Gen2/3 */ #define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12) /* R-Car Gen2 only */ -#define IMTTBCR_SH0_MASK (3 << 12) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN0_NC (0 << 10) /* R-Car Gen2 only */ #define IMTTBCR_ORGN0_WB_WA (1 << 10) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN0_WT (2 << 10) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN0_WB (3 << 10) /* R-Car Gen2 only */ -#define IMTTBCR_ORGN0_MASK (3 << 10) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN0_NC (0 << 8) /* R-Car Gen2 only */ #define IMTTBCR_IRGN0_WB_WA (1 << 8) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN0_WT (2 << 8) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN0_WB (3 << 8) /* R-Car Gen2 only */ -#define IMTTBCR_IRGN0_MASK (3 << 8) /* R-Car Gen2 only */ -#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) /* R-Car Gen3 only */ -#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) /* R-Car Gen3 only */ #define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) /* R-Car Gen3 only */ -#define IMTTBCR_SL0_LVL_2 (0 << 4) -#define IMTTBCR_SL0_LVL_1 (1 << 4) -#define IMTTBCR_TSZ0_MASK (7 << 0) -#define IMTTBCR_TSZ0_SHIFT O - -#define IMBUSCR 0x000c -#define IMBUSCR_DVM (1 << 2) -#define IMBUSCR_BUSSEL_SYS (0 << 0) -#define IMBUSCR_BUSSEL_CCI (1 << 0) -#define IMBUSCR_BUSSEL_IMCAAR (2 << 0) -#define IMBUSCR_BUSSEL_CCI_IMCAAR (3 << 0) -#define IMBUSCR_BUSSEL_MASK (3 << 0) - -#define IMTTLBR0 0x0010 -#define IMTTUBR0 0x0014 -#define IMTTLBR1 0x0018 -#define IMTTUBR1 0x001c - -#define IMSTR 0x0020 -#define IMSTR_ERRLVL_MASK (3 << 12) -#define IMSTR_ERRLVL_SHIFT 12 -#define IMSTR_ERRCODE_TLB_FORMAT (1 << 8) -#define IMSTR_ERRCODE_ACCESS_PERM (4 << 8) -#define IMSTR_ERRCODE_SECURE_ACCESS (5 << 8) -#define IMSTR_ERRCODE_MASK (7 << 8) -#define IMSTR_MHIT (1 << 4) -#define IMSTR_ABORT (1 << 2) -#define IMSTR_PF (1 << 1) -#define IMSTR_TF (1 << 0) - -#define IMMAIR0 0x0028 -#define IMMAIR1 0x002c -#define IMMAIR_ATTR_MASK 0xff -#define IMMAIR_ATTR_DEVICE 0x04 -#define IMMAIR_ATTR_NC 0x44 -#define IMMAIR_ATTR_WBRWA 0xff -#define IMMAIR_ATTR_SHIFT(n) ((n) << 3) -#define IMMAIR_ATTR_IDX_NC 0 -#define IMMAIR_ATTR_IDX_WBRWA 1 -#define IMMAIR_ATTR_IDX_DEV 2 - -#define IMELAR 0x0030 /* IMEAR on R-Car Gen2 */ -#define IMEUAR 0x0034 /* R-Car Gen3 only */ - -#define IMPCTR 0x0200 -#define IMPSTR 0x0208 -#define IMPEAR 0x020c -#define IMPMBA(n) (0x0280 + ((n) * 4)) -#define IMPMBD(n) (0x02c0 + ((n) * 4)) +#define IMTTBCR_SL0_LVL_1 (1 << 4) /* R-Car Gen2 only */ + +#define IMBUSCR 0x000c /* R-Car Gen2 only */ +#define IMBUSCR_DVM (1 << 2) /* R-Car Gen2 only */ +#define IMBUSCR_BUSSEL_MASK (3 << 0) /* R-Car Gen2 only */ + +#define IMTTLBR0 0x0010 /* R-Car Gen2/3 */ +#define IMTTUBR0 0x0014 /* R-Car Gen2/3 */ + +#define IMSTR 0x0020 /* R-Car Gen2/3 */ +#define IMSTR_MHIT (1 << 4) /* R-Car Gen2/3 */ +#define IMSTR_ABORT (1 << 2) /* R-Car Gen2/3 */ +#define IMSTR_PF (1 << 1) /* R-Car Gen2/3 */ +#define IMSTR_TF (1 << 0) /* R-Car Gen2/3 */ +#define IMMAIR0 0x0028 /* R-Car Gen2/3 */ + +#define IMELAR 0x0030 /* R-Car Gen2/3, IMEAR on R-Car Gen2 */ +#define IMEUAR 0x0034 /* R-Car Gen3 only */ + +/* uTLB registers */ #define IMUCTR(n) ((n) < 32 ? IMUCTR0(n) : IMUCTR32(n)) -#define IMUCTR0(n) (0x0300 + ((n) * 16)) -#define IMUCTR32(n) (0x0600 + (((n) - 32) * 16)) -#define IMUCTR_FIXADDEN (1 << 31) -#define IMUCTR_FIXADD_MASK (0xff << 16) -#define IMUCTR_FIXADD_SHIFT 16 -#define IMUCTR_TTSEL_MMU(n) ((n) << 4) -#define IMUCTR_TTSEL_PMB (8 << 4) -#define IMUCTR_TTSEL_MASK (15 << 4) -#define IMUCTR_FLUSH (1 << 1) -#define IMUCTR_MMUEN (1 << 0) +#define IMUCTR0(n) (0x0300 + ((n) * 16)) /* R-Car Gen2/3 */ +#define IMUCTR32(n) (0x0600 + (((n) - 32) * 16)) /* R-Car Gen3 only */ +#define IMUCTR_TTSEL_MMU(n) ((n) << 4) /* R-Car Gen2/3 */ +#define IMUCTR_FLUSH (1 << 1) /* R-Car Gen2/3 */ +#define IMUCTR_MMUEN (1 << 0) /* R-Car Gen2/3 */ #define IMUASID(n) ((n) < 32 ? IMUASID0(n) : IMUASID32(n)) -#define IMUASID0(n) (0x0308 + ((n) * 16)) -#define IMUASID32(n) (0x0608 + (((n) - 32) * 16)) -#define IMUASID_ASID8_MASK (0xff << 8) -#define IMUASID_ASID8_SHIFT 8 -#define IMUASID_ASID0_MASK (0xff << 0) -#define IMUASID_ASID0_SHIFT 0 +#define IMUASID0(n) (0x0308 + ((n) * 16)) /* R-Car Gen2/3 */ +#define IMUASID32(n) (0x0608 + (((n) - 32) * 16)) /* R-Car Gen3 only */ /* ----------------------------------------------------------------------------- * Root device handling @@ -264,29 +191,61 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, iowrite32(data, mmu->base + offset); } +static unsigned int ipmmu_ctx_reg(struct ipmmu_vmsa_device *mmu, + unsigned int context_id, unsigned int reg) +{ + return mmu->features->ctx_offset_base + + context_id * mmu->features->ctx_offset_stride + reg; +} + +static u32 ipmmu_ctx_read(struct ipmmu_vmsa_device *mmu, + unsigned int context_id, unsigned int reg) +{ + return ipmmu_read(mmu, ipmmu_ctx_reg(mmu, context_id, reg)); +} + +static void ipmmu_ctx_write(struct ipmmu_vmsa_device *mmu, + unsigned int context_id, unsigned int reg, u32 data) +{ + ipmmu_write(mmu, ipmmu_ctx_reg(mmu, context_id, reg), data); +} + static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain, unsigned int reg) { - return ipmmu_read(domain->mmu->root, - domain->context_id * IM_CTX_SIZE + reg); + return ipmmu_ctx_read(domain->mmu->root, domain->context_id, reg); } static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain, unsigned int reg, u32 data) { - ipmmu_write(domain->mmu->root, - domain->context_id * IM_CTX_SIZE + reg, data); + ipmmu_ctx_write(domain->mmu->root, domain->context_id, reg, data); } static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain, unsigned int reg, u32 data) { if (domain->mmu != domain->mmu->root) - ipmmu_write(domain->mmu, - domain->context_id * IM_CTX_SIZE + reg, data); + ipmmu_ctx_write(domain->mmu, domain->context_id, reg, data); + + ipmmu_ctx_write(domain->mmu->root, domain->context_id, reg, data); +} + +static u32 ipmmu_utlb_reg(struct ipmmu_vmsa_device *mmu, unsigned int reg) +{ + return mmu->features->utlb_offset_base + reg; +} + +static void ipmmu_imuasid_write(struct ipmmu_vmsa_device *mmu, + unsigned int utlb, u32 data) +{ + ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUASID(utlb)), data); +} - ipmmu_write(domain->mmu->root, - domain->context_id * IM_CTX_SIZE + reg, data); +static void ipmmu_imuctr_write(struct ipmmu_vmsa_device *mmu, + unsigned int utlb, u32 data) +{ + ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUCTR(utlb)), data); } /* ----------------------------------------------------------------------------- @@ -334,11 +293,10 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, */ /* TODO: What should we set the ASID to ? */ - ipmmu_write(mmu, IMUASID(utlb), 0); + ipmmu_imuasid_write(mmu, utlb, 0); /* TODO: Do we need to flush the microTLB ? */ - ipmmu_write(mmu, IMUCTR(utlb), - IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH | - IMUCTR_MMUEN); + ipmmu_imuctr_write(mmu, utlb, IMUCTR_TTSEL_MMU(domain->context_id) | + IMUCTR_FLUSH | IMUCTR_MMUEN); mmu->utlb_ctx[utlb] = domain->context_id; } @@ -350,7 +308,7 @@ static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, { struct ipmmu_vmsa_device *mmu = domain->mmu; - ipmmu_write(mmu, IMUCTR(utlb), 0); + ipmmu_imuctr_write(mmu, utlb, 0); mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; } @@ -438,7 +396,7 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain) /* MAIR0 */ ipmmu_ctx_write_root(domain, IMMAIR0, - domain->cfg.arm_lpae_s1_cfg.mair[0]); + domain->cfg.arm_lpae_s1_cfg.mair); /* IMBUSCR */ if (domain->mmu->features->setup_imbuscr) @@ -783,6 +741,7 @@ static int ipmmu_init_platform_device(struct device *dev, static const struct soc_device_attribute soc_rcar_gen3[] = { { .soc_id = "r8a774a1", }, + { .soc_id = "r8a774b1", }, { .soc_id = "r8a774c0", }, { .soc_id = "r8a7795", }, { .soc_id = "r8a7796", }, @@ -794,6 +753,7 @@ static const struct soc_device_attribute soc_rcar_gen3[] = { }; static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = { + { .soc_id = "r8a774b1", }, { .soc_id = "r8a774c0", }, { .soc_id = "r8a7795", .revision = "ES3.*" }, { .soc_id = "r8a77965", }, @@ -985,7 +945,7 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) /* Disable all contexts. */ for (i = 0; i < mmu->num_ctx; ++i) - ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); + ipmmu_ctx_write(mmu, i, IMCTR, 0); } static const struct ipmmu_features ipmmu_features_default = { @@ -997,6 +957,9 @@ static const struct ipmmu_features ipmmu_features_default = { .twobit_imttbcr_sl0 = false, .reserved_context = false, .cache_snoop = true, + .ctx_offset_base = 0, + .ctx_offset_stride = 0x40, + .utlb_offset_base = 0, }; static const struct ipmmu_features ipmmu_features_rcar_gen3 = { @@ -1008,6 +971,9 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = { .twobit_imttbcr_sl0 = true, .reserved_context = true, .cache_snoop = false, + .ctx_offset_base = 0, + .ctx_offset_stride = 0x40, + .utlb_offset_base = 0, }; static const struct of_device_id ipmmu_of_ids[] = { @@ -1018,6 +984,9 @@ static const struct of_device_id ipmmu_of_ids[] = { .compatible = "renesas,ipmmu-r8a774a1", .data = &ipmmu_features_rcar_gen3, }, { + .compatible = "renesas,ipmmu-r8a774b1", + .data = &ipmmu_features_rcar_gen3, + }, { .compatible = "renesas,ipmmu-r8a774c0", .data = &ipmmu_features_rcar_gen3, }, { @@ -1086,8 +1055,6 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts); - irq = platform_get_irq(pdev, 0); - /* * Determine if this IPMMU instance is a root device by checking for * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. @@ -1106,10 +1073,9 @@ static int ipmmu_probe(struct platform_device *pdev) /* Root devices have mandatory IRQs */ if (ipmmu_is_root(mmu)) { - if (irq < 0) { - dev_err(&pdev->dev, "no IRQ found\n"); + irq = platform_get_irq(pdev, 0); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, dev_name(&pdev->dev), mmu); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 556beaa110b3..6fc1f5ecf91e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -101,8 +101,6 @@ #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) struct mtk_iommu_domain { - spinlock_t pgtlock; /* lock for page table */ - struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; @@ -173,13 +171,16 @@ static void mtk_iommu_tlb_flush_all(void *cookie) } } -static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, - void *cookie) +static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, + size_t granule, void *cookie) { struct mtk_iommu_data *data = cookie; + unsigned long flags; + int ret; + u32 tmp; for_each_m4u(data) { + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); @@ -188,23 +189,10 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, data->base + REG_MMU_INVLD_END_A); writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); - data->tlb_flush_active = true; - } -} - -static void mtk_iommu_tlb_sync(void *cookie) -{ - struct mtk_iommu_data *data = cookie; - int ret; - u32 tmp; - - for_each_m4u(data) { - /* Avoid timing out if there's nothing to wait for */ - if (!data->tlb_flush_active) - return; + /* tlb sync */ ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, - tmp, tmp != 0, 10, 100000); + tmp, tmp != 0, 10, 1000); if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); @@ -212,35 +200,24 @@ static void mtk_iommu_tlb_sync(void *cookie) } /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); - data->tlb_flush_active = false; + spin_unlock_irqrestore(&data->tlb_lock, flags); } } -static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size, - size_t granule, void *cookie) -{ - mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie); - mtk_iommu_tlb_sync(cookie); -} - -static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size, - size_t granule, void *cookie) -{ - mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie); - mtk_iommu_tlb_sync(cookie); -} - static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, unsigned long iova, size_t granule, void *cookie) { - mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie); + struct mtk_iommu_data *data = cookie; + struct iommu_domain *domain = &data->m4u_dom->domain; + + iommu_iotlb_gather_add_page(domain, gather, iova, granule); } static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_all = mtk_iommu_tlb_flush_all, - .tlb_flush_walk = mtk_iommu_tlb_flush_walk, - .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf, + .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync, + .tlb_flush_leaf = mtk_iommu_tlb_flush_range_sync, .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, }; @@ -316,8 +293,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - spin_lock_init(&dom->pgtlock); - dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | @@ -416,18 +391,13 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, { struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - unsigned long flags; - int ret; /* The "4GB mode" M4U physically can not use the lower remap of Dram. */ if (data->enable_4GB) paddr |= BIT_ULL(32); - spin_lock_irqsave(&dom->pgtlock, flags); - ret = dom->iop->map(dom->iop, iova, paddr, size, prot); - spin_unlock_irqrestore(&dom->pgtlock, flags); - - return ret; + /* Synchronize with the tlb_lock */ + return dom->iop->map(dom->iop, iova, paddr, size, prot); } static size_t mtk_iommu_unmap(struct iommu_domain *domain, @@ -435,25 +405,26 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - unsigned long flags; - size_t unmapsz; - - spin_lock_irqsave(&dom->pgtlock, flags); - unmapsz = dom->iop->unmap(dom->iop, iova, size, gather); - spin_unlock_irqrestore(&dom->pgtlock, flags); - return unmapsz; + return dom->iop->unmap(dom->iop, iova, size, gather); } static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { - mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); + mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data()); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + size_t length = gather->end - gather->start; + + if (gather->start == ULONG_MAX) + return; + + mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, + data); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, @@ -461,13 +432,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, { struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - unsigned long flags; phys_addr_t pa; - spin_lock_irqsave(&dom->pgtlock, flags); pa = dom->iop->iova_to_phys(dom->iop, iova); - spin_unlock_irqrestore(&dom->pgtlock, flags); - if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) pa &= ~BIT_ULL(32); @@ -733,6 +700,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; + spin_lock_init(&data->tlb_lock); list_add_tail(&data->list, &m4ulist); if (!iommu_present(&platform_bus_type)) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index fc0f16eabacd..ea949a324e33 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -57,7 +57,7 @@ struct mtk_iommu_data { struct mtk_iommu_domain *m4u_dom; struct iommu_group *m4u_group; bool enable_4GB; - bool tlb_flush_active; + spinlock_t tlb_lock; /* lock for tlb range flush */ struct iommu_device iommu; const struct mtk_iommu_plat_data *plat_data; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 6ad5f934f00f..52f38292df5b 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -284,9 +284,9 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* MAIRs (stage-1 only) */ iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, - pgtbl_cfg.arm_lpae_s1_cfg.mair[0]); + pgtbl_cfg.arm_lpae_s1_cfg.mair); iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR1, - pgtbl_cfg.arm_lpae_s1_cfg.mair[1]); + pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32); /* SCTLR */ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | @@ -539,8 +539,8 @@ static int qcom_iommu_add_device(struct device *dev) } group = iommu_group_get_for_dev(dev); - if (IS_ERR_OR_NULL(group)) - return PTR_ERR_OR_ZERO(group); + if (IS_ERR(group)) + return PTR_ERR(group); iommu_group_put(group); iommu_device_link(&qcom_iommu->iommu, dev); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a4f6425fd12d..b33cdd5aad81 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -100,6 +100,7 @@ struct rk_iommu { struct device *dev; void __iomem **bases; int num_mmu; + int num_irq; struct clk_bulk_data *clocks; int num_clocks; bool reset_disabled; @@ -526,7 +527,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id) int i, err; err = pm_runtime_get_if_in_use(iommu->dev); - if (WARN_ON_ONCE(err <= 0)) + if (!err || WARN_ON_ONCE(err < 0)) return ret; if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks))) @@ -979,13 +980,13 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) if (!dma_dev) return NULL; - rk_domain = devm_kzalloc(dma_dev, sizeof(*rk_domain), GFP_KERNEL); + rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); if (!rk_domain) return NULL; if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(&rk_domain->domain)) - return NULL; + goto err_free_domain; /* * rk32xx iommus use a 2 level pagetable. @@ -1020,6 +1021,8 @@ err_free_dt: err_put_cookie: if (type == IOMMU_DOMAIN_DMA) iommu_put_dma_cookie(&rk_domain->domain); +err_free_domain: + kfree(rk_domain); return NULL; } @@ -1048,6 +1051,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) if (domain->type == IOMMU_DOMAIN_DMA) iommu_put_dma_cookie(&rk_domain->domain); + kfree(rk_domain); } static int rk_iommu_add_device(struct device *dev) @@ -1136,7 +1140,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct rk_iommu *iommu; struct resource *res; int num_res = pdev->num_resources; - int err, i, irq; + int err, i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -1163,6 +1167,10 @@ static int rk_iommu_probe(struct platform_device *pdev) if (iommu->num_mmu == 0) return PTR_ERR(iommu->bases[0]); + iommu->num_irq = platform_irq_count(pdev); + if (iommu->num_irq < 0) + return iommu->num_irq; + iommu->reset_disabled = device_property_read_bool(dev, "rockchip,disable-mmu-reset"); @@ -1219,8 +1227,9 @@ static int rk_iommu_probe(struct platform_device *pdev) pm_runtime_enable(dev); - i = 0; - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) { + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); + if (irq < 0) return irq; @@ -1245,10 +1254,13 @@ err_unprepare_clocks: static void rk_iommu_shutdown(struct platform_device *pdev) { struct rk_iommu *iommu = platform_get_drvdata(pdev); - int i = 0, irq; + int i; + + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) devm_free_irq(iommu->dev, irq, iommu); + } pm_runtime_force_suspend(&pdev->dev); } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 99f85fb5a704..63a147b623e6 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -159,9 +159,9 @@ static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr) return (addr & smmu->pfn_mask) == addr; } -static dma_addr_t smmu_pde_to_dma(u32 pde) +static dma_addr_t smmu_pde_to_dma(struct tegra_smmu *smmu, u32 pde) { - return pde << 12; + return (dma_addr_t)(pde & smmu->pfn_mask) << 12; } static void smmu_flush_ptc_all(struct tegra_smmu *smmu) @@ -240,7 +240,7 @@ static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu, static inline void smmu_flush(struct tegra_smmu *smmu) { - smmu_readl(smmu, SMMU_CONFIG); + smmu_readl(smmu, SMMU_PTB_ASID); } static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp) @@ -351,6 +351,20 @@ static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup, unsigned int i; u32 value; + group = tegra_smmu_find_swgroup(smmu, swgroup); + if (group) { + value = smmu_readl(smmu, group->reg); + value &= ~SMMU_ASID_MASK; + value |= SMMU_ASID_VALUE(asid); + value |= SMMU_ASID_ENABLE; + smmu_writel(smmu, value, group->reg); + } else { + pr_warn("%s group from swgroup %u not found\n", __func__, + swgroup); + /* No point moving ahead if group was not found */ + return; + } + for (i = 0; i < smmu->soc->num_clients; i++) { const struct tegra_mc_client *client = &smmu->soc->clients[i]; @@ -361,15 +375,6 @@ static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup, value |= BIT(client->smmu.bit); smmu_writel(smmu, value, client->smmu.reg); } - - group = tegra_smmu_find_swgroup(smmu, swgroup); - if (group) { - value = smmu_readl(smmu, group->reg); - value &= ~SMMU_ASID_MASK; - value |= SMMU_ASID_VALUE(asid); - value |= SMMU_ASID_ENABLE; - smmu_writel(smmu, value, group->reg); - } } static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup, @@ -549,6 +554,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, dma_addr_t *dmap) { unsigned int pd_index = iova_pd_index(iova); + struct tegra_smmu *smmu = as->smmu; struct page *pt_page; u32 *pd; @@ -557,7 +563,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, return NULL; pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pd_index]); + *dmap = smmu_pde_to_dma(smmu, pd[pd_index]); return tegra_smmu_pte_offset(pt_page, iova); } @@ -599,7 +605,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, } else { u32 *pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pde]); + *dmap = smmu_pde_to_dma(smmu, pd[pde]); } return tegra_smmu_pte_offset(as->pts[pde], iova); @@ -624,7 +630,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) if (--as->count[pde] == 0) { struct tegra_smmu *smmu = as->smmu; u32 *pd = page_address(as->pd); - dma_addr_t pte_dma = smmu_pde_to_dma(pd[pde]); + dma_addr_t pte_dma = smmu_pde_to_dma(smmu, pd[pde]); tegra_smmu_set_pde(as, iova, 0); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 2ebcf4bae0c6..315c7cc4f99d 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -153,7 +153,6 @@ static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu, */ static int __viommu_sync_req(struct viommu_dev *viommu) { - int ret = 0; unsigned int len; size_t write_len; struct viommu_request *req; @@ -182,7 +181,7 @@ static int __viommu_sync_req(struct viommu_dev *viommu) kfree(req); } - return ret; + return 0; } static int viommu_sync_req(struct viommu_dev *viommu) |