diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 204 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 11 | ||||
-rw-r--r-- | drivers/iommu/dmar.c | 3 | ||||
-rw-r--r-- | drivers/iommu/intel-svm.c | 4 | ||||
-rw-r--r-- | drivers/iommu/iova.c | 4 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 517 |
6 files changed, 447 insertions, 296 deletions
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index ee0c7b73cff7..f122071688fd 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -316,6 +316,7 @@ #define ARM64_TCR_TBI0_MASK 0x1UL #define CTXDESC_CD_0_AA64 (1UL << 41) +#define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) #define CTXDESC_CD_0_A (1UL << 46) #define CTXDESC_CD_0_ASET_SHIFT 47 @@ -377,7 +378,16 @@ #define CMDQ_SYNC_0_CS_SHIFT 12 #define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_MSH_SHIFT 22 +#define CMDQ_SYNC_0_MSH_ISH (3UL << CMDQ_SYNC_0_MSH_SHIFT) +#define CMDQ_SYNC_0_MSIATTR_SHIFT 24 +#define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT) +#define CMDQ_SYNC_0_MSIDATA_SHIFT 32 +#define CMDQ_SYNC_0_MSIDATA_MASK 0xffffffffUL +#define CMDQ_SYNC_1_MSIADDR_SHIFT 0 +#define CMDQ_SYNC_1_MSIADDR_MASK 0xffffffffffffcUL /* Event queue */ #define EVTQ_ENT_DWORDS 4 @@ -408,20 +418,12 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 -#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ +#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */ +#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10 #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 -/* Until ACPICA headers cover IORT rev. C */ -#ifndef ACPI_IORT_SMMU_HISILICON_HI161X -#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 -#endif - -#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX -#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 -#endif - static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent { } pri; #define CMDQ_OP_CMD_SYNC 0x46 + struct { + u32 msidata; + u64 msiaddr; + } sync; }; }; @@ -604,6 +610,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10) #define ARM_SMMU_FEAT_STALLS (1 << 11) #define ARM_SMMU_FEAT_HYP (1 << 12) +#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -616,6 +623,7 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; + atomic_t sync_nr; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -634,6 +642,8 @@ struct arm_smmu_device { struct arm_smmu_strtab_cfg strtab_cfg; + u32 sync_count; + /* IOMMU core code handle */ struct iommu_device iommu; }; @@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q) * Wait for the SMMU to consume items. If drain is true, wait until the queue * is empty. Otherwise, wait until there is at least one free slot. */ -static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) +static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) { ktime_t timeout; - unsigned int delay = 1; + unsigned int delay = 1, spin_cnt = 0; - /* Wait longer if it's queue drain */ - timeout = ktime_add_us(ktime_get(), drain ? - ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : + /* Wait longer if it's a CMD_SYNC */ + timeout = ktime_add_us(ktime_get(), sync ? + ARM_SMMU_CMDQ_SYNC_TIMEOUT_US : ARM_SMMU_POLL_TIMEOUT_US); - while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { + while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) return -ETIMEDOUT; if (wfe) { wfe(); - } else { + } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) { cpu_relax(); + continue; + } else { udelay(delay); delay *= 2; + spin_cnt = 0; } } @@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) } break; case CMDQ_OP_CMD_SYNC: - cmd[0] |= CMDQ_SYNC_0_CS_SEV; + if (ent->sync.msiaddr) + cmd[0] |= CMDQ_SYNC_0_CS_IRQ; + else + cmd[0] |= CMDQ_SYNC_0_CS_SEV; + cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB; + cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT; + cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; break; default: return -ENOENT; @@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } +static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) +{ + struct arm_smmu_queue *q = &smmu->cmdq.q; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + + while (queue_insert_raw(q, cmd) == -ENOSPC) { + if (queue_poll_cons(q, false, wfe)) + dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); + } +} + static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); - struct arm_smmu_queue *q = &smmu->cmdq.q; if (arm_smmu_cmdq_build_cmd(cmd, ent)) { dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", @@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, } spin_lock_irqsave(&smmu->cmdq.lock, flags); - while (queue_insert_raw(q, cmd) == -ENOSPC) { - if (queue_poll_cons(q, false, wfe)) - dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); - } + arm_smmu_cmdq_insert_cmd(smmu, cmd); + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); +} - if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe)) - dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); +/* + * The difference between val and sync_idx is bounded by the maximum size of + * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic. + */ +static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) +{ + ktime_t timeout; + u32 val; + + timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US); + val = smp_cond_load_acquire(&smmu->sync_count, + (int)(VAL - sync_idx) >= 0 || + !ktime_before(ktime_get(), timeout)); + + return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0; +} + +static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + unsigned long flags; + struct arm_smmu_cmdq_ent ent = { + .opcode = CMDQ_OP_CMD_SYNC, + .sync = { + .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), + .msiaddr = virt_to_phys(&smmu->sync_count), + }, + }; + + arm_smmu_cmdq_build_cmd(cmd, &ent); + + spin_lock_irqsave(&smmu->cmdq.lock, flags); + arm_smmu_cmdq_insert_cmd(smmu, cmd); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + + return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); +} + +static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + unsigned long flags; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; + int ret; + + arm_smmu_cmdq_build_cmd(cmd, &ent); + + spin_lock_irqsave(&smmu->cmdq.lock, flags); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + + return ret; +} + +static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + int ret; + bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && + (smmu->features & ARM_SMMU_FEAT_COHERENCY); + + ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu) + : __arm_smmu_cmdq_issue_sync(smmu); + if (ret) + dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); } /* Context descriptor manipulation functions */ @@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE | CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT | CTXDESC_CD_0_V; + + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + val |= CTXDESC_CD_0_S; + cfg->cdptr[0] = cpu_to_le64(val); val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT; @@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) }; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, @@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING << STRTAB_STE_1_SHCFG_SHIFT); dst[2] = 0; /* Nuke the VMID */ - if (ste_live) + /* + * The SMMU can perform negative caching, so we must sync + * the STE regardless of whether the old value was live. + */ + if (smmu) arm_smmu_sync_ste_for_sid(smmu, sid); return; } @@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, #endif STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); - if (smmu->features & ARM_SMMU_FEAT_STALLS) + if (smmu->features & ARM_SMMU_FEAT_STALLS && + !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK @@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) return IRQ_HANDLED; } -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev) -{ - /* We don't actually use CMD_SYNC interrupts for anything */ - return IRQ_HANDLED; -} - static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) @@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) if (active & GERROR_MSI_EVTQ_ABT_ERR) dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); - if (active & GERROR_MSI_CMDQ_ABT_ERR) { + if (active & GERROR_MSI_CMDQ_ABT_ERR) dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); - arm_smmu_cmdq_sync_handler(irq, smmu->dev); - } if (active & GERROR_PRIQ_ABT_ERR) dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); @@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) { arm_smmu_gerror_handler(irq, dev); - arm_smmu_cmdq_sync_handler(irq, dev); return IRQ_WAKE_THREAD; } /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { - struct arm_smmu_cmdq_ent cmd; - - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_sync(void *cookie) @@ -2157,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; + atomic_set(&smmu->sync_nr, 0); ret = arm_smmu_init_queues(smmu); if (ret) return ret; @@ -2275,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "failed to enable evtq irq\n"); } - irq = smmu->cmdq.q.irq; - if (irq) { - ret = devm_request_irq(smmu->dev, irq, - arm_smmu_cmdq_sync_handler, 0, - "arm-smmu-v3-cmdq-sync", smmu); - if (ret < 0) - dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n"); - } - irq = smmu->gerr_irq; if (irq) { ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, @@ -2409,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Invalidate any cached configuration */ cmd.opcode = CMDQ_OP_CFGI_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Invalidate any stale TLB entries */ if (smmu->features & ARM_SMMU_FEAT_HYP) { @@ -2420,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); @@ -2542,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) * register, but warn on mismatch. */ if (!!(reg & IDR0_COHACC) != coherent) - dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", + dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", coherent ? "true" : "false"); switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { - case IDR0_STALL_MODEL_STALL: - /* Fallthrough */ case IDR0_STALL_MODEL_FORCE: + smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; + /* Fallthrough */ + case IDR0_STALL_MODEL_STALL: smmu->features |= ARM_SMMU_FEAT_STALLS; } @@ -2675,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; - case ACPI_IORT_SMMU_HISILICON_HI161X: + case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; } @@ -2793,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (irq > 0) smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); if (irq > 0) smmu->gerr_irq = irq; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e4a82d70d446..78d4c6b8f1ba 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -59,6 +59,7 @@ #define ARM_MMU500_ACTLR_CPRE (1 << 1) #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) +#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10) #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ @@ -119,14 +120,6 @@ enum arm_smmu_implementation { CAVIUM_SMMUV2, }; -/* Until ACPICA headers cover IORT rev. C */ -#ifndef ACPI_IORT_SMMU_CORELINK_MMU401 -#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 -#endif -#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX -#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 -#endif - struct arm_smmu_s2cr { struct iommu_group *group; int count; @@ -1616,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) * Allow unmatched Stream IDs to allocate bypass * TLB entries for reduced latency. */ - reg |= ARM_MMU500_ACR_SMTNMB_TLBEN; + reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR); } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 1ea7cd537873..9a7ffd13c7f0 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1679,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id) raw_spin_lock_irqsave(&iommu->register_lock, flag); } - writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO, + iommu->reg + DMAR_FSTS_REG); unlock_exit: raw_spin_unlock_irqrestore(&iommu->register_lock, flag); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f6697e55c2d4..ed1cf7c5a43b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ int pasid_max; int ret; - if (WARN_ON(!iommu)) + if (WARN_ON(!iommu || !iommu->pasid_table)) return -EINVAL; if (dev_is_pci(dev)) { @@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { + svm->iommu->pasid_table[svm->pasid].val = 0; + wmb(); idr_remove(&svm->iommu->pasid_idr, svm->pasid); if (svm->mm) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 84bda3a4dafc..466aaa8ba841 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -542,7 +542,7 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) { - struct iova_fq *fq = get_cpu_ptr(iovad->fq); + struct iova_fq *fq = raw_cpu_ptr(iovad->fq); unsigned long flags; unsigned idx; @@ -572,8 +572,6 @@ void queue_iova(struct iova_domain *iovad, if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) mod_timer(&iovad->fq_timer, jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); - - put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index af8140054273..8dce3a9de9d8 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -19,30 +19,49 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/sizes.h> #include <linux/slab.h> +#include <linux/sys_soc.h> #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include <asm/dma-iommu.h> #include <asm/pgalloc.h> +#else +#define arm_iommu_create_mapping(...) NULL +#define arm_iommu_attach_device(...) -ENODEV +#define arm_iommu_release_mapping(...) do {} while (0) +#define arm_iommu_detach_device(...) do {} while (0) #endif #include "io-pgtable.h" -#define IPMMU_CTX_MAX 1 +#define IPMMU_CTX_MAX 8 + +struct ipmmu_features { + bool use_ns_alias_offset; + bool has_cache_leaf_nodes; + unsigned int number_of_contexts; + bool setup_imbuscr; + bool twobit_imttbcr_sl0; +}; struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct iommu_device iommu; - + struct ipmmu_vmsa_device *root; + const struct ipmmu_features *features; unsigned int num_utlbs; + unsigned int num_ctx; spinlock_t lock; /* Protects ctx and domains[] */ DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; + struct iommu_group *group; struct dma_iommu_mapping *mapping; }; @@ -57,18 +76,12 @@ struct ipmmu_vmsa_domain { spinlock_t lock; /* Protects mappings */ }; -struct ipmmu_vmsa_iommu_priv { - struct ipmmu_vmsa_device *mmu; - struct device *dev; - struct list_head list; -}; - static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) { return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } -static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) +static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) { return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; } @@ -133,6 +146,10 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) #define IMTTBCR_TSZ0_MASK (7 << 0) #define IMTTBCR_TSZ0_SHIFT O +#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) +#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) +#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) + #define IMBUSCR 0x000c #define IMBUSCR_DVM (1 << 2) #define IMBUSCR_BUSSEL_SYS (0 << 0) @@ -194,6 +211,36 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) #define IMUASID_ASID0_SHIFT 0 /* ----------------------------------------------------------------------------- + * Root device handling + */ + +static struct platform_driver ipmmu_driver; + +static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu) +{ + return mmu->root == mmu; +} + +static int __ipmmu_check_device(struct device *dev, void *data) +{ + struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev); + struct ipmmu_vmsa_device **rootp = data; + + if (ipmmu_is_root(mmu)) + *rootp = mmu; + + return 0; +} + +static struct ipmmu_vmsa_device *ipmmu_find_root(void) +{ + struct ipmmu_vmsa_device *root = NULL; + + return driver_for_each_device(&ipmmu_driver.driver, NULL, &root, + __ipmmu_check_device) == 0 ? root : NULL; +} + +/* ----------------------------------------------------------------------------- * Read/Write Access */ @@ -208,15 +255,29 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, iowrite32(data, mmu->base + offset); } -static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) +static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain, + unsigned int reg) { - return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); + return ipmmu_read(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg); } -static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, - u32 data) +static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain, + unsigned int reg, u32 data) { - ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); + ipmmu_write(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg, data); +} + +static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain, + unsigned int reg, u32 data) +{ + if (domain->mmu != domain->mmu->root) + ipmmu_write(domain->mmu, + domain->context_id * IM_CTX_SIZE + reg, data); + + ipmmu_write(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg, data); } /* ----------------------------------------------------------------------------- @@ -228,7 +289,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) { unsigned int count = 0; - while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { + while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) { cpu_relax(); if (++count == TLB_LOOP_TIMEOUT) { dev_err_ratelimited(domain->mmu->dev, @@ -243,9 +304,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) { u32 reg; - reg = ipmmu_ctx_read(domain, IMCTR); + reg = ipmmu_ctx_read_root(domain, IMCTR); reg |= IMCTR_FLUSH; - ipmmu_ctx_write(domain, IMCTR, reg); + ipmmu_ctx_write_all(domain, IMCTR, reg); ipmmu_tlb_sync(domain); } @@ -313,11 +374,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, spin_lock_irqsave(&mmu->lock, flags); - ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); - if (ret != IPMMU_CTX_MAX) { + ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx); + if (ret != mmu->num_ctx) { mmu->domains[ret] = domain; set_bit(ret, mmu->ctx); - } + } else + ret = -EBUSY; spin_unlock_irqrestore(&mmu->lock, flags); @@ -340,6 +402,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; + u32 tmp; int ret; /* @@ -364,51 +427,59 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. For now, delegate it to the io-pgtable code. */ - domain->cfg.iommu_dev = domain->mmu->dev; + domain->cfg.iommu_dev = domain->mmu->root->dev; /* * Find an unused context. */ - ret = ipmmu_domain_allocate_context(domain->mmu, domain); - if (ret == IPMMU_CTX_MAX) - return -EBUSY; + ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); + if (ret < 0) + return ret; domain->context_id = ret; domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, domain); if (!domain->iop) { - ipmmu_domain_free_context(domain->mmu, domain->context_id); + ipmmu_domain_free_context(domain->mmu->root, + domain->context_id); return -EINVAL; } /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; - ipmmu_ctx_write(domain, IMTTLBR0, ttbr); - ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); + ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); + ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); /* * TTBCR * We use long descriptors with inner-shareable WBWA tables and allocate * the whole 32-bit VA space to TTBR0. */ - ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | - IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | - IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + if (domain->mmu->features->twobit_imttbcr_sl0) + tmp = IMTTBCR_SL0_TWOBIT_LVL_1; + else + tmp = IMTTBCR_SL0_LVL_1; + + ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | + IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | + IMTTBCR_IRGN0_WB_WA | tmp); /* MAIR0 */ - ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); + ipmmu_ctx_write_root(domain, IMMAIR0, + domain->cfg.arm_lpae_s1_cfg.mair[0]); /* IMBUSCR */ - ipmmu_ctx_write(domain, IMBUSCR, - ipmmu_ctx_read(domain, IMBUSCR) & - ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + if (domain->mmu->features->setup_imbuscr) + ipmmu_ctx_write_root(domain, IMBUSCR, + ipmmu_ctx_read_root(domain, IMBUSCR) & + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); /* * IMSTR * Clear all interrupt flags. */ - ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); + ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR)); /* * IMCTR @@ -417,7 +488,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * software management as we have no use for it. Flush the TLB as * required when modifying the context registers. */ - ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); + ipmmu_ctx_write_all(domain, IMCTR, + IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); return 0; } @@ -430,9 +502,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) * * TODO: Is TLB flush really needed ? */ - ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); + ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); - ipmmu_domain_free_context(domain->mmu, domain->context_id); + ipmmu_domain_free_context(domain->mmu->root, domain->context_id); } /* ----------------------------------------------------------------------------- @@ -446,11 +518,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) u32 status; u32 iova; - status = ipmmu_ctx_read(domain, IMSTR); + status = ipmmu_ctx_read_root(domain, IMSTR); if (!(status & err_mask)) return IRQ_NONE; - iova = ipmmu_ctx_read(domain, IMEAR); + iova = ipmmu_ctx_read_root(domain, IMEAR); /* * Clear the error status flags. Unlike traditional interrupt flag @@ -458,7 +530,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) * seems to require 0. The error address register must be read before, * otherwise its value will be 0. */ - ipmmu_ctx_write(domain, IMSTR, 0); + ipmmu_ctx_write_root(domain, IMSTR, 0); /* Log fatal errors. */ if (status & IMSTR_MHIT) @@ -499,7 +571,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) /* * Check interrupts for all active contexts. */ - for (i = 0; i < IPMMU_CTX_MAX; i++) { + for (i = 0; i < mmu->num_ctx; i++) { if (!mmu->domains[i]) continue; if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) @@ -528,6 +600,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) return &domain->io_domain; } +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + struct iommu_domain *io_domain = NULL; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + io_domain = __ipmmu_domain_alloc(type); + break; + + case IOMMU_DOMAIN_DMA: + io_domain = __ipmmu_domain_alloc(type); + if (io_domain && iommu_get_dma_cookie(io_domain)) { + kfree(io_domain); + io_domain = NULL; + } + break; + } + + return io_domain; +} + static void ipmmu_domain_free(struct iommu_domain *io_domain) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); @@ -536,6 +629,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) * Free the domain resources. We assume that all devices have already * been detached. */ + iommu_put_dma_cookie(io_domain); ipmmu_domain_destroy_context(domain); free_io_pgtable_ops(domain->iop); kfree(domain); @@ -544,15 +638,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); struct iommu_fwspec *fwspec = dev->iommu_fwspec; - struct ipmmu_vmsa_device *mmu = priv->mmu; + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; int ret = 0; - if (!priv || !priv->mmu) { + if (!mmu) { dev_err(dev, "Cannot attach to IPMMU\n"); return -ENXIO; } @@ -563,6 +656,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, /* The domain hasn't been used yet, initialize it. */ domain->mmu = mmu; ret = ipmmu_domain_init_context(domain); + if (ret < 0) { + dev_err(dev, "Unable to initialize IPMMU context\n"); + domain->mmu = NULL; + } else { + dev_info(dev, "Using IPMMU context %u\n", + domain->context_id); + } } else if (domain->mmu != mmu) { /* * Something is wrong, we can't attach two devices using @@ -641,62 +741,53 @@ static int ipmmu_init_platform_device(struct device *dev, struct of_phandle_args *args) { struct platform_device *ipmmu_pdev; - struct ipmmu_vmsa_iommu_priv *priv; ipmmu_pdev = of_find_device_by_node(args->np); if (!ipmmu_pdev) return -ENODEV; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->mmu = platform_get_drvdata(ipmmu_pdev); - priv->dev = dev; - dev->iommu_fwspec->iommu_priv = priv; + dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); return 0; } +static bool ipmmu_slave_whitelist(struct device *dev) +{ + /* By default, do not allow use of IPMMU */ + return false; +} + +static const struct soc_device_attribute soc_r8a7795[] = { + { .soc_id = "r8a7795", }, + { /* sentinel */ } +}; + static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { + /* For R-Car Gen3 use a white list to opt-in slave devices */ + if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev)) + return -ENODEV; + iommu_fwspec_add_ids(dev, spec->args, 1); /* Initialize once - xlate() will call multiple times */ - if (to_priv(dev)) + if (to_ipmmu(dev)) return 0; return ipmmu_init_platform_device(dev, spec); } -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) - -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) -{ - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - return __ipmmu_domain_alloc(type); -} - -static int ipmmu_add_device(struct device *dev) +static int ipmmu_init_arm_mapping(struct device *dev) { - struct ipmmu_vmsa_device *mmu = NULL; + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; int ret; - /* - * Only let through devices that have been verified in xlate() - */ - if (!to_priv(dev)) - return -ENODEV; - /* Create a device group and add the device to it. */ group = iommu_group_alloc(); if (IS_ERR(group)) { dev_err(dev, "Failed to allocate IOMMU group\n"); - ret = PTR_ERR(group); - goto error; + return PTR_ERR(group); } ret = iommu_group_add_device(group, dev); @@ -704,8 +795,7 @@ static int ipmmu_add_device(struct device *dev) if (ret < 0) { dev_err(dev, "Failed to add device to IPMMU group\n"); - group = NULL; - goto error; + return ret; } /* @@ -717,7 +807,6 @@ static int ipmmu_add_device(struct device *dev) * - Make the mapping size configurable ? We currently use a 2GB mapping * at a 1GB offset to ensure that NULL VAs will fault. */ - mmu = to_priv(dev)->mmu; if (!mmu->mapping) { struct dma_iommu_mapping *mapping; @@ -742,144 +831,58 @@ static int ipmmu_add_device(struct device *dev) return 0; error: - if (mmu) + iommu_group_remove_device(dev); + if (mmu->mapping) arm_iommu_release_mapping(mmu->mapping); - if (!IS_ERR_OR_NULL(group)) - iommu_group_remove_device(dev); - return ret; } -static void ipmmu_remove_device(struct device *dev) -{ - arm_iommu_detach_device(dev); - iommu_group_remove_device(dev); -} - -static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc, - .domain_free = ipmmu_domain_free, - .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, - .map_sg = default_iommu_map_sg, - .iova_to_phys = ipmmu_iova_to_phys, - .add_device = ipmmu_add_device, - .remove_device = ipmmu_remove_device, - .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, - .of_xlate = ipmmu_of_xlate, -}; - -#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ - -#ifdef CONFIG_IOMMU_DMA - -static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); -static LIST_HEAD(ipmmu_slave_devices); - -static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) -{ - struct iommu_domain *io_domain = NULL; - - switch (type) { - case IOMMU_DOMAIN_UNMANAGED: - io_domain = __ipmmu_domain_alloc(type); - break; - - case IOMMU_DOMAIN_DMA: - io_domain = __ipmmu_domain_alloc(type); - if (io_domain) - iommu_get_dma_cookie(io_domain); - break; - } - - return io_domain; -} - -static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) -{ - switch (io_domain->type) { - case IOMMU_DOMAIN_DMA: - iommu_put_dma_cookie(io_domain); - /* fall-through */ - default: - ipmmu_domain_free(io_domain); - break; - } -} - -static int ipmmu_add_device_dma(struct device *dev) +static int ipmmu_add_device(struct device *dev) { struct iommu_group *group; /* * Only let through devices that have been verified in xlate() */ - if (!to_priv(dev)) + if (!to_ipmmu(dev)) return -ENODEV; + if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) + return ipmmu_init_arm_mapping(dev); + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); - spin_lock(&ipmmu_slave_devices_lock); - list_add(&to_priv(dev)->list, &ipmmu_slave_devices); - spin_unlock(&ipmmu_slave_devices_lock); + iommu_group_put(group); return 0; } -static void ipmmu_remove_device_dma(struct device *dev) +static void ipmmu_remove_device(struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); - - spin_lock(&ipmmu_slave_devices_lock); - list_del(&priv->list); - spin_unlock(&ipmmu_slave_devices_lock); - + arm_iommu_detach_device(dev); iommu_group_remove_device(dev); } -static struct device *ipmmu_find_sibling_device(struct device *dev) -{ - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); - struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; - bool found = false; - - spin_lock(&ipmmu_slave_devices_lock); - - list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { - if (priv == sibling_priv) - continue; - if (sibling_priv->mmu == priv->mmu) { - found = true; - break; - } - } - - spin_unlock(&ipmmu_slave_devices_lock); - - return found ? sibling_priv->dev : NULL; -} - -static struct iommu_group *ipmmu_find_group_dma(struct device *dev) +static struct iommu_group *ipmmu_find_group(struct device *dev) { + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; - struct device *sibling; - sibling = ipmmu_find_sibling_device(dev); - if (sibling) - group = iommu_group_get(sibling); - if (!sibling || IS_ERR(group)) - group = generic_device_group(dev); + if (mmu->group) + return iommu_group_ref_get(mmu->group); + + group = iommu_group_alloc(); + if (!IS_ERR(group)) + mmu->group = group; return group; } static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc_dma, - .domain_free = ipmmu_domain_free_dma, + .domain_alloc = ipmmu_domain_alloc, + .domain_free = ipmmu_domain_free, .attach_dev = ipmmu_attach_device, .detach_dev = ipmmu_detach_device, .map = ipmmu_map, @@ -888,15 +891,13 @@ static const struct iommu_ops ipmmu_ops = { .iotlb_sync = ipmmu_iotlb_sync, .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, - .add_device = ipmmu_add_device_dma, - .remove_device = ipmmu_remove_device_dma, - .device_group = ipmmu_find_group_dma, + .add_device = ipmmu_add_device, + .remove_device = ipmmu_remove_device, + .device_group = ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, }; -#endif /* CONFIG_IOMMU_DMA */ - /* ----------------------------------------------------------------------------- * Probe/remove and init */ @@ -906,10 +907,40 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) unsigned int i; /* Disable all contexts. */ - for (i = 0; i < 4; ++i) + for (i = 0; i < mmu->num_ctx; ++i) ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); } +static const struct ipmmu_features ipmmu_features_default = { + .use_ns_alias_offset = true, + .has_cache_leaf_nodes = false, + .number_of_contexts = 1, /* software only tested with one context */ + .setup_imbuscr = true, + .twobit_imttbcr_sl0 = false, +}; + +static const struct ipmmu_features ipmmu_features_r8a7795 = { + .use_ns_alias_offset = false, + .has_cache_leaf_nodes = true, + .number_of_contexts = 8, + .setup_imbuscr = false, + .twobit_imttbcr_sl0 = true, +}; + +static const struct of_device_id ipmmu_of_ids[] = { + { + .compatible = "renesas,ipmmu-vmsa", + .data = &ipmmu_features_default, + }, { + .compatible = "renesas,ipmmu-r8a7795", + .data = &ipmmu_features_r8a7795, + }, { + /* Terminator */ + }, +}; + +MODULE_DEVICE_TABLE(of, ipmmu_of_ids); + static int ipmmu_probe(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu; @@ -927,6 +958,8 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->num_utlbs = 32; spin_lock_init(&mmu->lock); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); + mmu->features = of_device_get_match_data(&pdev->dev); + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -946,34 +979,71 @@ static int ipmmu_probe(struct platform_device *pdev) * Offset the registers base unconditionally to point to the non-secure * alias space for now. */ - mmu->base += IM_NS_ALIAS_OFFSET; + if (mmu->features->use_ns_alias_offset) + mmu->base += IM_NS_ALIAS_OFFSET; + + mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX, + mmu->features->number_of_contexts); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no IRQ found\n"); - return irq; - } - ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, - dev_name(&pdev->dev), mmu); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); - return ret; - } + /* + * Determine if this IPMMU instance is a root device by checking for + * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. + */ + if (!mmu->features->has_cache_leaf_nodes || + !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL)) + mmu->root = mmu; + else + mmu->root = ipmmu_find_root(); - ipmmu_device_reset(mmu); + /* + * Wait until the root device has been registered for sure. + */ + if (!mmu->root) + return -EPROBE_DEFER; + + /* Root devices have mandatory IRQs */ + if (ipmmu_is_root(mmu)) { + if (irq < 0) { + dev_err(&pdev->dev, "no IRQ found\n"); + return irq; + } - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) - return ret; + ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, + dev_name(&pdev->dev), mmu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); + return ret; + } - iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); - iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); + ipmmu_device_reset(mmu); + } - ret = iommu_device_register(&mmu->iommu); - if (ret) - return ret; + /* + * Register the IPMMU to the IOMMU subsystem in the following cases: + * - R-Car Gen2 IPMMU (all devices registered) + * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) + */ + if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; + + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); + iommu_device_set_fwnode(&mmu->iommu, + &pdev->dev.of_node->fwnode); + + ret = iommu_device_register(&mmu->iommu); + if (ret) + return ret; + +#if defined(CONFIG_IOMMU_DMA) + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &ipmmu_ops); +#endif + } /* * We can't create the ARM mapping here as it requires the bus to have @@ -993,20 +1063,13 @@ static int ipmmu_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&mmu->iommu); iommu_device_unregister(&mmu->iommu); -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); -#endif ipmmu_device_reset(mmu); return 0; } -static const struct of_device_id ipmmu_of_ids[] = { - { .compatible = "renesas,ipmmu-vmsa", }, - { } -}; - static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", @@ -1018,15 +1081,22 @@ static struct platform_driver ipmmu_driver = { static int __init ipmmu_init(void) { + static bool setup_done; int ret; + if (setup_done) + return 0; + ret = platform_driver_register(&ipmmu_driver); if (ret < 0) return ret; +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &ipmmu_ops); +#endif + setup_done = true; return 0; } @@ -1038,6 +1108,19 @@ static void __exit ipmmu_exit(void) subsys_initcall(ipmmu_init); module_exit(ipmmu_exit); +#ifdef CONFIG_IOMMU_DMA +static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np) +{ + ipmmu_init(); + return 0; +} + +IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", + ipmmu_vmsa_iommu_of_setup); +IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", + ipmmu_vmsa_iommu_of_setup); +#endif + MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); MODULE_LICENSE("GPL v2"); |