summaryrefslogtreecommitdiff
path: root/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c')
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c4896
1 files changed, 4896 insertions, 0 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
new file mode 100644
index 000000000000..d16d35c78c06
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -0,0 +1,4896 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU API for ARM architected SMMUv3 implementations.
+ *
+ * Copyright (C) 2015 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This driver is powered by bad coffee and bombay mix.
+ */
+
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/bitops.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io-pgtable.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/platform_device.h>
+#include <linux/string_choices.h>
+#include <kunit/visibility.h>
+#include <uapi/linux/iommufd.h>
+
+#include "arm-smmu-v3.h"
+#include "../../dma-iommu.h"
+
+static bool disable_msipolling;
+module_param(disable_msipolling, bool, 0444);
+MODULE_PARM_DESC(disable_msipolling,
+ "Disable MSI-based polling for CMD_SYNC completion.");
+
+static const struct iommu_ops arm_smmu_ops;
+static struct iommu_dirty_ops arm_smmu_dirty_ops;
+
+enum arm_smmu_msi_index {
+ EVTQ_MSI_INDEX,
+ GERROR_MSI_INDEX,
+ PRIQ_MSI_INDEX,
+ ARM_SMMU_MAX_MSIS,
+};
+
+#define NUM_ENTRY_QWORDS 8
+static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
+static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
+
+static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
+ [EVTQ_MSI_INDEX] = {
+ ARM_SMMU_EVTQ_IRQ_CFG0,
+ ARM_SMMU_EVTQ_IRQ_CFG1,
+ ARM_SMMU_EVTQ_IRQ_CFG2,
+ },
+ [GERROR_MSI_INDEX] = {
+ ARM_SMMU_GERROR_IRQ_CFG0,
+ ARM_SMMU_GERROR_IRQ_CFG1,
+ ARM_SMMU_GERROR_IRQ_CFG2,
+ },
+ [PRIQ_MSI_INDEX] = {
+ ARM_SMMU_PRIQ_IRQ_CFG0,
+ ARM_SMMU_PRIQ_IRQ_CFG1,
+ ARM_SMMU_PRIQ_IRQ_CFG2,
+ },
+};
+
+struct arm_smmu_option_prop {
+ u32 opt;
+ const char *prop;
+};
+
+DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
+DEFINE_MUTEX(arm_smmu_asid_lock);
+
+static struct arm_smmu_option_prop arm_smmu_options[] = {
+ { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
+ { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
+ { 0, NULL},
+};
+
+static const char * const event_str[] = {
+ [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
+ [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
+ [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
+ [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
+ [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
+ [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
+ [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
+ [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
+ [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
+ [EVT_ID_ACCESS_FAULT] = "F_ACCESS",
+ [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
+ [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
+};
+
+static const char * const event_class_str[] = {
+ [0] = "CD fetch",
+ [1] = "Stage 1 translation table fetch",
+ [2] = "Input address caused fault",
+ [3] = "Reserved",
+};
+
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
+
+static void parse_driver_options(struct arm_smmu_device *smmu)
+{
+ int i = 0;
+
+ do {
+ if (of_property_read_bool(smmu->dev->of_node,
+ arm_smmu_options[i].prop)) {
+ smmu->options |= arm_smmu_options[i].opt;
+ dev_notice(smmu->dev, "option %s\n",
+ arm_smmu_options[i].prop);
+ }
+ } while (arm_smmu_options[++i].opt);
+}
+
+/* Low-level queue manipulation functions */
+static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
+{
+ u32 space, prod, cons;
+
+ prod = Q_IDX(q, q->prod);
+ cons = Q_IDX(q, q->cons);
+
+ if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
+ space = (1 << q->max_n_shift) - (prod - cons);
+ else
+ space = cons - prod;
+
+ return space >= n;
+}
+
+static bool queue_full(struct arm_smmu_ll_queue *q)
+{
+ return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
+ Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
+}
+
+static bool queue_empty(struct arm_smmu_ll_queue *q)
+{
+ return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
+ Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
+}
+
+static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
+{
+ return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
+ (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
+ ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
+ (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
+}
+
+static void queue_sync_cons_out(struct arm_smmu_queue *q)
+{
+ /*
+ * Ensure that all CPU accesses (reads and writes) to the queue
+ * are complete before we update the cons pointer.
+ */
+ __iomb();
+ writel_relaxed(q->llq.cons, q->cons_reg);
+}
+
+static void queue_inc_cons(struct arm_smmu_ll_queue *q)
+{
+ u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
+ q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
+}
+
+static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
+{
+ struct arm_smmu_ll_queue *llq = &q->llq;
+
+ if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
+ return;
+
+ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+ Q_IDX(llq, llq->cons);
+ queue_sync_cons_out(q);
+}
+
+static int queue_sync_prod_in(struct arm_smmu_queue *q)
+{
+ u32 prod;
+ int ret = 0;
+
+ /*
+ * We can't use the _relaxed() variant here, as we must prevent
+ * speculative reads of the queue before we have determined that
+ * prod has indeed moved.
+ */
+ prod = readl(q->prod_reg);
+
+ if (Q_OVF(prod) != Q_OVF(q->llq.prod))
+ ret = -EOVERFLOW;
+
+ q->llq.prod = prod;
+ return ret;
+}
+
+static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
+{
+ u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
+ return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
+}
+
+static void queue_poll_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue_poll *qp)
+{
+ qp->delay = 1;
+ qp->spin_cnt = 0;
+ qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+}
+
+static int queue_poll(struct arm_smmu_queue_poll *qp)
+{
+ if (ktime_compare(ktime_get(), qp->timeout) > 0)
+ return -ETIMEDOUT;
+
+ if (qp->wfe) {
+ wfe();
+ } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
+ cpu_relax();
+ } else {
+ udelay(qp->delay);
+ qp->delay *= 2;
+ qp->spin_cnt = 0;
+ }
+
+ return 0;
+}
+
+static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
+{
+ int i;
+
+ for (i = 0; i < n_dwords; ++i)
+ *dst++ = cpu_to_le64(*src++);
+}
+
+static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
+{
+ int i;
+
+ for (i = 0; i < n_dwords; ++i)
+ *dst++ = le64_to_cpu(*src++);
+}
+
+static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
+{
+ if (queue_empty(&q->llq))
+ return -EAGAIN;
+
+ queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
+ queue_inc_cons(&q->llq);
+ queue_sync_cons_out(q);
+ return 0;
+}
+
+/* High-level queue accessors */
+static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
+{
+ memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
+ cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
+
+ switch (ent->opcode) {
+ case CMDQ_OP_TLBI_EL2_ALL:
+ case CMDQ_OP_TLBI_NSNH_ALL:
+ break;
+ case CMDQ_OP_PREFETCH_CFG:
+ cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
+ break;
+ case CMDQ_OP_CFGI_CD:
+ cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
+ fallthrough;
+ case CMDQ_OP_CFGI_STE:
+ cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+ cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
+ break;
+ case CMDQ_OP_CFGI_CD_ALL:
+ cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+ break;
+ case CMDQ_OP_CFGI_ALL:
+ /* Cover the entire SID range */
+ cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
+ break;
+ case CMDQ_OP_TLBI_NH_VA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+ fallthrough;
+ case CMDQ_OP_TLBI_EL2_VA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
+ cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
+ break;
+ case CMDQ_OP_TLBI_S2_IPA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
+ cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
+ break;
+ case CMDQ_OP_TLBI_NH_ASID:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+ fallthrough;
+ case CMDQ_OP_TLBI_NH_ALL:
+ case CMDQ_OP_TLBI_S12_VMALL:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+ break;
+ case CMDQ_OP_TLBI_EL2_ASID:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+ break;
+ case CMDQ_OP_ATC_INV:
+ cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
+ cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
+ cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
+ break;
+ case CMDQ_OP_PRI_RESP:
+ cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+ cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
+ cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
+ cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
+ switch (ent->pri.resp) {
+ case PRI_RESP_DENY:
+ case PRI_RESP_FAIL:
+ case PRI_RESP_SUCC:
+ break;
+ default:
+ return -EINVAL;
+ }
+ cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
+ break;
+ case CMDQ_OP_RESUME:
+ cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
+ cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
+ cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
+ break;
+ case CMDQ_OP_CMD_SYNC:
+ if (ent->sync.msiaddr) {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
+ cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+ } else {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+ }
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ struct arm_smmu_cmdq *cmdq = NULL;
+
+ if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
+ cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+
+ return cmdq ?: &smmu->cmdq;
+}
+
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ if (cmdq == &smmu->cmdq)
+ return false;
+
+ return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
+}
+
+static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq, u32 prod)
+{
+ struct arm_smmu_queue *q = &cmdq->q;
+ struct arm_smmu_cmdq_ent ent = {
+ .opcode = CMDQ_OP_CMD_SYNC,
+ };
+
+ /*
+ * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
+ * payload, so the write will zero the entire command on that platform.
+ */
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
+ ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+ q->ent_dwords * 8;
+ }
+
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+}
+
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ static const char * const cerror_str[] = {
+ [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
+ [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
+ [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
+ [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
+ };
+ struct arm_smmu_queue *q = &cmdq->q;
+
+ int i;
+ u64 cmd[CMDQ_ENT_DWORDS];
+ u32 cons = readl_relaxed(q->cons_reg);
+ u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
+ struct arm_smmu_cmdq_ent cmd_sync = {
+ .opcode = CMDQ_OP_CMD_SYNC,
+ };
+
+ dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
+ idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
+
+ switch (idx) {
+ case CMDQ_ERR_CERROR_ABT_IDX:
+ dev_err(smmu->dev, "retrying command fetch\n");
+ return;
+ case CMDQ_ERR_CERROR_NONE_IDX:
+ return;
+ case CMDQ_ERR_CERROR_ATC_INV_IDX:
+ /*
+ * ATC Invalidation Completion timeout. CONS is still pointing
+ * at the CMD_SYNC. Attempt to complete other pending commands
+ * by repeating the CMD_SYNC, though we might well end up back
+ * here since the ATC invalidation may still be pending.
+ */
+ return;
+ case CMDQ_ERR_CERROR_ILL_IDX:
+ default:
+ break;
+ }
+
+ /*
+ * We may have concurrent producers, so we need to be careful
+ * not to touch any of the shadow cmdq state.
+ */
+ queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+ dev_err(smmu->dev, "skipping command in error state:\n");
+ for (i = 0; i < ARRAY_SIZE(cmd); ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
+
+ /* Convert the erroneous command into a CMD_SYNC */
+ arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+
+ queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+}
+
+static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+{
+ __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
+}
+
+/*
+ * Command queue locking.
+ * This is a form of bastardised rwlock with the following major changes:
+ *
+ * - The only LOCK routines are exclusive_trylock() and shared_lock().
+ * Neither have barrier semantics, and instead provide only a control
+ * dependency.
+ *
+ * - The UNLOCK routines are supplemented with shared_tryunlock(), which
+ * fails if the caller appears to be the last lock holder (yes, this is
+ * racy). All successful UNLOCK routines have RELEASE semantics.
+ */
+static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
+{
+ int val;
+
+ /*
+ * We can try to avoid the cmpxchg() loop by simply incrementing the
+ * lock counter. When held in exclusive state, the lock counter is set
+ * to INT_MIN so these increments won't hurt as the value will remain
+ * negative.
+ */
+ if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
+ return;
+
+ do {
+ val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
+ } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
+}
+
+static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
+{
+ (void)atomic_dec_return_release(&cmdq->lock);
+}
+
+static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
+{
+ if (atomic_read(&cmdq->lock) == 1)
+ return false;
+
+ arm_smmu_cmdq_shared_unlock(cmdq);
+ return true;
+}
+
+#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
+({ \
+ bool __ret; \
+ local_irq_save(flags); \
+ __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
+ if (!__ret) \
+ local_irq_restore(flags); \
+ __ret; \
+})
+
+#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
+({ \
+ atomic_set_release(&cmdq->lock, 0); \
+ local_irq_restore(flags); \
+})
+
+
+/*
+ * Command queue insertion.
+ * This is made fiddly by our attempts to achieve some sort of scalability
+ * since there is one queue shared amongst all of the CPUs in the system. If
+ * you like mixed-size concurrency, dependency ordering and relaxed atomics,
+ * then you'll *love* this monstrosity.
+ *
+ * The basic idea is to split the queue up into ranges of commands that are
+ * owned by a given CPU; the owner may not have written all of the commands
+ * itself, but is responsible for advancing the hardware prod pointer when
+ * the time comes. The algorithm is roughly:
+ *
+ * 1. Allocate some space in the queue. At this point we also discover
+ * whether the head of the queue is currently owned by another CPU,
+ * or whether we are the owner.
+ *
+ * 2. Write our commands into our allocated slots in the queue.
+ *
+ * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
+ *
+ * 4. If we are an owner:
+ * a. Wait for the previous owner to finish.
+ * b. Mark the queue head as unowned, which tells us the range
+ * that we are responsible for publishing.
+ * c. Wait for all commands in our owned range to become valid.
+ * d. Advance the hardware prod pointer.
+ * e. Tell the next owner we've finished.
+ *
+ * 5. If we are inserting a CMD_SYNC (we may or may not have been an
+ * owner), then we need to stick around until it has completed:
+ * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
+ * to clear the first 4 bytes.
+ * b. Otherwise, we spin waiting for the hardware cons pointer to
+ * advance past our command.
+ *
+ * The devil is in the details, particularly the use of locking for handling
+ * SYNC completion and freeing up space in the queue before we think that it is
+ * full.
+ */
+static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod, bool set)
+{
+ u32 swidx, sbidx, ewidx, ebidx;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ .prod = sprod,
+ };
+
+ ewidx = BIT_WORD(Q_IDX(&llq, eprod));
+ ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
+
+ while (llq.prod != eprod) {
+ unsigned long mask;
+ atomic_long_t *ptr;
+ u32 limit = BITS_PER_LONG;
+
+ swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
+ sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
+
+ ptr = &cmdq->valid_map[swidx];
+
+ if ((swidx == ewidx) && (sbidx < ebidx))
+ limit = ebidx;
+
+ mask = GENMASK(limit - 1, sbidx);
+
+ /*
+ * The valid bit is the inverse of the wrap bit. This means
+ * that a zero-initialised queue is invalid and, after marking
+ * all entries as valid, they become invalid again when we
+ * wrap.
+ */
+ if (set) {
+ atomic_long_xor(mask, ptr);
+ } else { /* Poll */
+ unsigned long valid;
+
+ valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
+ atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
+ }
+
+ llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
+ }
+}
+
+/* Mark all entries in the range [sprod, eprod) as valid */
+static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod)
+{
+ __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
+}
+
+/* Wait for all entries in the range [sprod, eprod) to become valid */
+static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod)
+{
+ __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
+}
+
+/* Wait for the command queue to become non-full */
+static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_ll_queue *llq)
+{
+ unsigned long flags;
+ struct arm_smmu_queue_poll qp;
+ int ret = 0;
+
+ /*
+ * Try to update our copy of cons by grabbing exclusive cmdq access. If
+ * that fails, spin until somebody else updates it for us.
+ */
+ if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
+ WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
+ arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
+ llq->val = READ_ONCE(cmdq->q.llq.val);
+ return 0;
+ }
+
+ queue_poll_init(smmu, &qp);
+ do {
+ llq->val = READ_ONCE(cmdq->q.llq.val);
+ if (!queue_full(llq))
+ break;
+
+ ret = queue_poll(&qp);
+ } while (!ret);
+
+ return ret;
+}
+
+/*
+ * Wait until the SMMU signals a CMD_SYNC completion MSI.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_ll_queue *llq)
+{
+ int ret = 0;
+ struct arm_smmu_queue_poll qp;
+ u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
+
+ queue_poll_init(smmu, &qp);
+
+ /*
+ * The MSI won't generate an event, since it's being written back
+ * into the command queue.
+ */
+ qp.wfe = false;
+ smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
+ llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
+ return ret;
+}
+
+/*
+ * Wait until the SMMU cons index passes llq->prod.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_ll_queue *llq)
+{
+ struct arm_smmu_queue_poll qp;
+ u32 prod = llq->prod;
+ int ret = 0;
+
+ queue_poll_init(smmu, &qp);
+ llq->val = READ_ONCE(cmdq->q.llq.val);
+ do {
+ if (queue_consumed(llq, prod))
+ break;
+
+ ret = queue_poll(&qp);
+
+ /*
+ * This needs to be a readl() so that our subsequent call
+ * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
+ *
+ * Specifically, we need to ensure that we observe all
+ * shared_lock()s by other CMD_SYNCs that share our owner,
+ * so that a failing call to tryunlock() means that we're
+ * the last one out and therefore we can safely advance
+ * cmdq->q.llq.cons. Roughly speaking:
+ *
+ * CPU 0 CPU1 CPU2 (us)
+ *
+ * if (sync)
+ * shared_lock();
+ *
+ * dma_wmb();
+ * set_valid_map();
+ *
+ * if (owner) {
+ * poll_valid_map();
+ * <control dependency>
+ * writel(prod_reg);
+ *
+ * readl(cons_reg);
+ * tryunlock();
+ *
+ * Requires us to see CPU 0's shared_lock() acquisition.
+ */
+ llq->cons = readl(cmdq->q.cons_reg);
+ } while (!ret);
+
+ return ret;
+}
+
+static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_ll_queue *llq)
+{
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+ !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
+
+ return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
+}
+
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+ u32 prod, int n)
+{
+ int i;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ .prod = prod,
+ };
+
+ for (i = 0; i < n; ++i) {
+ u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
+
+ prod = queue_inc_prod_n(&llq, i);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+ }
+}
+
+/*
+ * This is the actual insertion function, and provides the following
+ * ordering guarantees to callers:
+ *
+ * - There is a dma_wmb() before publishing any commands to the queue.
+ * This can be relied upon to order prior writes to data structures
+ * in memory (such as a CD or an STE) before the command.
+ *
+ * - On completion of a CMD_SYNC, there is a control dependency.
+ * This can be relied upon to order subsequent writes to memory (e.g.
+ * freeing an IOVA) after completion of the CMD_SYNC.
+ *
+ * - Command insertion is totally ordered, so if two CPUs each race to
+ * insert their own list of commands then all of the commands from one
+ * CPU will appear before any of the commands from the other CPU.
+ */
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ bool sync)
+{
+ u64 cmd_sync[CMDQ_ENT_DWORDS];
+ u32 prod;
+ unsigned long flags;
+ bool owner;
+ struct arm_smmu_ll_queue llq, head;
+ int ret = 0;
+
+ llq.max_n_shift = cmdq->q.llq.max_n_shift;
+
+ /* 1. Allocate some space in the queue */
+ local_irq_save(flags);
+ llq.val = READ_ONCE(cmdq->q.llq.val);
+ do {
+ u64 old;
+
+ while (!queue_has_space(&llq, n + sync)) {
+ local_irq_restore(flags);
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
+ dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+ local_irq_save(flags);
+ }
+
+ head.cons = llq.cons;
+ head.prod = queue_inc_prod_n(&llq, n + sync) |
+ CMDQ_PROD_OWNED_FLAG;
+
+ old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+ if (old == llq.val)
+ break;
+
+ llq.val = old;
+ } while (1);
+ owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
+ head.prod &= ~CMDQ_PROD_OWNED_FLAG;
+ llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+ /*
+ * 2. Write our commands into the queue
+ * Dependency ordering from the cmpxchg() loop above.
+ */
+ arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+ if (sync) {
+ prod = queue_inc_prod_n(&llq, n);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+
+ /*
+ * In order to determine completion of our CMD_SYNC, we must
+ * ensure that the queue can't wrap twice without us noticing.
+ * We achieve that by taking the cmdq lock as shared before
+ * marking our slot as valid.
+ */
+ arm_smmu_cmdq_shared_lock(cmdq);
+ }
+
+ /* 3. Mark our slots as valid, ensuring commands are visible first */
+ dma_wmb();
+ arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
+
+ /* 4. If we are the owner, take control of the SMMU hardware */
+ if (owner) {
+ /* a. Wait for previous owner to finish */
+ atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
+
+ /* b. Stop gathering work by clearing the owned flag */
+ prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
+ &cmdq->q.llq.atomic.prod);
+ prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+ /*
+ * c. Wait for any gathered work to be written to the queue.
+ * Note that we read our own entries so that we have the control
+ * dependency required by (d).
+ */
+ arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
+
+ /*
+ * d. Advance the hardware prod pointer
+ * Control dependency ordering from the entries becoming valid.
+ */
+ writel_relaxed(prod, cmdq->q.prod_reg);
+
+ /*
+ * e. Tell the next owner we're done
+ * Make sure we've updated the hardware first, so that we don't
+ * race to update prod and potentially move it backwards.
+ */
+ atomic_set_release(&cmdq->owner_prod, prod);
+ }
+
+ /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+ if (sync) {
+ llq.prod = queue_inc_prod_n(&llq, n);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
+ if (ret) {
+ dev_err_ratelimited(smmu->dev,
+ "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+ llq.prod,
+ readl_relaxed(cmdq->q.prod_reg),
+ readl_relaxed(cmdq->q.cons_reg));
+ }
+
+ /*
+ * Try to unlock the cmdq lock. This will fail if we're the last
+ * reader, in which case we can safely update cmdq->q.llq.cons
+ */
+ if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
+ WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+ arm_smmu_cmdq_shared_unlock(cmdq);
+ }
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent,
+ bool sync)
+{
+ u64 cmd[CMDQ_ENT_DWORDS];
+
+ if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
+ dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+ ent->opcode);
+ return -EINVAL;
+ }
+
+ return arm_smmu_cmdq_issue_cmdlist(
+ smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
+}
+
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
+}
+
+static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
+}
+
+static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ cmds->num = 0;
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
+}
+
+static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *cmd)
+{
+ bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
+ bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
+ int index;
+
+ if (force_sync || unsupported_cmd) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ }
+
+ if (cmds->num == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, false);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ }
+
+ index = cmds->num * CMDQ_ENT_DWORDS;
+ if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
+ dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+ cmd->opcode);
+ return;
+ }
+
+ cmds->num++;
+}
+
+static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds)
+{
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
+}
+
+static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
+ struct iommu_page_response *resp)
+{
+ struct arm_smmu_cmdq_ent cmd = {0};
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ int sid = master->streams[0].id;
+
+ if (WARN_ON(!master->stall_enabled))
+ return;
+
+ cmd.opcode = CMDQ_OP_RESUME;
+ cmd.resume.sid = sid;
+ cmd.resume.stag = resp->grpid;
+ switch (resp->code) {
+ case IOMMU_PAGE_RESP_INVALID:
+ case IOMMU_PAGE_RESP_FAILURE:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+ break;
+ case IOMMU_PAGE_RESP_SUCCESS:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+ break;
+ default:
+ break;
+ }
+
+ arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+ /*
+ * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
+ * RESUME consumption guarantees that the stalled transaction will be
+ * terminated... at some point in the future. PRI_RESP is fire and
+ * forget.
+ */
+}
+
+/* Context descriptor manipulation functions */
+void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
+ .tlbi.asid = asid,
+ };
+
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+}
+
+/*
+ * Based on the value of ent report which bits of the STE the HW will access. It
+ * would be nice if this was complete according to the spec, but minimally it
+ * has to capture the bits this driver uses.
+ */
+VISIBLE_IF_KUNIT
+void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
+{
+ unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
+
+ used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
+ if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
+ return;
+
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
+
+ /* S1 translates */
+ if (cfg & BIT(0)) {
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
+ STRTAB_STE_0_S1CTXPTR_MASK |
+ STRTAB_STE_0_S1CDMAX);
+ used_bits[1] |=
+ cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
+ STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
+ STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
+ STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
+ used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
+
+ /*
+ * See 13.5 Summary of attribute/permission configuration fields
+ * for the SHCFG behavior.
+ */
+ if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
+ STRTAB_STE_1_S1DSS_BYPASS)
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+ }
+
+ /* S2 translates */
+ if (cfg & BIT(1)) {
+ used_bits[1] |=
+ cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
+ STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
+ used_bits[2] |=
+ cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
+ STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
+ STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
+ STRTAB_STE_2_S2R);
+ used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
+ }
+
+ if (cfg == STRTAB_STE_0_CFG_BYPASS)
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
+
+/*
+ * Figure out if we can do a hitless update of entry to become target. Returns a
+ * bit mask where 1 indicates that qword needs to be set disruptively.
+ * unused_update is an intermediate value of entry that has unused bits set to
+ * their new values.
+ */
+static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
+ const __le64 *entry, const __le64 *target,
+ __le64 *unused_update)
+{
+ __le64 target_used[NUM_ENTRY_QWORDS] = {};
+ __le64 cur_used[NUM_ENTRY_QWORDS] = {};
+ u8 used_qword_diff = 0;
+ unsigned int i;
+
+ writer->ops->get_used(entry, cur_used);
+ writer->ops->get_used(target, target_used);
+
+ for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
+ /*
+ * Check that masks are up to date, the make functions are not
+ * allowed to set a bit to 1 if the used function doesn't say it
+ * is used.
+ */
+ WARN_ON_ONCE(target[i] & ~target_used[i]);
+
+ /* Bits can change because they are not currently being used */
+ unused_update[i] = (entry[i] & cur_used[i]) |
+ (target[i] & ~cur_used[i]);
+ /*
+ * Each bit indicates that a used bit in a qword needs to be
+ * changed after unused_update is applied.
+ */
+ if ((unused_update[i] & target_used[i]) != target[i])
+ used_qword_diff |= 1 << i;
+ }
+ return used_qword_diff;
+}
+
+static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
+ const __le64 *target, unsigned int start,
+ unsigned int len)
+{
+ bool changed = false;
+ unsigned int i;
+
+ for (i = start; len != 0; len--, i++) {
+ if (entry[i] != target[i]) {
+ WRITE_ONCE(entry[i], target[i]);
+ changed = true;
+ }
+ }
+
+ if (changed)
+ writer->ops->sync(writer);
+ return changed;
+}
+
+/*
+ * Update the STE/CD to the target configuration. The transition from the
+ * current entry to the target entry takes place over multiple steps that
+ * attempts to make the transition hitless if possible. This function takes care
+ * not to create a situation where the HW can perceive a corrupted entry. HW is
+ * only required to have a 64 bit atomicity with stores from the CPU, while
+ * entries are many 64 bit values big.
+ *
+ * The difference between the current value and the target value is analyzed to
+ * determine which of three updates are required - disruptive, hitless or no
+ * change.
+ *
+ * In the most general disruptive case we can make any update in three steps:
+ * - Disrupting the entry (V=0)
+ * - Fill now unused qwords, execpt qword 0 which contains V
+ * - Make qword 0 have the final value and valid (V=1) with a single 64
+ * bit store
+ *
+ * However this disrupts the HW while it is happening. There are several
+ * interesting cases where a STE/CD can be updated without disturbing the HW
+ * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
+ * because the used bits don't intersect. We can detect this by calculating how
+ * many 64 bit values need update after adjusting the unused bits and skip the
+ * V=0 process. This relies on the IGNORED behavior described in the
+ * specification.
+ */
+VISIBLE_IF_KUNIT
+void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
+ const __le64 *target)
+{
+ __le64 unused_update[NUM_ENTRY_QWORDS];
+ u8 used_qword_diff;
+
+ used_qword_diff =
+ arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
+ if (hweight8(used_qword_diff) == 1) {
+ /*
+ * Only one qword needs its used bits to be changed. This is a
+ * hitless update, update all bits the current STE/CD is
+ * ignoring to their new values, then update a single "critical
+ * qword" to change the STE/CD and finally 0 out any bits that
+ * are now unused in the target configuration.
+ */
+ unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
+
+ /*
+ * Skip writing unused bits in the critical qword since we'll be
+ * writing it in the next step anyways. This can save a sync
+ * when the only change is in that qword.
+ */
+ unused_update[critical_qword_index] =
+ entry[critical_qword_index];
+ entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
+ entry_set(writer, entry, target, critical_qword_index, 1);
+ entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
+ } else if (used_qword_diff) {
+ /*
+ * At least two qwords need their inuse bits to be changed. This
+ * requires a breaking update, zero the V bit, write all qwords
+ * but 0, then set qword 0
+ */
+ unused_update[0] = 0;
+ entry_set(writer, entry, unused_update, 0, 1);
+ entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
+ entry_set(writer, entry, target, 0, 1);
+ } else {
+ /*
+ * No inuse bit changed. Sanity check that all unused bits are 0
+ * in the entry. The target was already sanity checked by
+ * compute_qword_diff().
+ */
+ WARN_ON_ONCE(
+ entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
+ }
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
+
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
+ int ssid, bool leaf)
+{
+ size_t i;
+ struct arm_smmu_cmdq_batch cmds;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_CFGI_CD,
+ .cfgi = {
+ .ssid = ssid,
+ .leaf = leaf,
+ },
+ };
+
+ arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.cfgi.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ }
+
+ arm_smmu_cmdq_batch_submit(smmu, &cmds);
+}
+
+static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
+{
+ u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
+
+ /* The HW has 64 bit atomicity with stores to the L2 CD table */
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
+}
+
+static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
+{
+ return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
+}
+
+struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
+ u32 ssid)
+{
+ struct arm_smmu_cdtab_l2 *l2;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+
+ if (!arm_smmu_cdtab_allocated(cd_table))
+ return NULL;
+
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ return &cd_table->linear.table[ssid];
+
+ l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
+ if (!l2)
+ return NULL;
+ return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
+}
+
+static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
+ u32 ssid)
+{
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ might_sleep();
+ iommu_group_mutex_assert(master->dev);
+
+ if (!arm_smmu_cdtab_allocated(cd_table)) {
+ if (arm_smmu_alloc_cd_tables(master))
+ return NULL;
+ }
+
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
+ unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
+ struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
+
+ if (!*l2ptr) {
+ dma_addr_t l2ptr_dma;
+
+ *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2ptr)
+ return NULL;
+
+ arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
+ l2ptr_dma);
+ /* An invalid L1CD can be cached */
+ arm_smmu_sync_cd(master, ssid, false);
+ }
+ }
+ return arm_smmu_get_cd_ptr(master, ssid);
+}
+
+struct arm_smmu_cd_writer {
+ struct arm_smmu_entry_writer writer;
+ unsigned int ssid;
+};
+
+VISIBLE_IF_KUNIT
+void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
+{
+ used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
+ if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
+ return;
+ memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
+
+ /*
+ * If EPD0 is set by the make function it means
+ * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
+ */
+ if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
+ used_bits[0] &= ~cpu_to_le64(
+ CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
+ CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
+ CTXDESC_CD_0_TCR_SH0);
+ used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
+ }
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
+
+static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
+{
+ struct arm_smmu_cd_writer *cd_writer =
+ container_of(writer, struct arm_smmu_cd_writer, writer);
+
+ arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
+}
+
+static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
+ .sync = arm_smmu_cd_writer_sync_entry,
+ .get_used = arm_smmu_get_cd_used,
+};
+
+void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
+ struct arm_smmu_cd *cdptr,
+ const struct arm_smmu_cd *target)
+{
+ bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
+ bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
+ struct arm_smmu_cd_writer cd_writer = {
+ .writer = {
+ .ops = &arm_smmu_cd_writer_ops,
+ .master = master,
+ },
+ .ssid = ssid,
+ };
+
+ if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
+ if (cur_valid)
+ master->cd_table.used_ssids--;
+ else
+ master->cd_table.used_ssids++;
+ }
+
+ arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
+}
+
+void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
+ const struct io_pgtable_cfg *pgtbl_cfg =
+ &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
+ &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+
+ memset(target, 0, sizeof(*target));
+
+ target->data[0] = cpu_to_le64(
+ FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
+#ifdef __BIG_ENDIAN
+ CTXDESC_CD_0_ENDI |
+#endif
+ CTXDESC_CD_0_TCR_EPD1 |
+ CTXDESC_CD_0_V |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
+ CTXDESC_CD_0_AA64 |
+ (master->stall_enabled ? CTXDESC_CD_0_S : 0) |
+ CTXDESC_CD_0_R |
+ CTXDESC_CD_0_A |
+ CTXDESC_CD_0_ASET |
+ FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
+ );
+
+ /* To enable dirty flag update, set both Access flag and dirty state update */
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
+ target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
+ CTXDESC_CD_0_TCR_HD);
+
+ target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
+ CTXDESC_CD_1_TTB0_MASK);
+ target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
+
+void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
+{
+ struct arm_smmu_cd target = {};
+ struct arm_smmu_cd *cdptr;
+
+ if (!arm_smmu_cdtab_allocated(&master->cd_table))
+ return;
+ cdptr = arm_smmu_get_cd_ptr(master, ssid);
+ if (WARN_ON(!cdptr))
+ return;
+ arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
+}
+
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
+{
+ int ret;
+ size_t l1size;
+ size_t max_contexts;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+
+ cd_table->s1cdmax = master->ssid_bits;
+ max_contexts = 1 << cd_table->s1cdmax;
+
+ if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
+ max_contexts <= CTXDESC_L2_ENTRIES) {
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+ cd_table->linear.num_ents = max_contexts;
+
+ l1size = max_contexts * sizeof(struct arm_smmu_cd);
+ cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->linear.table)
+ return -ENOMEM;
+ } else {
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+ cd_table->l2.num_l1_ents =
+ DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
+
+ cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
+ sizeof(*cd_table->l2.l2ptrs),
+ GFP_KERNEL);
+ if (!cd_table->l2.l2ptrs)
+ return -ENOMEM;
+
+ l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
+ cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->l2.l1tab) {
+ ret = -ENOMEM;
+ goto err_free_l2ptrs;
+ }
+ }
+ return 0;
+
+err_free_l2ptrs:
+ kfree(cd_table->l2.l2ptrs);
+ cd_table->l2.l2ptrs = NULL;
+ return ret;
+}
+
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
+{
+ int i;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+
+ if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
+ for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
+ if (!cd_table->l2.l2ptrs[i])
+ continue;
+
+ dma_free_coherent(smmu->dev,
+ sizeof(*cd_table->l2.l2ptrs[i]),
+ cd_table->l2.l2ptrs[i],
+ arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
+ }
+ kfree(cd_table->l2.l2ptrs);
+
+ dma_free_coherent(smmu->dev,
+ cd_table->l2.num_l1_ents *
+ sizeof(struct arm_smmu_cdtab_l1),
+ cd_table->l2.l1tab, cd_table->cdtab_dma);
+ } else {
+ dma_free_coherent(smmu->dev,
+ cd_table->linear.num_ents *
+ sizeof(struct arm_smmu_cd),
+ cd_table->linear.table, cd_table->cdtab_dma);
+ }
+}
+
+/* Stream table manipulation functions */
+static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
+{
+ u64 val = 0;
+
+ val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
+ val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
+
+ /* The HW has 64 bit atomicity with stores to the L2 STE table */
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
+}
+
+struct arm_smmu_ste_writer {
+ struct arm_smmu_entry_writer writer;
+ u32 sid;
+};
+
+static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
+{
+ struct arm_smmu_ste_writer *ste_writer =
+ container_of(writer, struct arm_smmu_ste_writer, writer);
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_CFGI_STE,
+ .cfgi = {
+ .sid = ste_writer->sid,
+ .leaf = true,
+ },
+ };
+
+ arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
+}
+
+static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
+ .sync = arm_smmu_ste_writer_sync_entry,
+ .get_used = arm_smmu_get_ste_used,
+};
+
+static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
+ struct arm_smmu_ste *ste,
+ const struct arm_smmu_ste *target)
+{
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ste_writer ste_writer = {
+ .writer = {
+ .ops = &arm_smmu_ste_writer_ops,
+ .master = master,
+ },
+ .sid = sid,
+ };
+
+ arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
+
+ /* It's likely that we'll want to use the new STE soon */
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
+ struct arm_smmu_cmdq_ent
+ prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
+ .prefetch = {
+ .sid = sid,
+ } };
+
+ arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ }
+}
+
+void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
+{
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
+
+VISIBLE_IF_KUNIT
+void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *target)
+{
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
+
+VISIBLE_IF_KUNIT
+void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master, bool ats_enabled,
+ unsigned int s1dss)
+{
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
+ (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
+
+ target->data[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
+ FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
+ ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+ !master->stall_enabled) ?
+ STRTAB_STE_1_S1STALLD :
+ 0) |
+ FIELD_PREP(STRTAB_STE_1_EATS,
+ ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
+ s1dss == STRTAB_STE_1_S1DSS_BYPASS)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(
+ STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H) {
+ /*
+ * To support BTM the streamworld needs to match the
+ * configuration of the CPU so that the ASID broadcasts are
+ * properly matched. This means either S/NS-EL2-E2H (hypervisor)
+ * or NS-EL1 (guest). Since an SVA domain can be installed in a
+ * PASID this should always use a BTM compatible configuration
+ * if the HW supports it.
+ */
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
+ } else {
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
+
+ /*
+ * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
+ * arm_smmu_domain_alloc_id()
+ */
+ target->data[2] =
+ cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
+ }
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
+
+void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain,
+ bool ats_enabled)
+{
+ struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+ const struct io_pgtable_cfg *pgtbl_cfg =
+ &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
+ &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+ u64 vtcr_val;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ memset(target, 0, sizeof(*target));
+ target->data[0] = cpu_to_le64(
+ STRTAB_STE_0_V |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
+
+ target->data[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_EATS,
+ ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
+
+ vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
+ target->data[2] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
+ FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
+ STRTAB_STE_2_S2AA64 |
+#ifdef __BIG_ENDIAN
+ STRTAB_STE_2_S2ENDI |
+#endif
+ STRTAB_STE_2_S2PTW |
+ (master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
+ STRTAB_STE_2_S2R);
+
+ target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
+ STRTAB_STE_3_S2TTB_MASK);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
+
+/*
+ * This can safely directly manipulate the STE memory without a sync sequence
+ * because the STE table has not been installed in the SMMU yet.
+ */
+static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
+ unsigned int nent)
+{
+ unsigned int i;
+
+ for (i = 0; i < nent; ++i) {
+ arm_smmu_make_abort_ste(strtab);
+ strtab++;
+ }
+}
+
+static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
+{
+ dma_addr_t l2ptr_dma;
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ struct arm_smmu_strtab_l2 **l2table;
+
+ l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
+ if (*l2table)
+ return 0;
+
+ *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2table) {
+ dev_err(smmu->dev,
+ "failed to allocate l2 stream table for SID %u\n",
+ sid);
+ return -ENOMEM;
+ }
+
+ arm_smmu_init_initial_stes((*l2table)->stes,
+ ARRAY_SIZE((*l2table)->stes));
+ arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
+ l2ptr_dma);
+ return 0;
+}
+
+static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
+{
+ struct arm_smmu_stream *stream_rhs =
+ rb_entry(rhs, struct arm_smmu_stream, node);
+ const u32 *sid_lhs = lhs;
+
+ if (*sid_lhs < stream_rhs->id)
+ return -1;
+ if (*sid_lhs > stream_rhs->id)
+ return 1;
+ return 0;
+}
+
+static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
+ const struct rb_node *rhs)
+{
+ return arm_smmu_streams_cmp_key(
+ &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
+}
+
+static struct arm_smmu_master *
+arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
+{
+ struct rb_node *node;
+
+ lockdep_assert_held(&smmu->streams_mutex);
+
+ node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct arm_smmu_stream, node)->master;
+}
+
+/* IRQ and event handlers */
+static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ struct arm_smmu_master *master;
+
+ event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
+ event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
+ event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
+ event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
+ event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
+ event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
+ event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
+ event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
+ event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
+ event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
+ event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
+ event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
+ event->ipa = raw[3] & EVTQ_3_IPA;
+ event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
+ event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
+ event->class_tt = false;
+ event->dev = NULL;
+
+ if (event->id == EVT_ID_PERMISSION_FAULT)
+ event->class_tt = (event->class == EVTQ_1_CLASS_TT);
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, event->sid);
+ if (master)
+ event->dev = get_device(master->dev);
+ mutex_unlock(&smmu->streams_mutex);
+}
+
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
+ struct arm_smmu_event *event)
+{
+ int ret = 0;
+ u32 perm = 0;
+ struct arm_smmu_master *master;
+ struct iopf_fault fault_evt = { };
+ struct iommu_fault *flt = &fault_evt.fault;
+
+ switch (event->id) {
+ case EVT_ID_BAD_STE_CONFIG:
+ case EVT_ID_STREAM_DISABLED_FAULT:
+ case EVT_ID_BAD_SUBSTREAMID_CONFIG:
+ case EVT_ID_BAD_CD_CONFIG:
+ case EVT_ID_TRANSLATION_FAULT:
+ case EVT_ID_ADDR_SIZE_FAULT:
+ case EVT_ID_ACCESS_FAULT:
+ case EVT_ID_PERMISSION_FAULT:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (event->stall) {
+ if (event->read)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
+
+ if (event->instruction)
+ perm |= IOMMU_FAULT_PERM_EXEC;
+
+ if (event->privileged)
+ perm |= IOMMU_FAULT_PERM_PRIV;
+
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request){
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = event->stag,
+ .perm = perm,
+ .addr = event->iova,
+ };
+
+ if (event->ssv) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = event->ssid;
+ }
+ }
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, event->sid);
+ if (!master) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (event->stall)
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ else if (master->vmaster && !event->s2)
+ ret = arm_vmaster_report_event(master->vmaster, evt);
+ else
+ ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
+out_unlock:
+ mutex_unlock(&smmu->streams_mutex);
+ return ret;
+}
+
+static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ int i;
+
+ dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
+
+ for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
+}
+
+#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
+#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
+#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? dev_name((e)->dev) : "(unassigned sid)"
+
+static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *evt,
+ struct ratelimit_state *rs)
+{
+ if (!__ratelimit(rs))
+ return;
+
+ arm_smmu_dump_raw_event(smmu, raw, evt);
+
+ switch (evt->id) {
+ case EVT_ID_TRANSLATION_FAULT:
+ case EVT_ID_ADDR_SIZE_FAULT:
+ case EVT_ID_ACCESS_FAULT:
+ case EVT_ID_PERMISSION_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->iova, evt->ipa);
+
+ dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x",
+ evt->privileged ? "priv" : "unpriv",
+ evt->instruction ? "inst" : "data",
+ str_read_write(evt->read),
+ evt->s2 ? "s2" : "s1", event_class_str[evt->class],
+ evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
+ evt->stall ? " stall" : "", evt->stag);
+
+ break;
+
+ case EVT_ID_STE_FETCH_FAULT:
+ case EVT_ID_CD_FETCH_FAULT:
+ case EVT_ID_VMS_FETCH_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->fetch_addr);
+
+ break;
+
+ default:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid);
+ }
+}
+
+static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
+{
+ u64 evt[EVTQ_ENT_DWORDS];
+ struct arm_smmu_event event = {0};
+ struct arm_smmu_device *smmu = dev;
+ struct arm_smmu_queue *q = &smmu->evtq.q;
+ struct arm_smmu_ll_queue *llq = &q->llq;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ do {
+ while (!queue_remove_raw(q, evt)) {
+ arm_smmu_decode_event(smmu, evt, &event);
+ if (arm_smmu_handle_event(smmu, evt, &event))
+ arm_smmu_dump_event(smmu, evt, &event, &rs);
+
+ put_device(event.dev);
+ cond_resched();
+ }
+
+ /*
+ * Not much we can do on overflow, so scream and pretend we're
+ * trying harder.
+ */
+ if (queue_sync_prod_in(q) == -EOVERFLOW)
+ dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+}
+
+static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
+{
+ u32 sid, ssid;
+ u16 grpid;
+ bool ssv, last;
+
+ sid = FIELD_GET(PRIQ_0_SID, evt[0]);
+ ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
+ ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
+ last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
+ grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
+
+ dev_info(smmu->dev, "unexpected PRI request received:\n");
+ dev_info(smmu->dev,
+ "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
+ sid, ssid, grpid, last ? "L" : "",
+ evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
+ evt[0] & PRIQ_0_PERM_READ ? "R" : "",
+ evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
+ evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
+ evt[1] & PRIQ_1_ADDR_MASK);
+
+ if (last) {
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_PRI_RESP,
+ .substream_valid = ssv,
+ .pri = {
+ .sid = sid,
+ .ssid = ssid,
+ .grpid = grpid,
+ .resp = PRI_RESP_DENY,
+ },
+ };
+
+ arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ }
+}
+
+static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
+{
+ struct arm_smmu_device *smmu = dev;
+ struct arm_smmu_queue *q = &smmu->priq.q;
+ struct arm_smmu_ll_queue *llq = &q->llq;
+ u64 evt[PRIQ_ENT_DWORDS];
+
+ do {
+ while (!queue_remove_raw(q, evt))
+ arm_smmu_handle_ppr(smmu, evt);
+
+ if (queue_sync_prod_in(q) == -EOVERFLOW)
+ dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+}
+
+static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
+
+static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
+{
+ u32 gerror, gerrorn, active;
+ struct arm_smmu_device *smmu = dev;
+
+ gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
+ gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
+
+ active = gerror ^ gerrorn;
+ if (!(active & GERROR_ERR_MASK))
+ return IRQ_NONE; /* No errors pending */
+
+ dev_warn(smmu->dev,
+ "unexpected global error reported (0x%08x), this could be serious\n",
+ active);
+
+ if (active & GERROR_SFM_ERR) {
+ dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
+ arm_smmu_device_disable(smmu);
+ }
+
+ if (active & GERROR_MSI_GERROR_ABT_ERR)
+ dev_warn(smmu->dev, "GERROR MSI write aborted\n");
+
+ if (active & GERROR_MSI_PRIQ_ABT_ERR)
+ dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
+
+ if (active & GERROR_MSI_EVTQ_ABT_ERR)
+ dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
+
+ if (active & GERROR_MSI_CMDQ_ABT_ERR)
+ dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
+
+ if (active & GERROR_PRIQ_ABT_ERR)
+ dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
+
+ if (active & GERROR_EVTQ_ABT_ERR)
+ dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
+
+ if (active & GERROR_CMDQ_ERR)
+ arm_smmu_cmdq_skip_err(smmu);
+
+ writel(gerror, smmu->base + ARM_SMMU_GERRORN);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
+{
+ struct arm_smmu_device *smmu = dev;
+
+ arm_smmu_evtq_thread(irq, dev);
+ if (smmu->features & ARM_SMMU_FEAT_PRI)
+ arm_smmu_priq_thread(irq, dev);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
+{
+ arm_smmu_gerror_handler(irq, dev);
+ return IRQ_WAKE_THREAD;
+}
+
+static void
+arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
+ struct arm_smmu_cmdq_ent *cmd)
+{
+ size_t log2_span;
+ size_t span_mask;
+ /* ATC invalidates are always on 4096-bytes pages */
+ size_t inval_grain_shift = 12;
+ unsigned long page_start, page_end;
+
+ /*
+ * ATS and PASID:
+ *
+ * If substream_valid is clear, the PCIe TLP is sent without a PASID
+ * prefix. In that case all ATC entries within the address range are
+ * invalidated, including those that were requested with a PASID! There
+ * is no way to invalidate only entries without PASID.
+ *
+ * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
+ * traffic), translation requests without PASID create ATC entries
+ * without PASID, which must be invalidated with substream_valid clear.
+ * This has the unpleasant side-effect of invalidating all PASID-tagged
+ * ATC entries within the address range.
+ */
+ *cmd = (struct arm_smmu_cmdq_ent) {
+ .opcode = CMDQ_OP_ATC_INV,
+ .substream_valid = (ssid != IOMMU_NO_PASID),
+ .atc.ssid = ssid,
+ };
+
+ if (!size) {
+ cmd->atc.size = ATC_INV_SIZE_ALL;
+ return;
+ }
+
+ page_start = iova >> inval_grain_shift;
+ page_end = (iova + size - 1) >> inval_grain_shift;
+
+ /*
+ * In an ATS Invalidate Request, the address must be aligned on the
+ * range size, which must be a power of two number of page sizes. We
+ * thus have to choose between grossly over-invalidating the region, or
+ * splitting the invalidation into multiple commands. For simplicity
+ * we'll go with the first solution, but should refine it in the future
+ * if multiple commands are shown to be more efficient.
+ *
+ * Find the smallest power of two that covers the range. The most
+ * significant differing bit between the start and end addresses,
+ * fls(start ^ end), indicates the required span. For example:
+ *
+ * We want to invalidate pages [8; 11]. This is already the ideal range:
+ * x = 0b1000 ^ 0b1011 = 0b11
+ * span = 1 << fls(x) = 4
+ *
+ * To invalidate pages [7; 10], we need to invalidate [0; 15]:
+ * x = 0b0111 ^ 0b1010 = 0b1101
+ * span = 1 << fls(x) = 16
+ */
+ log2_span = fls_long(page_start ^ page_end);
+ span_mask = (1ULL << log2_span) - 1;
+
+ page_start &= ~span_mask;
+
+ cmd->atc.addr = page_start << inval_grain_shift;
+ cmd->atc.size = log2_span;
+}
+
+static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
+ ioasid_t ssid)
+{
+ int i;
+ struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_cmdq_batch cmds;
+
+ arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
+
+ arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.atc.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
+ }
+
+ return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
+}
+
+int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
+ unsigned long iova, size_t size)
+{
+ struct arm_smmu_master_domain *master_domain;
+ int i;
+ unsigned long flags;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_ATC_INV,
+ };
+ struct arm_smmu_cmdq_batch cmds;
+
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
+ return 0;
+
+ /*
+ * Ensure that we've completed prior invalidation of the main TLBs
+ * before we read 'nr_ats_masters' in case of a concurrent call to
+ * arm_smmu_enable_ats():
+ *
+ * // unmap() // arm_smmu_enable_ats()
+ * TLBI+SYNC atomic_inc(&nr_ats_masters);
+ * smp_mb(); [...]
+ * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
+ *
+ * Ensures that we always see the incremented 'nr_ats_masters' count if
+ * ATS was enabled at the PCI device before completion of the TLBI.
+ */
+ smp_mb();
+ if (!atomic_read(&smmu_domain->nr_ats_masters))
+ return 0;
+
+ arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ struct arm_smmu_master *master = master_domain->master;
+
+ if (!master->ats_enabled)
+ continue;
+
+ if (master_domain->nested_ats_flush) {
+ /*
+ * If a S2 used as a nesting parent is changed we have
+ * no option but to completely flush the ATC.
+ */
+ arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
+ } else {
+ arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
+ &cmd);
+ }
+
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.atc.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
+}
+
+/* IO_PGTABLE API */
+static void arm_smmu_tlb_inv_context(void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cmdq_ent cmd;
+
+ /*
+ * NOTE: when io-pgtable is in non-strict mode, we may get here with
+ * PTEs previously cleared by unmaps on the current CPU not yet visible
+ * to the SMMU. We are relying on the dma_wmb() implicit during cmd
+ * insertion to guarantee those are observed before the TLBI. Do be
+ * careful, 007.
+ */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ }
+ arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
+}
+
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ size_t granule,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ unsigned long end = iova + size, num_pages = 0, tg = 0;
+ size_t inv_range = granule;
+ struct arm_smmu_cmdq_batch cmds;
+
+ if (!size)
+ return;
+
+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
+ /* Get the leaf page size */
+ tg = __ffs(smmu_domain->domain.pgsize_bitmap);
+
+ num_pages = size >> tg;
+
+ /* Convert page size of 12,14,16 (log2) to 1,2,3 */
+ cmd->tlbi.tg = (tg - 10) / 2;
+
+ /*
+ * Determine what level the granule is at. For non-leaf, both
+ * io-pgtable and SVA pass a nominal last-level granule because
+ * they don't know what level(s) actually apply, so ignore that
+ * and leave TTL=0. However for various errata reasons we still
+ * want to use a range command, so avoid the SVA corner case
+ * where both scale and num could be 0 as well.
+ */
+ if (cmd->tlbi.leaf)
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
+ num_pages++;
+ }
+
+ arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
+
+ while (iova < end) {
+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
+ /*
+ * On each iteration of the loop, the range is 5 bits
+ * worth of the aligned size remaining.
+ * The range in pages is:
+ *
+ * range = (num_pages & (0x1f << __ffs(num_pages)))
+ */
+ unsigned long scale, num;
+
+ /* Determine the power of 2 multiple number of pages */
+ scale = __ffs(num_pages);
+ cmd->tlbi.scale = scale;
+
+ /* Determine how many chunks of 2^scale size we have */
+ num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
+ cmd->tlbi.num = num - 1;
+
+ /* range is num * 2^scale * pgsize */
+ inv_range = num << (scale + tg);
+
+ /* Clear out the lower order bits for the next iteration */
+ num_pages -= num << scale;
+ }
+
+ cmd->tlbi.addr = iova;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
+ iova += inv_range;
+ }
+ arm_smmu_cmdq_batch_submit(smmu, &cmds);
+}
+
+static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .tlbi = {
+ .leaf = leaf,
+ },
+ };
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
+ cmd.tlbi.asid = smmu_domain->cd.asid;
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ }
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+
+ if (smmu_domain->nest_parent) {
+ /*
+ * When the S2 domain changes all the nested S1 ASIDs have to be
+ * flushed too.
+ */
+ cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
+ }
+
+ /*
+ * Unfortunately, this can't be leaf-only since we may have
+ * zapped an entire table.
+ */
+ arm_smmu_atc_inv_domain(smmu_domain, iova, size);
+}
+
+void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
+ .tlbi = {
+ .asid = asid,
+ .leaf = leaf,
+ },
+ };
+
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+}
+
+static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct iommu_domain *domain = &smmu_domain->domain;
+
+ iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+}
+
+static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
+}
+
+static const struct iommu_flush_ops arm_smmu_flush_ops = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+ .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
+};
+
+static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
+{
+ u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
+
+ return (smmu->features & features) == features;
+}
+
+/* IOMMU API */
+static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ /* Assume that a coherent TCU implies coherent TBUs */
+ return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
+ case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+ return arm_smmu_master_canwbs(master);
+ case IOMMU_CAP_NOEXEC:
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return true;
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return arm_smmu_dbm_capable(master->smmu);
+ default:
+ return false;
+ }
+}
+
+static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_master_domain *master_domain;
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ if (!arm_smmu_master_canwbs(master_domain->master)) {
+ ret = false;
+ break;
+ }
+ }
+ smmu_domain->enforce_cache_coherency = ret;
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ return ret;
+}
+
+struct arm_smmu_domain *arm_smmu_domain_alloc(void)
+{
+ struct arm_smmu_domain *smmu_domain;
+
+ smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
+ if (!smmu_domain)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&smmu_domain->devices);
+ spin_lock_init(&smmu_domain->devices_lock);
+
+ return smmu_domain;
+}
+
+static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+
+ /* Free the ASID or VMID */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ /* Prevent SVA from touching the CD while we're freeing it */
+ mutex_lock(&arm_smmu_asid_lock);
+ xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
+ mutex_unlock(&arm_smmu_asid_lock);
+ } else {
+ struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
+ if (cfg->vmid)
+ ida_free(&smmu->vmid_map, cfg->vmid);
+ }
+
+ kfree(smmu_domain);
+}
+
+static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain)
+{
+ int ret;
+ u32 asid = 0;
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
+
+ /* Prevent SVA from modifying the ASID until it is written to the CD */
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ cd->asid = (u16)asid;
+ mutex_unlock(&arm_smmu_asid_lock);
+ return ret;
+}
+
+static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain)
+{
+ int vmid;
+ struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
+
+ /* Reserve VMID 0 for stage-2 bypass STEs */
+ vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
+ GFP_KERNEL);
+ if (vmid < 0)
+ return vmid;
+
+ cfg->vmid = (u16)vmid;
+ return 0;
+}
+
+static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu, u32 flags)
+{
+ int ret;
+ enum io_pgtable_fmt fmt;
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable_ops *pgtbl_ops;
+ int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain);
+ bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+
+ pgtbl_cfg = (struct io_pgtable_cfg) {
+ .pgsize_bitmap = smmu->pgsize_bitmap,
+ .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
+ .tlb = &arm_smmu_flush_ops,
+ .iommu_dev = smmu->dev,
+ };
+
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_S1: {
+ unsigned long ias = (smmu->features &
+ ARM_SMMU_FEAT_VAX) ? 52 : 48;
+
+ pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
+ pgtbl_cfg.oas = smmu->ias;
+ if (enable_dirty)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
+ fmt = ARM_64_LPAE_S1;
+ finalise_stage_fn = arm_smmu_domain_finalise_s1;
+ break;
+ }
+ case ARM_SMMU_DOMAIN_S2:
+ if (enable_dirty)
+ return -EOPNOTSUPP;
+ pgtbl_cfg.ias = smmu->ias;
+ pgtbl_cfg.oas = smmu->oas;
+ fmt = ARM_64_LPAE_S2;
+ finalise_stage_fn = arm_smmu_domain_finalise_s2;
+ if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+ if (!pgtbl_ops)
+ return -ENOMEM;
+
+ smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+ smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
+ smmu_domain->domain.geometry.force_aperture = true;
+ if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
+
+ ret = finalise_stage_fn(smmu, smmu_domain);
+ if (ret < 0) {
+ free_io_pgtable_ops(pgtbl_ops);
+ return ret;
+ }
+
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ smmu_domain->smmu = smmu;
+ return 0;
+}
+
+static struct arm_smmu_ste *
+arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
+{
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+ /* Two-level walk */
+ return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
+ ->stes[arm_smmu_strtab_l2_idx(sid)];
+ } else {
+ /* Simple linear lookup */
+ return &cfg->linear.table[sid];
+ }
+}
+
+void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
+ const struct arm_smmu_ste *target)
+{
+ int i, j;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ master->cd_table.in_ste =
+ FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
+ STRTAB_STE_0_CFG_S1_TRANS;
+ master->ste_ats_enabled =
+ FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
+ STRTAB_STE_1_EATS_TRANS;
+
+ for (i = 0; i < master->num_streams; ++i) {
+ u32 sid = master->streams[i].id;
+ struct arm_smmu_ste *step =
+ arm_smmu_get_step_for_sid(smmu, sid);
+
+ /* Bridged PCI devices may end up with duplicated IDs */
+ for (j = 0; j < i; j++)
+ if (master->streams[j].id == sid)
+ break;
+ if (j < i)
+ continue;
+
+ arm_smmu_write_ste(master, sid, step, target);
+ }
+}
+
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
+{
+ struct device *dev = master->dev;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ if (!(smmu->features & ARM_SMMU_FEAT_ATS))
+ return false;
+
+ if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
+ return false;
+
+ return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
+}
+
+static void arm_smmu_enable_ats(struct arm_smmu_master *master)
+{
+ size_t stu;
+ struct pci_dev *pdev;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ /* Smallest Translation Unit: log2 of the smallest supported granule */
+ stu = __ffs(smmu->pgsize_bitmap);
+ pdev = to_pci_dev(master->dev);
+
+ /*
+ * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
+ */
+ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
+ if (pci_enable_ats(pdev, stu))
+ dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
+}
+
+static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
+{
+ int ret;
+ int features;
+ int num_pasids;
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(master->dev))
+ return -ENODEV;
+
+ pdev = to_pci_dev(master->dev);
+
+ features = pci_pasid_features(pdev);
+ if (features < 0)
+ return features;
+
+ num_pasids = pci_max_pasids(pdev);
+ if (num_pasids <= 0)
+ return num_pasids;
+
+ ret = pci_enable_pasid(pdev, features);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to enable PASID\n");
+ return ret;
+ }
+
+ master->ssid_bits = min_t(u8, ilog2(num_pasids),
+ master->smmu->ssid_bits);
+ return 0;
+}
+
+static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
+{
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(master->dev))
+ return;
+
+ pdev = to_pci_dev(master->dev);
+
+ if (!pdev->pasid_enabled)
+ return;
+
+ master->ssid_bits = 0;
+ pci_disable_pasid(pdev);
+}
+
+static struct arm_smmu_master_domain *
+arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
+ struct iommu_domain *domain,
+ struct arm_smmu_master *master,
+ ioasid_t ssid, bool nested_ats_flush)
+{
+ struct arm_smmu_master_domain *master_domain;
+
+ lockdep_assert_held(&smmu_domain->devices_lock);
+
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ if (master_domain->master == master &&
+ master_domain->domain == domain &&
+ master_domain->ssid == ssid &&
+ master_domain->nested_ats_flush == nested_ats_flush)
+ return master_domain;
+ }
+ return NULL;
+}
+
+/*
+ * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
+ * structure, otherwise NULL. These domains track attached devices so they can
+ * issue invalidations.
+ */
+static struct arm_smmu_domain *
+to_smmu_domain_devices(struct iommu_domain *domain)
+{
+ /* The domain can be NULL only when processing the first attach */
+ if (!domain)
+ return NULL;
+ if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
+ domain->type == IOMMU_DOMAIN_SVA)
+ return to_smmu_domain(domain);
+ if (domain->type == IOMMU_DOMAIN_NESTED)
+ return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
+ return NULL;
+}
+
+static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ int ret;
+
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return -EOPNOTSUPP;
+
+ /*
+ * Drivers for devices supporting PRI or stall require iopf others have
+ * device-specific fault handlers and don't need IOPF, so this is not a
+ * failure.
+ */
+ if (!master->stall_enabled)
+ return 0;
+
+ /* We're not keeping track of SIDs in fault events */
+ if (master->num_streams != 1)
+ return -EOPNOTSUPP;
+
+ if (master->iopf_refcount) {
+ master->iopf_refcount++;
+ master_domain->using_iopf = true;
+ return 0;
+ }
+
+ ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
+ if (ret)
+ return ret;
+ master->iopf_refcount = 1;
+ master_domain->using_iopf = true;
+ return 0;
+}
+
+static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return;
+
+ if (!master_domain || !master_domain->using_iopf)
+ return;
+
+ master->iopf_refcount--;
+ if (master->iopf_refcount == 0)
+ iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
+}
+
+static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
+ struct iommu_domain *domain,
+ ioasid_t ssid)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
+ struct arm_smmu_master_domain *master_domain;
+ bool nested_ats_flush = false;
+ unsigned long flags;
+
+ if (!smmu_domain)
+ return;
+
+ if (domain->type == IOMMU_DOMAIN_NESTED)
+ nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
+ ssid, nested_ats_flush);
+ if (master_domain) {
+ list_del(&master_domain->devices_elm);
+ if (master->ats_enabled)
+ atomic_dec(&smmu_domain->nr_ats_masters);
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ arm_smmu_disable_iopf(master, master_domain);
+ kfree(master_domain);
+}
+
+/*
+ * Start the sequence to attach a domain to a master. The sequence contains three
+ * steps:
+ * arm_smmu_attach_prepare()
+ * arm_smmu_install_ste_for_dev()
+ * arm_smmu_attach_commit()
+ *
+ * If prepare succeeds then the sequence must be completed. The STE installed
+ * must set the STE.EATS field according to state.ats_enabled.
+ *
+ * If the device supports ATS then this determines if EATS should be enabled
+ * in the STE, and starts sequencing EATS disable if required.
+ *
+ * The change of the EATS in the STE and the PCI ATS config space is managed by
+ * this sequence to be in the right order so that if PCI ATS is enabled then
+ * STE.ETAS is enabled.
+ *
+ * new_domain can be a non-paging domain. In this case ATS will not be enabled,
+ * and invalidations won't be tracked.
+ */
+int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
+ struct iommu_domain *new_domain)
+{
+ struct arm_smmu_master *master = state->master;
+ struct arm_smmu_master_domain *master_domain;
+ struct arm_smmu_domain *smmu_domain =
+ to_smmu_domain_devices(new_domain);
+ unsigned long flags;
+ int ret;
+
+ /*
+ * arm_smmu_share_asid() must not see two domains pointing to the same
+ * arm_smmu_master_domain contents otherwise it could randomly write one
+ * or the other to the CD.
+ */
+ lockdep_assert_held(&arm_smmu_asid_lock);
+
+ if (smmu_domain || state->cd_needs_ats) {
+ /*
+ * The SMMU does not support enabling ATS with bypass/abort.
+ * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
+ * Translation Requests and Translated transactions are denied
+ * as though ATS is disabled for the stream (STE.EATS == 0b00),
+ * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
+ * (IHI0070Ea 5.2 Stream Table Entry).
+ *
+ * However, if we have installed a CD table and are using S1DSS
+ * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
+ * skipping stage 1".
+ *
+ * Disable ATS if we are going to create a normal 0b100 bypass
+ * STE.
+ */
+ state->ats_enabled = !state->disable_ats &&
+ arm_smmu_ats_supported(master);
+ }
+
+ if (smmu_domain) {
+ if (new_domain->type == IOMMU_DOMAIN_NESTED) {
+ ret = arm_smmu_attach_prepare_vmaster(
+ state, to_smmu_nested_domain(new_domain));
+ if (ret)
+ return ret;
+ }
+
+ master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
+ if (!master_domain) {
+ ret = -ENOMEM;
+ goto err_free_vmaster;
+ }
+ master_domain->domain = new_domain;
+ master_domain->master = master;
+ master_domain->ssid = state->ssid;
+ if (new_domain->type == IOMMU_DOMAIN_NESTED)
+ master_domain->nested_ats_flush =
+ to_smmu_nested_domain(new_domain)->enable_ats;
+
+ if (new_domain->iopf_handler) {
+ ret = arm_smmu_enable_iopf(master, master_domain);
+ if (ret)
+ goto err_free_master_domain;
+ }
+
+ /*
+ * During prepare we want the current smmu_domain and new
+ * smmu_domain to be in the devices list before we change any
+ * HW. This ensures that both domains will send ATS
+ * invalidations to the master until we are done.
+ *
+ * It is tempting to make this list only track masters that are
+ * using ATS, but arm_smmu_share_asid() also uses this to change
+ * the ASID of a domain, unrelated to ATS.
+ *
+ * Notice if we are re-attaching the same domain then the list
+ * will have two identical entries and commit will remove only
+ * one of them.
+ */
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->enforce_cache_coherency &&
+ !arm_smmu_master_canwbs(master)) {
+ spin_unlock_irqrestore(&smmu_domain->devices_lock,
+ flags);
+ ret = -EINVAL;
+ goto err_iopf;
+ }
+
+ if (state->ats_enabled)
+ atomic_inc(&smmu_domain->nr_ats_masters);
+ list_add(&master_domain->devices_elm, &smmu_domain->devices);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ }
+
+ if (!state->ats_enabled && master->ats_enabled) {
+ pci_disable_ats(to_pci_dev(master->dev));
+ /*
+ * This is probably overkill, but the config write for disabling
+ * ATS should complete before the STE is configured to generate
+ * UR to avoid AER noise.
+ */
+ wmb();
+ }
+ return 0;
+
+err_iopf:
+ arm_smmu_disable_iopf(master, master_domain);
+err_free_master_domain:
+ kfree(master_domain);
+err_free_vmaster:
+ kfree(state->vmaster);
+ return ret;
+}
+
+/*
+ * Commit is done after the STE/CD are configured with the EATS setting. It
+ * completes synchronizing the PCI device's ATC and finishes manipulating the
+ * smmu_domain->devices list.
+ */
+void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
+{
+ struct arm_smmu_master *master = state->master;
+
+ lockdep_assert_held(&arm_smmu_asid_lock);
+
+ arm_smmu_attach_commit_vmaster(state);
+
+ if (state->ats_enabled && !master->ats_enabled) {
+ arm_smmu_enable_ats(master);
+ } else if (state->ats_enabled && master->ats_enabled) {
+ /*
+ * The translation has changed, flush the ATC. At this point the
+ * SMMU is translating for the new domain and both the old&new
+ * domain will issue invalidations.
+ */
+ arm_smmu_atc_inv_master(master, state->ssid);
+ } else if (!state->ats_enabled && master->ats_enabled) {
+ /* ATS is being switched off, invalidate the entire ATC */
+ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
+ }
+
+ arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
+ master->ats_enabled = state->ats_enabled;
+}
+
+static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev,
+ struct iommu_domain *old_domain)
+{
+ int ret = 0;
+ struct arm_smmu_ste target;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_device *smmu;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_attach_state state = {
+ .old_domain = old_domain,
+ .ssid = IOMMU_NO_PASID,
+ };
+ struct arm_smmu_master *master;
+ struct arm_smmu_cd *cdptr;
+
+ if (!fwspec)
+ return -ENOENT;
+
+ state.master = master = dev_iommu_priv_get(dev);
+ smmu = master->smmu;
+
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
+ if (!cdptr)
+ return -ENOMEM;
+ } else if (arm_smmu_ssids_in_use(&master->cd_table))
+ return -EBUSY;
+
+ /*
+ * Prevent arm_smmu_share_asid() from trying to change the ASID
+ * of either the old or new domain while we are working on it.
+ * This allows the STE and the smmu_domain->devices list to
+ * be inconsistent during this routine.
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+
+ ret = arm_smmu_attach_prepare(&state, domain);
+ if (ret) {
+ mutex_unlock(&arm_smmu_asid_lock);
+ return ret;
+ }
+
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_S1: {
+ struct arm_smmu_cd target_cd;
+
+ arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+ arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
+ &target_cd);
+ arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
+ STRTAB_STE_1_S1DSS_SSID0);
+ arm_smmu_install_ste_for_dev(master, &target);
+ break;
+ }
+ case ARM_SMMU_DOMAIN_S2:
+ arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
+ state.ats_enabled);
+ arm_smmu_install_ste_for_dev(master, &target);
+ arm_smmu_clear_cd(master, IOMMU_NO_PASID);
+ break;
+ }
+
+ arm_smmu_attach_commit(&state);
+ mutex_unlock(&arm_smmu_asid_lock);
+ return 0;
+}
+
+static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t id,
+ struct iommu_domain *old)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_cd target_cd;
+
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
+
+ if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+ return -EINVAL;
+
+ /*
+ * We can read cd.asid outside the lock because arm_smmu_set_pasid()
+ * will fix it
+ */
+ arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+ return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
+ &target_cd, old);
+}
+
+static void arm_smmu_update_ste(struct arm_smmu_master *master,
+ struct iommu_domain *sid_domain,
+ bool ats_enabled)
+{
+ unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
+ struct arm_smmu_ste ste;
+
+ if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
+ return;
+
+ if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
+ s1dss = STRTAB_STE_1_S1DSS_BYPASS;
+ else
+ WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
+
+ /*
+ * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
+ * using s1dss if necessary. If the cd_table is already installed then
+ * the S1DSS is correct and this will just update the EATS. Otherwise it
+ * installs the entire thing. This will be hitless.
+ */
+ arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
+ arm_smmu_install_ste_for_dev(master, &ste);
+}
+
+int arm_smmu_set_pasid(struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
+ struct arm_smmu_cd *cd, struct iommu_domain *old)
+{
+ struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
+ struct arm_smmu_attach_state state = {
+ .master = master,
+ .ssid = pasid,
+ .old_domain = old,
+ };
+ struct arm_smmu_cd *cdptr;
+ int ret;
+
+ /* The core code validates pasid */
+
+ if (smmu_domain->smmu != master->smmu)
+ return -EINVAL;
+
+ if (!master->cd_table.in_ste &&
+ sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
+ sid_domain->type != IOMMU_DOMAIN_BLOCKED)
+ return -EINVAL;
+
+ cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
+ if (!cdptr)
+ return -ENOMEM;
+
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * We don't want to obtain to the asid_lock too early, so fix up the
+ * caller set ASID under the lock in case it changed.
+ */
+ cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
+ cd->data[0] |= cpu_to_le64(
+ FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
+
+ arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
+ arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
+
+ arm_smmu_attach_commit(&state);
+
+out_unlock:
+ mutex_unlock(&arm_smmu_asid_lock);
+ return ret;
+}
+
+static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old_domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ mutex_lock(&arm_smmu_asid_lock);
+ arm_smmu_clear_cd(master, pasid);
+ if (master->ats_enabled)
+ arm_smmu_atc_inv_master(master, pasid);
+ arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
+ mutex_unlock(&arm_smmu_asid_lock);
+
+ /*
+ * When the last user of the CD table goes away downgrade the STE back
+ * to a non-cd_table one, by re-attaching its sid_domain.
+ */
+ if (!arm_smmu_ssids_in_use(&master->cd_table)) {
+ struct iommu_domain *sid_domain =
+ iommu_get_domain_for_dev(master->dev);
+
+ if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
+ sid_domain->type == IOMMU_DOMAIN_BLOCKED)
+ sid_domain->ops->attach_dev(sid_domain, dev,
+ sid_domain);
+ }
+ return 0;
+}
+
+static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
+ struct iommu_domain *old_domain,
+ struct device *dev,
+ struct arm_smmu_ste *ste,
+ unsigned int s1dss)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_attach_state state = {
+ .master = master,
+ .old_domain = old_domain,
+ .ssid = IOMMU_NO_PASID,
+ };
+
+ /*
+ * Do not allow any ASID to be changed while are working on the STE,
+ * otherwise we could miss invalidations.
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+
+ /*
+ * If the CD table is not in use we can use the provided STE, otherwise
+ * we use a cdtable STE with the provided S1DSS.
+ */
+ if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ /*
+ * If a CD table has to be present then we need to run with ATS
+ * on because we have to assume a PASID is using ATS. For
+ * IDENTITY this will setup things so that S1DSS=bypass which
+ * follows the explanation in "13.6.4 Full ATS skipping stage 1"
+ * and allows for ATS on the RID to work.
+ */
+ state.cd_needs_ats = true;
+ arm_smmu_attach_prepare(&state, domain);
+ arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
+ } else {
+ arm_smmu_attach_prepare(&state, domain);
+ }
+ arm_smmu_install_ste_for_dev(master, ste);
+ arm_smmu_attach_commit(&state);
+ mutex_unlock(&arm_smmu_asid_lock);
+
+ /*
+ * This has to be done after removing the master from the
+ * arm_smmu_domain->devices to avoid races updating the same context
+ * descriptor from arm_smmu_share_asid().
+ */
+ arm_smmu_clear_cd(master, IOMMU_NO_PASID);
+}
+
+static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev,
+ struct iommu_domain *old_domain)
+{
+ struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ arm_smmu_master_clear_vmaster(master);
+ arm_smmu_make_bypass_ste(master->smmu, &ste);
+ arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
+ STRTAB_STE_1_S1DSS_BYPASS);
+ return 0;
+}
+
+static const struct iommu_domain_ops arm_smmu_identity_ops = {
+ .attach_dev = arm_smmu_attach_dev_identity,
+};
+
+static struct iommu_domain arm_smmu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &arm_smmu_identity_ops,
+};
+
+static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
+ struct device *dev,
+ struct iommu_domain *old_domain)
+{
+ struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ arm_smmu_master_clear_vmaster(master);
+ arm_smmu_make_abort_ste(&ste);
+ arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
+ STRTAB_STE_1_S1DSS_TERMINATE);
+ return 0;
+}
+
+static const struct iommu_domain_ops arm_smmu_blocked_ops = {
+ .attach_dev = arm_smmu_attach_dev_blocked,
+ .set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
+};
+
+static struct iommu_domain arm_smmu_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &arm_smmu_blocked_ops,
+};
+
+static struct iommu_domain *
+arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
+ const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID |
+ IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct arm_smmu_domain *smmu_domain;
+ int ret;
+
+ if (flags & ~PAGING_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smmu_domain = arm_smmu_domain_alloc();
+ if (IS_ERR(smmu_domain))
+ return ERR_CAST(smmu_domain);
+
+ switch (flags) {
+ case 0:
+ /* Prefer S1 if available */
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ break;
+ case IOMMU_HWPT_ALLOC_NEST_PARENT:
+ if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ smmu_domain->nest_parent = true;
+ break;
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
+ case IOMMU_HWPT_ALLOC_PASID:
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+
+ smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
+ smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
+ ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
+ if (ret)
+ goto err_free;
+ return &smmu_domain->domain;
+
+err_free:
+ kfree(smmu_domain);
+ return ERR_PTR(ret);
+}
+
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
+{
+ struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+ if (!ops)
+ return -ENODEV;
+
+ return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
+}
+
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+ if (!ops)
+ return 0;
+
+ return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
+}
+
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->smmu)
+ arm_smmu_tlb_inv_context(smmu_domain);
+}
+
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (!gather->pgsize)
+ return;
+
+ arm_smmu_tlb_inv_range_domain(gather->start,
+ gather->end - gather->start + 1,
+ gather->pgsize, true, smmu_domain);
+}
+
+static phys_addr_t
+arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
+{
+ struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+ if (!ops)
+ return 0;
+
+ return ops->iova_to_phys(ops, iova);
+}
+
+static struct platform_driver arm_smmu_driver;
+
+static
+struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
+ put_device(dev);
+ return dev ? dev_get_drvdata(dev) : NULL;
+}
+
+static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
+{
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
+ return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
+ return sid < smmu->strtab_cfg.linear.num_ents;
+}
+
+static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
+{
+ /* Check the SIDs are in range of the SMMU and our stream table */
+ if (!arm_smmu_sid_in_range(smmu, sid))
+ return -ERANGE;
+
+ /* Ensure l2 strtab is initialised */
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
+ return arm_smmu_init_l2_strtab(smmu, sid);
+
+ return 0;
+}
+
+static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
+ struct arm_smmu_master *master)
+{
+ int i;
+ int ret = 0;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+
+ master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
+ GFP_KERNEL);
+ if (!master->streams)
+ return -ENOMEM;
+ master->num_streams = fwspec->num_ids;
+
+ mutex_lock(&smmu->streams_mutex);
+ for (i = 0; i < fwspec->num_ids; i++) {
+ struct arm_smmu_stream *new_stream = &master->streams[i];
+ struct rb_node *existing;
+ u32 sid = fwspec->ids[i];
+
+ new_stream->id = sid;
+ new_stream->master = master;
+
+ ret = arm_smmu_init_sid_strtab(smmu, sid);
+ if (ret)
+ break;
+
+ /* Insert into SID tree */
+ existing = rb_find_add(&new_stream->node, &smmu->streams,
+ arm_smmu_streams_cmp_node);
+ if (existing) {
+ struct arm_smmu_master *existing_master =
+ rb_entry(existing, struct arm_smmu_stream, node)
+ ->master;
+
+ /* Bridged PCI devices may end up with duplicated IDs */
+ if (existing_master == master)
+ continue;
+
+ dev_warn(master->dev,
+ "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n",
+ sid, dev_name(existing_master->dev));
+ ret = -ENODEV;
+ break;
+ }
+ }
+
+ if (ret) {
+ for (i--; i >= 0; i--)
+ rb_erase(&master->streams[i].node, &smmu->streams);
+ kfree(master->streams);
+ }
+ mutex_unlock(&smmu->streams_mutex);
+
+ return ret;
+}
+
+static void arm_smmu_remove_master(struct arm_smmu_master *master)
+{
+ int i;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+
+ if (!smmu || !master->streams)
+ return;
+
+ mutex_lock(&smmu->streams_mutex);
+ for (i = 0; i < fwspec->num_ids; i++)
+ rb_erase(&master->streams[i].node, &smmu->streams);
+ mutex_unlock(&smmu->streams_mutex);
+
+ kfree(master->streams);
+}
+
+static struct iommu_device *arm_smmu_probe_device(struct device *dev)
+{
+ int ret;
+ struct arm_smmu_device *smmu;
+ struct arm_smmu_master *master;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
+ return ERR_PTR(-EBUSY);
+
+ smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+ if (!smmu)
+ return ERR_PTR(-ENODEV);
+
+ master = kzalloc(sizeof(*master), GFP_KERNEL);
+ if (!master)
+ return ERR_PTR(-ENOMEM);
+
+ master->dev = dev;
+ master->smmu = smmu;
+ dev_iommu_priv_set(dev, master);
+
+ ret = arm_smmu_insert_master(smmu, master);
+ if (ret)
+ goto err_free_master;
+
+ device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
+ master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
+
+ /*
+ * Note that PASID must be enabled before, and disabled after ATS:
+ * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
+ *
+ * Behavior is undefined if this bit is Set and the value of the PASID
+ * Enable, Execute Requested Enable, or Privileged Mode Requested bits
+ * are changed.
+ */
+ arm_smmu_enable_pasid(master);
+
+ if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
+ master->ssid_bits = min_t(u8, master->ssid_bits,
+ CTXDESC_LINEAR_CDMAX);
+
+ if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+ device_property_read_bool(dev, "dma-can-stall")) ||
+ smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+ master->stall_enabled = true;
+
+ if (dev_is_pci(dev)) {
+ unsigned int stu = __ffs(smmu->pgsize_bitmap);
+
+ pci_prepare_ats(to_pci_dev(dev), stu);
+ }
+
+ return &smmu->iommu;
+
+err_free_master:
+ kfree(master);
+ return ERR_PTR(ret);
+}
+
+static void arm_smmu_release_device(struct device *dev)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ WARN_ON(master->iopf_refcount);
+
+ arm_smmu_disable_pasid(master);
+ arm_smmu_remove_master(master);
+ if (arm_smmu_cdtab_allocated(&master->cd_table))
+ arm_smmu_free_cd_tables(master);
+ kfree(master);
+}
+
+static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+ return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
+static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enabled)
+{
+ /*
+ * Always enabled and the dirty bitmap is cleared prior to
+ * set_dirty_tracking().
+ */
+ return 0;
+}
+
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+ struct iommu_group *group;
+
+ /*
+ * We don't support devices sharing stream IDs other than PCI RID
+ * aliases, since the necessary ID-to-device lookup becomes rather
+ * impractical given a potential sparse 32-bit stream ID space.
+ */
+ if (dev_is_pci(dev))
+ group = pci_device_group(dev);
+ else
+ group = generic_device_group(dev);
+
+ return group;
+}
+
+static int arm_smmu_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
+{
+ return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
+static void arm_smmu_get_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct iommu_resv_region *region;
+ int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+ region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+ prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
+ if (!region)
+ return;
+
+ list_add_tail(&region->list, head);
+
+ iommu_dma_get_resv_regions(dev, head);
+}
+
+/*
+ * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
+ * PCIe link and save the data to memory by DMA. The hardware is restricted to
+ * use identity mapping only.
+ */
+#define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
+ (pdev)->device == 0xa12e)
+
+static int arm_smmu_def_domain_type(struct device *dev)
+{
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (IS_HISI_PTT_DEVICE(pdev))
+ return IOMMU_DOMAIN_IDENTITY;
+ }
+
+ return 0;
+}
+
+static const struct iommu_ops arm_smmu_ops = {
+ .identity_domain = &arm_smmu_identity_domain,
+ .blocked_domain = &arm_smmu_blocked_domain,
+ .release_domain = &arm_smmu_blocked_domain,
+ .capable = arm_smmu_capable,
+ .hw_info = arm_smmu_hw_info,
+ .domain_alloc_sva = arm_smmu_sva_domain_alloc,
+ .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
+ .probe_device = arm_smmu_probe_device,
+ .release_device = arm_smmu_release_device,
+ .device_group = arm_smmu_device_group,
+ .of_xlate = arm_smmu_of_xlate,
+ .get_resv_regions = arm_smmu_get_resv_regions,
+ .page_response = arm_smmu_page_response,
+ .def_domain_type = arm_smmu_def_domain_type,
+ .get_viommu_size = arm_smmu_get_viommu_size,
+ .viommu_init = arm_vsmmu_init,
+ .user_pasid_table = 1,
+ .owner = THIS_MODULE,
+ .default_domain_ops = &(const struct iommu_domain_ops) {
+ .attach_dev = arm_smmu_attach_dev,
+ .enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
+ .set_dev_pasid = arm_smmu_s1_set_dev_pasid,
+ .map_pages = arm_smmu_map_pages,
+ .unmap_pages = arm_smmu_unmap_pages,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
+ .iotlb_sync = arm_smmu_iotlb_sync,
+ .iova_to_phys = arm_smmu_iova_to_phys,
+ .free = arm_smmu_domain_free_paging,
+ }
+};
+
+static struct iommu_dirty_ops arm_smmu_dirty_ops = {
+ .read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
+ .set_dirty_tracking = arm_smmu_set_dirty_tracking,
+};
+
+/* Probing and initialisation functions */
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name)
+{
+ size_t qsz;
+
+ do {
+ qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
+ q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
+ GFP_KERNEL);
+ if (q->base || qsz < PAGE_SIZE)
+ break;
+
+ q->llq.max_n_shift--;
+ } while (1);
+
+ if (!q->base) {
+ dev_err(smmu->dev,
+ "failed to allocate queue (0x%zx bytes) for %s\n",
+ qsz, name);
+ return -ENOMEM;
+ }
+
+ if (!WARN_ON(q->base_dma & (qsz - 1))) {
+ dev_info(smmu->dev, "allocated %u entries for %s\n",
+ 1 << q->llq.max_n_shift, name);
+ }
+
+ q->prod_reg = page + prod_off;
+ q->cons_reg = page + cons_off;
+ q->ent_dwords = dwords;
+
+ q->q_base = Q_BASE_RWA;
+ q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
+ q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
+
+ q->llq.prod = q->llq.cons = 0;
+ return 0;
+}
+
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+ atomic_set(&cmdq->owner_prod, 0);
+ atomic_set(&cmdq->lock, 0);
+
+ cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
+ GFP_KERNEL);
+ if (!cmdq->valid_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ /* cmdq */
+ ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
+ ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
+ CMDQ_ENT_DWORDS, "cmdq");
+ if (ret)
+ return ret;
+
+ ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
+ if (ret)
+ return ret;
+
+ /* evtq */
+ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
+ ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
+ EVTQ_ENT_DWORDS, "evtq");
+ if (ret)
+ return ret;
+
+ if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
+ (smmu->features & ARM_SMMU_FEAT_STALLS)) {
+ smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
+ if (!smmu->evtq.iopf)
+ return -ENOMEM;
+ }
+
+ /* priq */
+ if (!(smmu->features & ARM_SMMU_FEAT_PRI))
+ return 0;
+
+ return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
+ ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
+ PRIQ_ENT_DWORDS, "priq");
+}
+
+static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
+{
+ u32 l1size;
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ unsigned int last_sid_idx =
+ arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
+
+ /* Calculate the L1 size, capped to the SIDSIZE. */
+ cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
+ if (cfg->l2.num_l1_ents <= last_sid_idx)
+ dev_warn(smmu->dev,
+ "2-level strtab only covers %u/%u bits of SID\n",
+ ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
+ smmu->sid_bits);
+
+ l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
+ cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
+ GFP_KERNEL);
+ if (!cfg->l2.l1tab) {
+ dev_err(smmu->dev,
+ "failed to allocate l1 stream table (%u bytes)\n",
+ l1size);
+ return -ENOMEM;
+ }
+
+ cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
+ sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
+ if (!cfg->l2.l2ptrs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
+{
+ u32 size;
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+
+ size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
+ cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
+ &cfg->linear.ste_dma,
+ GFP_KERNEL);
+ if (!cfg->linear.table) {
+ dev_err(smmu->dev,
+ "failed to allocate linear stream table (%u bytes)\n",
+ size);
+ return -ENOMEM;
+ }
+ cfg->linear.num_ents = 1 << smmu->sid_bits;
+
+ arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
+ return 0;
+}
+
+static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
+ ret = arm_smmu_init_strtab_2lvl(smmu);
+ else
+ ret = arm_smmu_init_strtab_linear(smmu);
+ if (ret)
+ return ret;
+
+ ida_init(&smmu->vmid_map);
+
+ return 0;
+}
+
+static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ mutex_init(&smmu->streams_mutex);
+ smmu->streams = RB_ROOT;
+
+ ret = arm_smmu_init_queues(smmu);
+ if (ret)
+ return ret;
+
+ ret = arm_smmu_init_strtab(smmu);
+ if (ret)
+ return ret;
+
+ if (smmu->impl_ops && smmu->impl_ops->init_structures)
+ return smmu->impl_ops->init_structures(smmu);
+
+ return 0;
+}
+
+static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
+ unsigned int reg_off, unsigned int ack_off)
+{
+ u32 reg;
+
+ writel_relaxed(val, smmu->base + reg_off);
+ return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+}
+
+/* GBPA is "special" */
+static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
+{
+ int ret;
+ u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
+
+ ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ reg &= ~clr;
+ reg |= set;
+ writel_relaxed(reg | GBPA_UPDATE, gbpa);
+ ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+
+ if (ret)
+ dev_err(smmu->dev, "GBPA not responding to update\n");
+ return ret;
+}
+
+static void arm_smmu_free_msis(void *data)
+{
+ struct device *dev = data;
+
+ platform_device_msi_free_irqs_all(dev);
+}
+
+static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ phys_addr_t doorbell;
+ struct device *dev = msi_desc_to_dev(desc);
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
+
+ doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+ doorbell &= MSI_CFG0_ADDR_MASK;
+
+ writeq_relaxed(doorbell, smmu->base + cfg[0]);
+ writel_relaxed(msg->data, smmu->base + cfg[1]);
+ writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+}
+
+static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+{
+ int ret, nvec = ARM_SMMU_MAX_MSIS;
+ struct device *dev = smmu->dev;
+
+ /* Clear the MSI address regs */
+ writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
+ writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+
+ if (smmu->features & ARM_SMMU_FEAT_PRI)
+ writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
+ else
+ nvec--;
+
+ if (!(smmu->features & ARM_SMMU_FEAT_MSI))
+ return;
+
+ if (!dev->msi.domain) {
+ dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
+ return;
+ }
+
+ /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+ ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+ if (ret) {
+ dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
+ return;
+ }
+
+ smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
+ smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
+ smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
+
+ /* Add callback to free MSIs on teardown */
+ devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
+}
+
+static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
+{
+ int irq, ret;
+
+ arm_smmu_setup_msis(smmu);
+
+ /* Request interrupt lines */
+ irq = smmu->evtq.q.irq;
+ if (irq) {
+ ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
+ arm_smmu_evtq_thread,
+ IRQF_ONESHOT,
+ "arm-smmu-v3-evtq", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable evtq irq\n");
+ } else {
+ dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
+ }
+
+ irq = smmu->gerr_irq;
+ if (irq) {
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
+ 0, "arm-smmu-v3-gerror", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable gerror irq\n");
+ } else {
+ dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
+ }
+
+ if (smmu->features & ARM_SMMU_FEAT_PRI) {
+ irq = smmu->priq.q.irq;
+ if (irq) {
+ ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
+ arm_smmu_priq_thread,
+ IRQF_ONESHOT,
+ "arm-smmu-v3-priq",
+ smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev,
+ "failed to enable priq irq\n");
+ } else {
+ dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
+ }
+ }
+}
+
+static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+{
+ int ret, irq;
+ u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
+
+ /* Disable IRQs first */
+ ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
+ ARM_SMMU_IRQ_CTRLACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to disable irqs\n");
+ return ret;
+ }
+
+ irq = smmu->combined_irq;
+ if (irq) {
+ /*
+ * Cavium ThunderX2 implementation doesn't support unique irq
+ * lines. Use a single irq line for all the SMMUv3 interrupts.
+ */
+ ret = devm_request_threaded_irq(smmu->dev, irq,
+ arm_smmu_combined_irq_handler,
+ arm_smmu_combined_irq_thread,
+ IRQF_ONESHOT,
+ "arm-smmu-v3-combined-irq", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable combined irq\n");
+ } else
+ arm_smmu_setup_unique_irqs(smmu);
+
+ if (smmu->features & ARM_SMMU_FEAT_PRI)
+ irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
+
+ /* Enable interrupt generation on the SMMU */
+ ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
+ ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
+ if (ret)
+ dev_warn(smmu->dev, "failed to enable irqs\n");
+
+ return 0;
+}
+
+static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
+ if (ret)
+ dev_err(smmu->dev, "failed to clear cr0\n");
+
+ return ret;
+}
+
+static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ dma_addr_t dma;
+ u32 reg;
+
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_2LVL) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
+ ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
+ FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
+ dma = cfg->l2.l1_dma;
+ } else {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_LINEAR) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
+ dma = cfg->linear.ste_dma;
+ }
+ writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
+ smmu->base + ARM_SMMU_STRTAB_BASE);
+ writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
+}
+
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
+{
+ int ret;
+ u32 reg, enables;
+ struct arm_smmu_cmdq_ent cmd;
+
+ /* Clear CR0 and sync (disables SMMU and queue processing) */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
+ if (reg & CR0_SMMUEN) {
+ dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
+ arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
+ }
+
+ ret = arm_smmu_device_disable(smmu);
+ if (ret)
+ return ret;
+
+ /* CR1 (table and queue memory attributes) */
+ reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
+ writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
+
+ /* CR2 (random crap) */
+ reg = CR2_PTM | CR2_RECINVSID;
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H)
+ reg |= CR2_E2H;
+
+ writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
+
+ /* Stream table */
+ arm_smmu_write_strtab(smmu);
+
+ /* Command queue */
+ writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
+ writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
+ writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+
+ enables = CR0_CMDQEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable command queue\n");
+ return ret;
+ }
+
+ /* Invalidate any cached configuration */
+ cmd.opcode = CMDQ_OP_CFGI_ALL;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+
+ /* Invalidate any stale TLB entries */
+ if (smmu->features & ARM_SMMU_FEAT_HYP) {
+ cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ }
+
+ cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+
+ /* Event queue */
+ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
+ writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
+ writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
+
+ enables |= CR0_EVTQEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable event queue\n");
+ return ret;
+ }
+
+ /* PRI queue */
+ if (smmu->features & ARM_SMMU_FEAT_PRI) {
+ writeq_relaxed(smmu->priq.q.q_base,
+ smmu->base + ARM_SMMU_PRIQ_BASE);
+ writel_relaxed(smmu->priq.q.llq.prod,
+ smmu->page1 + ARM_SMMU_PRIQ_PROD);
+ writel_relaxed(smmu->priq.q.llq.cons,
+ smmu->page1 + ARM_SMMU_PRIQ_CONS);
+
+ enables |= CR0_PRIQEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable PRI queue\n");
+ return ret;
+ }
+ }
+
+ if (smmu->features & ARM_SMMU_FEAT_ATS) {
+ enables |= CR0_ATSCHK;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable ATS check\n");
+ return ret;
+ }
+ }
+
+ ret = arm_smmu_setup_irqs(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed to setup irqs\n");
+ return ret;
+ }
+
+ if (is_kdump_kernel())
+ enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
+
+ /* Enable the SMMU interface */
+ enables |= CR0_SMMUEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable SMMU interface\n");
+ return ret;
+ }
+
+ if (smmu->impl_ops && smmu->impl_ops->device_reset) {
+ ret = smmu->impl_ops->device_reset(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed to reset impl\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+#define IIDR_IMPLEMENTER_ARM 0x43b
+#define IIDR_PRODUCTID_ARM_MMU_600 0x483
+#define IIDR_PRODUCTID_ARM_MMU_700 0x487
+
+static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
+{
+ u32 reg;
+ unsigned int implementer, productid, variant, revision;
+
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+ implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
+ productid = FIELD_GET(IIDR_PRODUCTID, reg);
+ variant = FIELD_GET(IIDR_VARIANT, reg);
+ revision = FIELD_GET(IIDR_REVISION, reg);
+
+ switch (implementer) {
+ case IIDR_IMPLEMENTER_ARM:
+ switch (productid) {
+ case IIDR_PRODUCTID_ARM_MMU_600:
+ /* Arm erratum 1076982 */
+ if (variant == 0 && revision <= 2)
+ smmu->features &= ~ARM_SMMU_FEAT_SEV;
+ /* Arm erratum 1209401 */
+ if (variant < 2)
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ case IIDR_PRODUCTID_ARM_MMU_700:
+ /* Arm erratum 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_BTM;
+ smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
+ /* Arm errata 2268618, 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ }
+ break;
+ }
+}
+
+static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
+{
+ u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
+ u32 hw_features = 0;
+
+ switch (FIELD_GET(IDR0_HTTU, reg)) {
+ case IDR0_HTTU_ACCESS_DIRTY:
+ hw_features |= ARM_SMMU_FEAT_HD;
+ fallthrough;
+ case IDR0_HTTU_ACCESS:
+ hw_features |= ARM_SMMU_FEAT_HA;
+ }
+
+ if (smmu->dev->of_node)
+ smmu->features |= hw_features;
+ else if (hw_features != fw_features)
+ /* ACPI IORT sets the HTTU bits */
+ dev_warn(smmu->dev,
+ "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
+ hw_features, fw_features);
+}
+
+static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
+{
+ u32 reg;
+ bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
+
+ /* IDR0 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
+
+ /* 2-level structures */
+ if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
+ smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
+
+ if (reg & IDR0_CD2L)
+ smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
+
+ /*
+ * Translation table endianness.
+ * We currently require the same endianness as the CPU, but this
+ * could be changed later by adding a new IO_PGTABLE_QUIRK.
+ */
+ switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
+ case IDR0_TTENDIAN_MIXED:
+ smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
+ break;
+#ifdef __BIG_ENDIAN
+ case IDR0_TTENDIAN_BE:
+ smmu->features |= ARM_SMMU_FEAT_TT_BE;
+ break;
+#else
+ case IDR0_TTENDIAN_LE:
+ smmu->features |= ARM_SMMU_FEAT_TT_LE;
+ break;
+#endif
+ default:
+ dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
+ return -ENXIO;
+ }
+
+ /* Boolean feature flags */
+ if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
+ smmu->features |= ARM_SMMU_FEAT_PRI;
+
+ if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
+ smmu->features |= ARM_SMMU_FEAT_ATS;
+
+ if (reg & IDR0_SEV)
+ smmu->features |= ARM_SMMU_FEAT_SEV;
+
+ if (reg & IDR0_MSI) {
+ smmu->features |= ARM_SMMU_FEAT_MSI;
+ if (coherent && !disable_msipolling)
+ smmu->options |= ARM_SMMU_OPT_MSIPOLL;
+ }
+
+ if (reg & IDR0_HYP) {
+ smmu->features |= ARM_SMMU_FEAT_HYP;
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ smmu->features |= ARM_SMMU_FEAT_E2H;
+ }
+
+ arm_smmu_get_httu(smmu, reg);
+
+ /*
+ * The coherency feature as set by FW is used in preference to the ID
+ * register, but warn on mismatch.
+ */
+ if (!!(reg & IDR0_COHACC) != coherent)
+ dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
+ str_true_false(coherent));
+
+ switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
+ case IDR0_STALL_MODEL_FORCE:
+ smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
+ fallthrough;
+ case IDR0_STALL_MODEL_STALL:
+ smmu->features |= ARM_SMMU_FEAT_STALLS;
+ }
+
+ if (reg & IDR0_S1P)
+ smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
+
+ if (reg & IDR0_S2P)
+ smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
+
+ if (!(reg & (IDR0_S1P | IDR0_S2P))) {
+ dev_err(smmu->dev, "no translation support!\n");
+ return -ENXIO;
+ }
+
+ /* We only support the AArch64 table format at present */
+ switch (FIELD_GET(IDR0_TTF, reg)) {
+ case IDR0_TTF_AARCH32_64:
+ smmu->ias = 40;
+ fallthrough;
+ case IDR0_TTF_AARCH64:
+ break;
+ default:
+ dev_err(smmu->dev, "AArch64 table format not supported!\n");
+ return -ENXIO;
+ }
+
+ /* ASID/VMID sizes */
+ smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
+ smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
+
+ /* IDR1 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
+ if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
+ dev_err(smmu->dev, "embedded implementation not supported\n");
+ return -ENXIO;
+ }
+
+ if (reg & IDR1_ATTR_TYPES_OVR)
+ smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
+
+ /* Queue sizes, capped to ensure natural alignment */
+ smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_CMDQS, reg));
+ if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
+ /*
+ * We don't support splitting up batches, so one batch of
+ * commands plus an extra sync needs to fit inside the command
+ * queue. There's also no way we can handle the weird alignment
+ * restrictions on the base pointer for a unit-length queue.
+ */
+ dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
+ CMDQ_BATCH_ENTRIES);
+ return -ENXIO;
+ }
+
+ smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_EVTQS, reg));
+ smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_PRIQS, reg));
+
+ /* SID/SSID sizes */
+ smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
+ smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
+ smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
+
+ /*
+ * If the SMMU supports fewer bits than would fill a single L2 stream
+ * table, use a linear table instead.
+ */
+ if (smmu->sid_bits <= STRTAB_SPLIT)
+ smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
+
+ /* IDR3 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
+ if (FIELD_GET(IDR3_RIL, reg))
+ smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
+ if (FIELD_GET(IDR3_FWB, reg))
+ smmu->features |= ARM_SMMU_FEAT_S2FWB;
+
+ if (FIELD_GET(IDR3_BBM, reg) == 2)
+ smmu->features |= ARM_SMMU_FEAT_BBML2;
+
+ /* IDR5 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
+
+ /* Maximum number of outstanding stalls */
+ smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
+
+ /* Page sizes */
+ if (reg & IDR5_GRAN64K)
+ smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
+ if (reg & IDR5_GRAN16K)
+ smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
+ if (reg & IDR5_GRAN4K)
+ smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+
+ /* Input address size */
+ if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
+ smmu->features |= ARM_SMMU_FEAT_VAX;
+
+ /* Output address size */
+ switch (FIELD_GET(IDR5_OAS, reg)) {
+ case IDR5_OAS_32_BIT:
+ smmu->oas = 32;
+ break;
+ case IDR5_OAS_36_BIT:
+ smmu->oas = 36;
+ break;
+ case IDR5_OAS_40_BIT:
+ smmu->oas = 40;
+ break;
+ case IDR5_OAS_42_BIT:
+ smmu->oas = 42;
+ break;
+ case IDR5_OAS_44_BIT:
+ smmu->oas = 44;
+ break;
+ case IDR5_OAS_52_BIT:
+ smmu->oas = 52;
+ smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
+ break;
+ default:
+ dev_info(smmu->dev,
+ "unknown output address size. Truncating to 48-bit\n");
+ fallthrough;
+ case IDR5_OAS_48_BIT:
+ smmu->oas = 48;
+ }
+
+ /* Set the DMA mask for our table walker */
+ if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
+ dev_warn(smmu->dev,
+ "failed to set DMA mask for table walker\n");
+
+ smmu->ias = max(smmu->ias, smmu->oas);
+
+ if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
+ (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+ smmu->features |= ARM_SMMU_FEAT_NESTING;
+
+ arm_smmu_device_iidr_probe(smmu);
+
+ if (arm_smmu_sva_supported(smmu))
+ smmu->features |= ARM_SMMU_FEAT_SVA;
+
+ dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
+ smmu->ias, smmu->oas, smmu->features);
+ return 0;
+}
+
+#ifdef CONFIG_ACPI
+#ifdef CONFIG_TEGRA241_CMDQV
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+ const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
+ struct acpi_device *adev;
+
+ /* Look for an NVDA200C node whose _UID matches the SMMU node ID */
+ adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
+ if (adev) {
+ /* Tegra241 CMDQV driver is responsible for put_device() */
+ smmu->impl_dev = &adev->dev;
+ smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
+ dev_info(smmu->dev, "found companion CMDQV device: %s\n",
+ dev_name(smmu->impl_dev));
+ }
+ kfree(uid);
+}
+#else
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+}
+#endif
+
+static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+ struct acpi_iort_smmu_v3 *iort_smmu =
+ (struct acpi_iort_smmu_v3 *)node->node_data;
+
+ switch (iort_smmu->model) {
+ case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
+ smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+ break;
+ case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
+ smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+ break;
+ case ACPI_IORT_SMMU_V3_GENERIC:
+ /*
+ * Tegra241 implementation stores its SMMU options and impl_dev
+ * in DSDT. Thus, go through the ACPI tables unconditionally.
+ */
+ acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
+ break;
+ }
+
+ dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+ return 0;
+}
+
+static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
+{
+ struct acpi_iort_smmu_v3 *iort_smmu;
+ struct device *dev = smmu->dev;
+ struct acpi_iort_node *node;
+
+ node = *(struct acpi_iort_node **)dev_get_platdata(dev);
+
+ /* Retrieve SMMUv3 specific data */
+ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+
+ if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
+ smmu->features |= ARM_SMMU_FEAT_COHERENCY;
+
+ switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
+ case IDR0_HTTU_ACCESS_DIRTY:
+ smmu->features |= ARM_SMMU_FEAT_HD;
+ fallthrough;
+ case IDR0_HTTU_ACCESS:
+ smmu->features |= ARM_SMMU_FEAT_HA;
+ }
+
+ return acpi_smmu_iort_probe_model(node, smmu);
+}
+#else
+static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
+{
+ return -ENODEV;
+}
+#endif
+
+static int arm_smmu_device_dt_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
+{
+ struct device *dev = &pdev->dev;
+ u32 cells;
+ int ret = -EINVAL;
+
+ if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
+ dev_err(dev, "missing #iommu-cells property\n");
+ else if (cells != 1)
+ dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
+ else
+ ret = 0;
+
+ parse_driver_options(smmu);
+
+ if (of_dma_is_coherent(dev->of_node))
+ smmu->features |= ARM_SMMU_FEAT_COHERENCY;
+
+ return ret;
+}
+
+static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
+{
+ if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
+ return SZ_64K;
+ else
+ return SZ_128K;
+}
+
+static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
+ resource_size_t size)
+{
+ struct resource res = DEFINE_RES_MEM(start, size);
+
+ return devm_ioremap_resource(dev, &res);
+}
+
+static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
+{
+ struct list_head rmr_list;
+ struct iommu_resv_region *e;
+
+ INIT_LIST_HEAD(&rmr_list);
+ iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+
+ list_for_each_entry(e, &rmr_list, list) {
+ struct iommu_iort_rmr_data *rmr;
+ int ret, i;
+
+ rmr = container_of(e, struct iommu_iort_rmr_data, rr);
+ for (i = 0; i < rmr->num_sids; i++) {
+ ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
+ if (ret) {
+ dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
+ rmr->sids[i]);
+ continue;
+ }
+
+ /*
+ * STE table is not programmed to HW, see
+ * arm_smmu_initial_bypass_stes()
+ */
+ arm_smmu_make_bypass_ste(smmu,
+ arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
+ }
+ }
+
+ iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+}
+
+static void arm_smmu_impl_remove(void *data)
+{
+ struct arm_smmu_device *smmu = data;
+
+ if (smmu->impl_ops && smmu->impl_ops->device_remove)
+ smmu->impl_ops->device_remove(smmu);
+}
+
+/*
+ * Probe all the compiled in implementations. Each one checks to see if it
+ * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
+ * replaces the callers. Otherwise the original is returned or ERR_PTR.
+ */
+static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
+ const struct arm_smmu_impl_ops *ops;
+ int ret;
+
+ if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
+ new_smmu = tegra241_cmdqv_probe(smmu);
+
+ if (new_smmu == ERR_PTR(-ENODEV))
+ return smmu;
+ if (IS_ERR(new_smmu))
+ return new_smmu;
+
+ ops = new_smmu->impl_ops;
+ if (ops) {
+ /* get_viommu_size and vsmmu_init ops must be paired */
+ if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
+ ret = -EINVAL;
+ goto err_remove;
+ }
+ }
+
+ ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
+ new_smmu);
+ if (ret)
+ return ERR_PTR(ret);
+ return new_smmu;
+
+err_remove:
+ arm_smmu_impl_remove(new_smmu);
+ return ERR_PTR(ret);
+}
+
+static int arm_smmu_device_probe(struct platform_device *pdev)
+{
+ int irq, ret;
+ struct resource *res;
+ resource_size_t ioaddr;
+ struct arm_smmu_device *smmu;
+ struct device *dev = &pdev->dev;
+
+ smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
+ if (!smmu)
+ return -ENOMEM;
+ smmu->dev = dev;
+
+ if (dev->of_node) {
+ ret = arm_smmu_device_dt_probe(pdev, smmu);
+ } else {
+ ret = arm_smmu_device_acpi_probe(pdev, smmu);
+ }
+ if (ret)
+ return ret;
+
+ smmu = arm_smmu_impl_probe(smmu);
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
+
+ /* Base address */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
+ if (resource_size(res) < arm_smmu_resource_size(smmu)) {
+ dev_err(dev, "MMIO region too small (%pr)\n", res);
+ return -EINVAL;
+ }
+ ioaddr = res->start;
+
+ /*
+ * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
+ * the PMCG registers which are reserved by the PMU driver.
+ */
+ smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
+ if (IS_ERR(smmu->base))
+ return PTR_ERR(smmu->base);
+
+ if (arm_smmu_resource_size(smmu) > SZ_64K) {
+ smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
+ ARM_SMMU_REG_SZ);
+ if (IS_ERR(smmu->page1))
+ return PTR_ERR(smmu->page1);
+ } else {
+ smmu->page1 = smmu->base;
+ }
+
+ /* Interrupt lines */
+
+ irq = platform_get_irq_byname_optional(pdev, "combined");
+ if (irq > 0)
+ smmu->combined_irq = irq;
+ else {
+ irq = platform_get_irq_byname_optional(pdev, "eventq");
+ if (irq > 0)
+ smmu->evtq.q.irq = irq;
+
+ irq = platform_get_irq_byname_optional(pdev, "priq");
+ if (irq > 0)
+ smmu->priq.q.irq = irq;
+
+ irq = platform_get_irq_byname_optional(pdev, "gerror");
+ if (irq > 0)
+ smmu->gerr_irq = irq;
+ }
+ /* Probe the h/w */
+ ret = arm_smmu_device_hw_probe(smmu);
+ if (ret)
+ return ret;
+
+ /* Initialise in-memory data structures */
+ ret = arm_smmu_init_structures(smmu);
+ if (ret)
+ goto err_free_iopf;
+
+ /* Record our private device structure */
+ platform_set_drvdata(pdev, smmu);
+
+ /* Check for RMRs and install bypass STEs if any */
+ arm_smmu_rmr_install_bypass_ste(smmu);
+
+ /* Reset the device */
+ ret = arm_smmu_device_reset(smmu);
+ if (ret)
+ goto err_disable;
+
+ /* And we're up. Go go go! */
+ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
+ "smmu3.%pa", &ioaddr);
+ if (ret)
+ goto err_disable;
+
+ ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
+ if (ret) {
+ dev_err(dev, "Failed to register iommu\n");
+ goto err_free_sysfs;
+ }
+
+ return 0;
+
+err_free_sysfs:
+ iommu_device_sysfs_remove(&smmu->iommu);
+err_disable:
+ arm_smmu_device_disable(smmu);
+err_free_iopf:
+ iopf_queue_free(smmu->evtq.iopf);
+ return ret;
+}
+
+static void arm_smmu_device_remove(struct platform_device *pdev)
+{
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ iommu_device_unregister(&smmu->iommu);
+ iommu_device_sysfs_remove(&smmu->iommu);
+ arm_smmu_device_disable(smmu);
+ iopf_queue_free(smmu->evtq.iopf);
+ ida_destroy(&smmu->vmid_map);
+}
+
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
+{
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ arm_smmu_device_disable(smmu);
+}
+
+static const struct of_device_id arm_smmu_of_match[] = {
+ { .compatible = "arm,smmu-v3", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
+
+static void arm_smmu_driver_unregister(struct platform_driver *drv)
+{
+ arm_smmu_sva_notifier_synchronize();
+ platform_driver_unregister(drv);
+}
+
+static struct platform_driver arm_smmu_driver = {
+ .driver = {
+ .name = "arm-smmu-v3",
+ .of_match_table = arm_smmu_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = arm_smmu_device_probe,
+ .remove = arm_smmu_device_remove,
+ .shutdown = arm_smmu_device_shutdown,
+};
+module_driver(arm_smmu_driver, platform_driver_register,
+ arm_smmu_driver_unregister);
+
+MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
+MODULE_AUTHOR("Will Deacon <will@kernel.org>");
+MODULE_ALIAS("platform:arm-smmu-v3");
+MODULE_LICENSE("GPL v2");