summaryrefslogtreecommitdiff
path: root/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c')
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c2322
1 files changed, 1518 insertions, 804 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 41f93c3ab160..10cc6dc26b7b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -26,20 +26,21 @@
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
+#include <linux/string_choices.h>
+#include <kunit/visibility.h>
+#include <uapi/linux/iommufd.h>
#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
-static bool disable_bypass = true;
-module_param(disable_bypass, bool, 0444);
-MODULE_PARM_DESC(disable_bypass,
- "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
-
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
"Disable MSI-based polling for CMD_SYNC completion.");
+static struct iommu_ops arm_smmu_ops;
+static struct iommu_dirty_ops arm_smmu_dirty_ops;
+
enum arm_smmu_msi_index {
EVTQ_MSI_INDEX,
GERROR_MSI_INDEX,
@@ -47,8 +48,9 @@ enum arm_smmu_msi_index {
ARM_SMMU_MAX_MSIS,
};
-static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu,
- ioasid_t sid);
+#define NUM_ENTRY_QWORDS 8
+static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
+static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
[EVTQ_MSI_INDEX] = {
@@ -76,20 +78,35 @@ struct arm_smmu_option_prop {
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
-/*
- * Special value used by SVA when a process dies, to quiesce a CD without
- * disabling it.
- */
-struct arm_smmu_ctx_desc quiet_cd = { 0 };
-
static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
{ 0, NULL},
};
-static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_device *smmu);
+static const char * const event_str[] = {
+ [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
+ [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
+ [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
+ [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
+ [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
+ [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
+ [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
+ [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
+ [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
+ [EVT_ID_ACCESS_FAULT] = "F_ACCESS",
+ [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
+ [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
+};
+
+static const char * const event_class_str[] = {
+ [0] = "CD fetch",
+ [1] = "Stage 1 translation table fetch",
+ [2] = "Input address caused fault",
+ [3] = "Reserved",
+};
+
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static void parse_driver_options(struct arm_smmu_device *smmu)
{
@@ -299,6 +316,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
case CMDQ_OP_TLBI_NH_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
fallthrough;
+ case CMDQ_OP_TLBI_NH_ALL:
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
@@ -350,14 +368,30 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}
-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
{
- return &smmu->cmdq;
+ struct arm_smmu_cmdq *cmdq = NULL;
+
+ if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
+ cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+
+ return cmdq ?: &smmu->cmdq;
+}
+
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ if (cmdq == &smmu->cmdq)
+ return false;
+
+ return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q, u32 prod)
+ struct arm_smmu_cmdq *cmdq, u32 prod)
{
+ struct arm_smmu_queue *q = &cmdq->q;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
};
@@ -372,10 +406,12 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
}
arm_smmu_cmdq_build_cmd(cmd, &ent);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
-static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q)
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -383,6 +419,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
+ struct arm_smmu_queue *q = &cmdq->q;
int i;
u64 cmd[CMDQ_ENT_DWORDS];
@@ -425,13 +462,15 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
- __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+ __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}
/*
@@ -596,11 +635,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
unsigned long flags;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0;
/*
@@ -631,11 +670,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
int ret = 0;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp);
@@ -655,10 +694,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod;
int ret = 0;
@@ -705,12 +744,14 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
- return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+ !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
- return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+ return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -746,14 +787,14 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* insert their own list of commands then all of the commands from one
* CPU will appear before any of the commands from the other CPU.
*/
-static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- u64 *cmds, int n, bool sync)
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool owner;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
struct arm_smmu_ll_queue llq, head;
int ret = 0;
@@ -767,7 +808,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
- if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
@@ -793,7 +834,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
- arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
@@ -843,7 +884,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
- ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
@@ -878,7 +919,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
return -EINVAL;
}
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+ return arm_smmu_cmdq_issue_cmdlist(
+ smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -893,21 +935,33 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
+static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ cmds->num = 0;
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
+}
+
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd)
{
+ bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
+ bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
int index;
- if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
- (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
- cmds->num = 0;
+ if (force_sync || unsupported_cmd) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
- cmds->num = 0;
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, false);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
index = cmds->num * CMDQ_ENT_DWORDS;
@@ -923,7 +977,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
}
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
@@ -977,44 +1032,55 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
* would be nice if this was complete according to the spec, but minimally it
* has to capture the bits this driver uses.
*/
-static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
- struct arm_smmu_ste *used_bits)
+VISIBLE_IF_KUNIT
+void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
- unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0]));
+ unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
- used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V);
- if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V)))
+ used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
+ if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
return;
- used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
/* S1 translates */
if (cfg & BIT(0)) {
- used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
- STRTAB_STE_0_S1CTXPTR_MASK |
- STRTAB_STE_0_S1CDMAX);
- used_bits->data[1] |=
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
+ STRTAB_STE_0_S1CTXPTR_MASK |
+ STRTAB_STE_0_S1CDMAX);
+ used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
- STRTAB_STE_1_EATS);
- used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
+ STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
+ used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
+
+ /*
+ * See 13.5 Summary of attribute/permission configuration fields
+ * for the SHCFG behavior.
+ */
+ if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
+ STRTAB_STE_1_S1DSS_BYPASS)
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
/* S2 translates */
if (cfg & BIT(1)) {
- used_bits->data[1] |=
- cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
- used_bits->data[2] |=
+ used_bits[1] |=
+ cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
+ STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
+ used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
- STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
- used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
+ STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
+ STRTAB_STE_2_S2R);
+ used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
}
if (cfg == STRTAB_STE_0_CFG_BYPASS)
- used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
/*
* Figure out if we can do a hitless update of entry to become target. Returns a
@@ -1022,57 +1088,55 @@ static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
* unused_update is an intermediate value of entry that has unused bits set to
* their new values.
*/
-static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry,
- const struct arm_smmu_ste *target,
- struct arm_smmu_ste *unused_update)
+static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
+ const __le64 *entry, const __le64 *target,
+ __le64 *unused_update)
{
- struct arm_smmu_ste target_used = {};
- struct arm_smmu_ste cur_used = {};
+ __le64 target_used[NUM_ENTRY_QWORDS] = {};
+ __le64 cur_used[NUM_ENTRY_QWORDS] = {};
u8 used_qword_diff = 0;
unsigned int i;
- arm_smmu_get_ste_used(entry, &cur_used);
- arm_smmu_get_ste_used(target, &target_used);
+ writer->ops->get_used(entry, cur_used);
+ writer->ops->get_used(target, target_used);
- for (i = 0; i != ARRAY_SIZE(target_used.data); i++) {
+ for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
/*
* Check that masks are up to date, the make functions are not
* allowed to set a bit to 1 if the used function doesn't say it
* is used.
*/
- WARN_ON_ONCE(target->data[i] & ~target_used.data[i]);
+ WARN_ON_ONCE(target[i] & ~target_used[i]);
/* Bits can change because they are not currently being used */
- unused_update->data[i] = (entry->data[i] & cur_used.data[i]) |
- (target->data[i] & ~cur_used.data[i]);
+ unused_update[i] = (entry[i] & cur_used[i]) |
+ (target[i] & ~cur_used[i]);
/*
* Each bit indicates that a used bit in a qword needs to be
* changed after unused_update is applied.
*/
- if ((unused_update->data[i] & target_used.data[i]) !=
- target->data[i])
+ if ((unused_update[i] & target_used[i]) != target[i])
used_qword_diff |= 1 << i;
}
return used_qword_diff;
}
-static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
- struct arm_smmu_ste *entry,
- const struct arm_smmu_ste *target, unsigned int start,
+static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
+ const __le64 *target, unsigned int start,
unsigned int len)
{
bool changed = false;
unsigned int i;
for (i = start; len != 0; len--, i++) {
- if (entry->data[i] != target->data[i]) {
- WRITE_ONCE(entry->data[i], target->data[i]);
+ if (entry[i] != target[i]) {
+ WRITE_ONCE(entry[i], target[i]);
changed = true;
}
}
if (changed)
- arm_smmu_sync_ste_for_sid(smmu, sid);
+ writer->ops->sync(writer);
return changed;
}
@@ -1102,24 +1166,22 @@ static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
* V=0 process. This relies on the IGNORED behavior described in the
* specification.
*/
-static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
- struct arm_smmu_ste *entry,
- const struct arm_smmu_ste *target)
+VISIBLE_IF_KUNIT
+void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
+ const __le64 *target)
{
- unsigned int num_entry_qwords = ARRAY_SIZE(target->data);
- struct arm_smmu_device *smmu = master->smmu;
- struct arm_smmu_ste unused_update;
+ __le64 unused_update[NUM_ENTRY_QWORDS];
u8 used_qword_diff;
used_qword_diff =
- arm_smmu_entry_qword_diff(entry, target, &unused_update);
+ arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
if (hweight8(used_qword_diff) == 1) {
/*
* Only one qword needs its used bits to be changed. This is a
- * hitless update, update all bits the current STE is ignoring
- * to their new values, then update a single "critical qword" to
- * change the STE and finally 0 out any bits that are now unused
- * in the target configuration.
+ * hitless update, update all bits the current STE/CD is
+ * ignoring to their new values, then update a single "critical
+ * qword" to change the STE/CD and finally 0 out any bits that
+ * are now unused in the target configuration.
*/
unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
@@ -1128,22 +1190,21 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
* writing it in the next step anyways. This can save a sync
* when the only change is in that qword.
*/
- unused_update.data[critical_qword_index] =
- entry->data[critical_qword_index];
- entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords);
- entry_set(smmu, sid, entry, target, critical_qword_index, 1);
- entry_set(smmu, sid, entry, target, 0, num_entry_qwords);
+ unused_update[critical_qword_index] =
+ entry[critical_qword_index];
+ entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
+ entry_set(writer, entry, target, critical_qword_index, 1);
+ entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
} else if (used_qword_diff) {
/*
* At least two qwords need their inuse bits to be changed. This
* requires a breaking update, zero the V bit, write all qwords
* but 0, then set qword 0
*/
- unused_update.data[0] = entry->data[0] &
- cpu_to_le64(~STRTAB_STE_0_V);
- entry_set(smmu, sid, entry, &unused_update, 0, 1);
- entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
- entry_set(smmu, sid, entry, target, 0, 1);
+ unused_update[0] = 0;
+ entry_set(writer, entry, unused_update, 0, 1);
+ entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
+ entry_set(writer, entry, target, 0, 1);
} else {
/*
* No inuse bit changed. Sanity check that all unused bits are 0
@@ -1151,20 +1212,10 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
* compute_qword_diff().
*/
WARN_ON_ONCE(
- entry_set(smmu, sid, entry, target, 0, num_entry_qwords));
- }
-
- /* It's likely that we'll want to use the new STE soon */
- if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
- struct arm_smmu_cmdq_ent
- prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
- .prefetch = {
- .sid = sid,
- } };
-
- arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
}
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
@@ -1180,7 +1231,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
},
};
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -1189,138 +1240,191 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
-static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
- struct arm_smmu_l1_ctx_desc *l1_desc)
+static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
{
- size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+ u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
- l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
- &l1_desc->l2ptr_dma, GFP_KERNEL);
- if (!l1_desc->l2ptr) {
- dev_warn(smmu->dev,
- "failed to allocate context descriptor table\n");
- return -ENOMEM;
- }
- return 0;
+ /* The HW has 64 bit atomicity with stores to the L2 CD table */
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
-static void arm_smmu_write_cd_l1_desc(__le64 *dst,
- struct arm_smmu_l1_ctx_desc *l1_desc)
+static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
{
- u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
- CTXDESC_L1_DESC_V;
-
- /* See comment in arm_smmu_write_ctx_desc() */
- WRITE_ONCE(*dst, cpu_to_le64(val));
+ return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
}
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
+struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
+ u32 ssid)
{
- __le64 *l1ptr;
- unsigned int idx;
- struct arm_smmu_l1_ctx_desc *l1_desc;
- struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_cdtab_l2 *l2;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+ if (!arm_smmu_cdtab_allocated(cd_table))
+ return NULL;
+
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
- return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
+ return &cd_table->linear.table[ssid];
+
+ l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
+ if (!l2)
+ return NULL;
+ return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
+}
- idx = ssid >> CTXDESC_SPLIT;
- l1_desc = &cd_table->l1_desc[idx];
- if (!l1_desc->l2ptr) {
- if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
+ u32 ssid)
+{
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+ struct arm_smmu_device *smmu = master->smmu;
+
+ might_sleep();
+ iommu_group_mutex_assert(master->dev);
+
+ if (!arm_smmu_cdtab_allocated(cd_table)) {
+ if (arm_smmu_alloc_cd_tables(master))
return NULL;
+ }
+
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
+ unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
+ struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
- l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
- arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
- /* An invalid L1CD can be cached */
- arm_smmu_sync_cd(master, ssid, false);
+ if (!*l2ptr) {
+ dma_addr_t l2ptr_dma;
+
+ *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2ptr)
+ return NULL;
+
+ arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
+ l2ptr_dma);
+ /* An invalid L1CD can be cached */
+ arm_smmu_sync_cd(master, ssid, false);
+ }
}
- idx = ssid & (CTXDESC_L2_ENTRIES - 1);
- return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
+ return arm_smmu_get_cd_ptr(master, ssid);
}
-int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
- struct arm_smmu_ctx_desc *cd)
+struct arm_smmu_cd_writer {
+ struct arm_smmu_entry_writer writer;
+ unsigned int ssid;
+};
+
+VISIBLE_IF_KUNIT
+void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
{
+ used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
+ if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
+ return;
+ memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
+
/*
- * This function handles the following cases:
- *
- * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
- * (2) Install a secondary CD, for SID+SSID traffic.
- * (3) Update ASID of a CD. Atomically write the first 64 bits of the
- * CD, then invalidate the old entry and mappings.
- * (4) Quiesce the context without clearing the valid bit. Disable
- * translation, and ignore any translation fault.
- * (5) Remove a secondary CD.
+ * If EPD0 is set by the make function it means
+ * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
*/
- u64 val;
- bool cd_live;
- __le64 *cdptr;
- struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- struct arm_smmu_device *smmu = master->smmu;
+ if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
+ used_bits[0] &= ~cpu_to_le64(
+ CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
+ CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
+ CTXDESC_CD_0_TCR_SH0);
+ used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
+ }
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
- if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
- return -E2BIG;
+static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
+{
+ struct arm_smmu_cd_writer *cd_writer =
+ container_of(writer, struct arm_smmu_cd_writer, writer);
- cdptr = arm_smmu_get_cd_ptr(master, ssid);
- if (!cdptr)
- return -ENOMEM;
+ arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
+}
- val = le64_to_cpu(cdptr[0]);
- cd_live = !!(val & CTXDESC_CD_0_V);
-
- if (!cd) { /* (5) */
- val = 0;
- } else if (cd == &quiet_cd) { /* (4) */
- if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
- val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
- val |= CTXDESC_CD_0_TCR_EPD0;
- } else if (cd_live) { /* (3) */
- val &= ~CTXDESC_CD_0_ASID;
- val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
- /*
- * Until CD+TLB invalidation, both ASIDs may be used for tagging
- * this substream's traffic
- */
- } else { /* (1) and (2) */
- cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
- cdptr[2] = 0;
- cdptr[3] = cpu_to_le64(cd->mair);
+static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
+ .sync = arm_smmu_cd_writer_sync_entry,
+ .get_used = arm_smmu_get_cd_used,
+};
- /*
- * STE may be live, and the SMMU might read dwords of this CD in any
- * order. Ensure that it observes valid values before reading
- * V=1.
- */
- arm_smmu_sync_cd(master, ssid, true);
+void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
+ struct arm_smmu_cd *cdptr,
+ const struct arm_smmu_cd *target)
+{
+ bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
+ bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
+ struct arm_smmu_cd_writer cd_writer = {
+ .writer = {
+ .ops = &arm_smmu_cd_writer_ops,
+ .master = master,
+ },
+ .ssid = ssid,
+ };
- val = cd->tcr |
-#ifdef __BIG_ENDIAN
- CTXDESC_CD_0_ENDI |
-#endif
- CTXDESC_CD_0_R | CTXDESC_CD_0_A |
- (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
- CTXDESC_CD_0_AA64 |
- FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
- CTXDESC_CD_0_V;
-
- if (cd_table->stall_enabled)
- val |= CTXDESC_CD_0_S;
+ if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
+ if (cur_valid)
+ master->cd_table.used_ssids--;
+ else
+ master->cd_table.used_ssids++;
}
- /*
- * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
- * "Configuration structures and configuration invalidation completion"
- *
- * The size of single-copy atomic reads made by the SMMU is
- * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
- * field within an aligned 64-bit span of a structure can be altered
- * without first making the structure invalid.
- */
- WRITE_ONCE(cdptr[0], cpu_to_le64(val));
- arm_smmu_sync_cd(master, ssid, true);
- return 0;
+ arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
+}
+
+void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
+ const struct io_pgtable_cfg *pgtbl_cfg =
+ &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
+ &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+
+ memset(target, 0, sizeof(*target));
+
+ target->data[0] = cpu_to_le64(
+ FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
+#ifdef __BIG_ENDIAN
+ CTXDESC_CD_0_ENDI |
+#endif
+ CTXDESC_CD_0_TCR_EPD1 |
+ CTXDESC_CD_0_V |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
+ CTXDESC_CD_0_AA64 |
+ (master->stall_enabled ? CTXDESC_CD_0_S : 0) |
+ CTXDESC_CD_0_R |
+ CTXDESC_CD_0_A |
+ CTXDESC_CD_0_ASET |
+ FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
+ );
+
+ /* To enable dirty flag update, set both Access flag and dirty state update */
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
+ target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
+ CTXDESC_CD_0_TCR_HD);
+
+ target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
+ CTXDESC_CD_1_TTB0_MASK);
+ target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
+
+void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
+{
+ struct arm_smmu_cd target = {};
+ struct arm_smmu_cd *cdptr;
+
+ if (!arm_smmu_cdtab_allocated(&master->cd_table))
+ return;
+ cdptr = arm_smmu_get_cd_ptr(master, ssid);
+ if (WARN_ON(!cdptr))
+ return;
+ arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
@@ -1331,131 +1435,155 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- cd_table->stall_enabled = master->stall_enabled;
cd_table->s1cdmax = master->ssid_bits;
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
- cd_table->num_l1_ents = max_contexts;
+ cd_table->linear.num_ents = max_contexts;
- l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
+ l1size = max_contexts * sizeof(struct arm_smmu_cd);
+ cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->linear.table)
+ return -ENOMEM;
} else {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
- cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
- CTXDESC_L2_ENTRIES);
+ cd_table->l2.num_l1_ents =
+ DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
- cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
- sizeof(*cd_table->l1_desc),
- GFP_KERNEL);
- if (!cd_table->l1_desc)
+ cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
+ sizeof(*cd_table->l2.l2ptrs),
+ GFP_KERNEL);
+ if (!cd_table->l2.l2ptrs)
return -ENOMEM;
- l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
- }
-
- cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
- GFP_KERNEL);
- if (!cd_table->cdtab) {
- dev_warn(smmu->dev, "failed to allocate context descriptor\n");
- ret = -ENOMEM;
- goto err_free_l1;
+ l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
+ cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->l2.l2ptrs) {
+ ret = -ENOMEM;
+ goto err_free_l2ptrs;
+ }
}
-
return 0;
-err_free_l1:
- if (cd_table->l1_desc) {
- devm_kfree(smmu->dev, cd_table->l1_desc);
- cd_table->l1_desc = NULL;
- }
+err_free_l2ptrs:
+ kfree(cd_table->l2.l2ptrs);
+ cd_table->l2.l2ptrs = NULL;
return ret;
}
static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
- size_t size, l1size;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (cd_table->l1_desc) {
- size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
-
- for (i = 0; i < cd_table->num_l1_ents; i++) {
- if (!cd_table->l1_desc[i].l2ptr)
+ if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
+ for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
+ if (!cd_table->l2.l2ptrs[i])
continue;
- dmam_free_coherent(smmu->dev, size,
- cd_table->l1_desc[i].l2ptr,
- cd_table->l1_desc[i].l2ptr_dma);
+ dma_free_coherent(smmu->dev,
+ sizeof(*cd_table->l2.l2ptrs[i]),
+ cd_table->l2.l2ptrs[i],
+ arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
}
- devm_kfree(smmu->dev, cd_table->l1_desc);
- cd_table->l1_desc = NULL;
+ kfree(cd_table->l2.l2ptrs);
- l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ dma_free_coherent(smmu->dev,
+ cd_table->l2.num_l1_ents *
+ sizeof(struct arm_smmu_cdtab_l1),
+ cd_table->l2.l1tab, cd_table->cdtab_dma);
} else {
- l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ dma_free_coherent(smmu->dev,
+ cd_table->linear.num_ents *
+ sizeof(struct arm_smmu_cd),
+ cd_table->linear.table, cd_table->cdtab_dma);
}
-
- dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
- cd_table->cdtab_dma = 0;
- cd_table->cdtab = NULL;
-}
-
-bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
-{
- bool free;
- struct arm_smmu_ctx_desc *old_cd;
-
- if (!cd->asid)
- return false;
-
- free = refcount_dec_and_test(&cd->refs);
- if (free) {
- old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
- WARN_ON(old_cd != cd);
- }
- return free;
}
/* Stream table manipulation functions */
-static void
-arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
+static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
{
u64 val = 0;
- val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
- val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
+ val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
+ val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
- /* See comment in arm_smmu_write_ctx_desc() */
- WRITE_ONCE(*dst, cpu_to_le64(val));
+ /* The HW has 64 bit atomicity with stores to the L2 STE table */
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
-static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
+struct arm_smmu_ste_writer {
+ struct arm_smmu_entry_writer writer;
+ u32 sid;
+};
+
+static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
+ struct arm_smmu_ste_writer *ste_writer =
+ container_of(writer, struct arm_smmu_ste_writer, writer);
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_STE,
.cfgi = {
- .sid = sid,
+ .sid = ste_writer->sid,
.leaf = true,
},
};
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
+}
+
+static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
+ .sync = arm_smmu_ste_writer_sync_entry,
+ .get_used = arm_smmu_get_ste_used,
+};
+
+static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
+ struct arm_smmu_ste *ste,
+ const struct arm_smmu_ste *target)
+{
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ste_writer ste_writer = {
+ .writer = {
+ .ops = &arm_smmu_ste_writer_ops,
+ .master = master,
+ },
+ .sid = sid,
+ };
+
+ arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
+
+ /* It's likely that we'll want to use the new STE soon */
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
+ struct arm_smmu_cmdq_ent
+ prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
+ .prefetch = {
+ .sid = sid,
+ } };
+
+ arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ }
}
-static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
+void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
-static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
- struct arm_smmu_ste *target)
+VISIBLE_IF_KUNIT
+void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
@@ -1466,9 +1594,12 @@ static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
-static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
- struct arm_smmu_master *master)
+VISIBLE_IF_KUNIT
+void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master, bool ats_enabled,
+ unsigned int s1dss)
{
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
@@ -1482,7 +1613,7 @@ static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
target->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
+ FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
@@ -1491,7 +1622,12 @@ static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
STRTAB_STE_1_S1STALLD :
0) |
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+ ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
+ s1dss == STRTAB_STE_1_S1DSS_BYPASS)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(
+ STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
if (smmu->features & ARM_SMMU_FEAT_E2H) {
/*
@@ -1516,10 +1652,12 @@ static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
}
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
-static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
- struct arm_smmu_master *master,
- struct arm_smmu_domain *smmu_domain)
+void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain,
+ bool ats_enabled)
{
struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
const struct io_pgtable_cfg *pgtbl_cfg =
@@ -1536,8 +1674,10 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+ ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
@@ -1557,93 +1697,136 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
STRTAB_STE_2_S2ENDI |
#endif
STRTAB_STE_2_S2PTW |
+ (master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
STRTAB_STE_2_S2R);
target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
STRTAB_STE_3_S2TTB_MASK);
}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
/*
* This can safely directly manipulate the STE memory without a sync sequence
* because the STE table has not been installed in the SMMU yet.
*/
-static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
- struct arm_smmu_ste *strtab,
+static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
unsigned int nent)
{
unsigned int i;
for (i = 0; i < nent; ++i) {
- if (disable_bypass)
- arm_smmu_make_abort_ste(strtab);
- else
- arm_smmu_make_bypass_ste(smmu, strtab);
+ arm_smmu_make_abort_ste(strtab);
strtab++;
}
}
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
- size_t size;
- void *strtab;
+ dma_addr_t l2ptr_dma;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
- struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
+ struct arm_smmu_strtab_l2 **l2table;
- if (desc->l2ptr)
+ l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
+ if (*l2table)
return 0;
- size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
- strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
-
- desc->span = STRTAB_SPLIT + 1;
- desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
- GFP_KERNEL);
- if (!desc->l2ptr) {
+ *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2table) {
dev_err(smmu->dev,
"failed to allocate l2 stream table for SID %u\n",
sid);
return -ENOMEM;
}
- arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
- arm_smmu_write_strtab_l1_desc(strtab, desc);
+ arm_smmu_init_initial_stes((*l2table)->stes,
+ ARRAY_SIZE((*l2table)->stes));
+ arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
+ l2ptr_dma);
return 0;
}
+static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
+{
+ struct arm_smmu_stream *stream_rhs =
+ rb_entry(rhs, struct arm_smmu_stream, node);
+ const u32 *sid_lhs = lhs;
+
+ if (*sid_lhs < stream_rhs->id)
+ return -1;
+ if (*sid_lhs > stream_rhs->id)
+ return 1;
+ return 0;
+}
+
+static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
+ const struct rb_node *rhs)
+{
+ return arm_smmu_streams_cmp_key(
+ &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
+}
+
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
struct rb_node *node;
- struct arm_smmu_stream *stream;
lockdep_assert_held(&smmu->streams_mutex);
- node = smmu->streams.rb_node;
- while (node) {
- stream = rb_entry(node, struct arm_smmu_stream, node);
- if (stream->id < sid)
- node = node->rb_right;
- else if (stream->id > sid)
- node = node->rb_left;
- else
- return stream->master;
- }
-
- return NULL;
+ node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct arm_smmu_stream, node)->master;
}
/* IRQ and event handlers */
-static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ struct arm_smmu_master *master;
+
+ event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
+ event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
+ event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
+ event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
+ event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
+ event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
+ event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
+ event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
+ event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
+ event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
+ event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
+ event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
+ event->ipa = raw[3] & EVTQ_3_IPA;
+ event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
+ event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
+ event->class_tt = false;
+ event->dev = NULL;
+
+ if (event->id == EVT_ID_PERMISSION_FAULT)
+ event->class_tt = (event->class == EVTQ_1_CLASS_TT);
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, event->sid);
+ if (master)
+ event->dev = get_device(master->dev);
+ mutex_unlock(&smmu->streams_mutex);
+}
+
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
+ struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
struct arm_smmu_master *master;
- bool ssid_valid = evt[0] & EVTQ_0_SSV;
- u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
- switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+ switch (event->id) {
+ case EVT_ID_BAD_STE_CONFIG:
+ case EVT_ID_STREAM_DISABLED_FAULT:
+ case EVT_ID_BAD_SUBSTREAMID_CONFIG:
+ case EVT_ID_BAD_CD_CONFIG:
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
@@ -1653,73 +1836,126 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
return -EOPNOTSUPP;
}
- /* Stage-2 is always pinned at the moment */
- if (evt[1] & EVTQ_1_S2)
- return -EFAULT;
-
- if (!(evt[1] & EVTQ_1_STALL))
- return -EOPNOTSUPP;
-
- if (evt[1] & EVTQ_1_RnW)
- perm |= IOMMU_FAULT_PERM_READ;
- else
- perm |= IOMMU_FAULT_PERM_WRITE;
+ if (event->stall) {
+ if (event->read)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
- if (evt[1] & EVTQ_1_InD)
- perm |= IOMMU_FAULT_PERM_EXEC;
+ if (event->instruction)
+ perm |= IOMMU_FAULT_PERM_EXEC;
- if (evt[1] & EVTQ_1_PnU)
- perm |= IOMMU_FAULT_PERM_PRIV;
+ if (event->privileged)
+ perm |= IOMMU_FAULT_PERM_PRIV;
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request){
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = event->stag,
+ .perm = perm,
+ .addr = event->iova,
+ };
- if (ssid_valid) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+ if (event->ssv) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = event->ssid;
+ }
}
mutex_lock(&smmu->streams_mutex);
- master = arm_smmu_find_master(smmu, sid);
+ master = arm_smmu_find_master(smmu, event->sid);
if (!master) {
ret = -EINVAL;
goto out_unlock;
}
- iommu_report_device_fault(master->dev, &fault_evt);
+ if (event->stall)
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ else if (master->vmaster && !event->s2)
+ ret = arm_vmaster_report_event(master->vmaster, evt);
+ else
+ ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
}
+static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ int i;
+
+ dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
+
+ for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
+}
+
+#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
+#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
+#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? dev_name((e)->dev) : "(unassigned sid)"
+
+static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *evt,
+ struct ratelimit_state *rs)
+{
+ if (!__ratelimit(rs))
+ return;
+
+ arm_smmu_dump_raw_event(smmu, raw, evt);
+
+ switch (evt->id) {
+ case EVT_ID_TRANSLATION_FAULT:
+ case EVT_ID_ADDR_SIZE_FAULT:
+ case EVT_ID_ACCESS_FAULT:
+ case EVT_ID_PERMISSION_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->iova, evt->ipa);
+
+ dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x",
+ evt->privileged ? "priv" : "unpriv",
+ evt->instruction ? "inst" : "data",
+ str_read_write(evt->read),
+ evt->s2 ? "s2" : "s1", event_class_str[evt->class],
+ evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
+ evt->stall ? " stall" : "", evt->stag);
+
+ break;
+
+ case EVT_ID_STE_FETCH_FAULT:
+ case EVT_ID_CD_FETCH_FAULT:
+ case EVT_ID_VMS_FETCH_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->fetch_addr);
+
+ break;
+
+ default:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid);
+ }
+}
+
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
- int i, ret;
+ u64 evt[EVTQ_ENT_DWORDS];
+ struct arm_smmu_event event = {0};
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
- u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
-
- ret = arm_smmu_handle_evt(smmu, evt);
- if (!ret || !__ratelimit(&rs))
- continue;
-
- dev_info(smmu->dev, "event 0x%02x received:\n", id);
- for (i = 0; i < ARRAY_SIZE(evt); ++i)
- dev_info(smmu->dev, "\t0x%016llx\n",
- (unsigned long long)evt[i]);
+ arm_smmu_decode_event(smmu, evt, &event);
+ if (arm_smmu_handle_event(smmu, evt, &event))
+ arm_smmu_dump_event(smmu, evt, &event, &rs);
+ put_device(event.dev);
cond_resched();
}
@@ -1926,15 +2162,16 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
cmd->atc.size = log2_span;
}
-static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
+static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
+ ioasid_t ssid)
{
int i;
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_cmdq_batch cmds;
- arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
+ arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
@@ -1943,13 +2180,15 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
}
-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size)
{
+ struct arm_smmu_master_domain *master_domain;
int i;
unsigned long flags;
- struct arm_smmu_cmdq_ent cmd;
- struct arm_smmu_master *master;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_ATC_INV,
+ };
struct arm_smmu_cmdq_batch cmds;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
@@ -1972,15 +2211,27 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
- arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
-
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ struct arm_smmu_master *master = master_domain->master;
+
if (!master->ats_enabled)
continue;
+ if (master_domain->nested_ats_flush) {
+ /*
+ * If a S2 used as a nesting parent is changed we have
+ * no option but to completely flush the ATC.
+ */
+ arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
+ } else {
+ arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
+ &cmd);
+ }
+
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
@@ -2012,7 +2263,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
- arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
+ arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
}
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
@@ -2051,7 +2302,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
@@ -2106,11 +2357,20 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
}
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+ if (smmu_domain->nest_parent) {
+ /*
+ * When the S2 domain changes all the nested S1 ASIDs have to be
+ * flushed too.
+ */
+ cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
+ }
+
/*
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
- arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
+ arm_smmu_atc_inv_domain(smmu_domain, iova, size);
}
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
@@ -2151,6 +2411,13 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = {
.tlb_add_page = arm_smmu_tlb_inv_page_nosync,
};
+static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
+{
+ u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
+
+ return (smmu->features & features) == features;
+}
+
/* IOMMU API */
static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
{
@@ -2160,54 +2427,53 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
case IOMMU_CAP_CACHE_COHERENCY:
/* Assume that a coherent TCU implies coherent TBUs */
return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
+ case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+ return arm_smmu_master_canwbs(master);
case IOMMU_CAP_NOEXEC:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return arm_smmu_dbm_capable(master->smmu);
default:
return false;
}
}
-static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
+static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_master_domain *master_domain;
+ unsigned long flags;
+ bool ret = true;
- if (type == IOMMU_DOMAIN_SVA)
- return arm_smmu_sva_domain_alloc();
- return ERR_PTR(-EOPNOTSUPP);
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ if (!arm_smmu_master_canwbs(master_domain->master)) {
+ ret = false;
+ break;
+ }
+ }
+ smmu_domain->enforce_cache_coherency = ret;
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ return ret;
}
-static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
+struct arm_smmu_domain *arm_smmu_domain_alloc(void)
{
struct arm_smmu_domain *smmu_domain;
- /*
- * Allocate the domain and initialise some of its data structures.
- * We can't really do anything meaningful until we've added a
- * master.
- */
smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
if (!smmu_domain)
return ERR_PTR(-ENOMEM);
- mutex_init(&smmu_domain->init_mutex);
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
- INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
- if (dev) {
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- int ret;
-
- ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
- if (ret) {
- kfree(smmu_domain);
- return ERR_PTR(ret);
- }
- }
- return &smmu_domain->domain;
+ return smmu_domain;
}
-static void arm_smmu_domain_free(struct iommu_domain *domain)
+static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2218,7 +2484,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- arm_smmu_free_asid(&smmu_domain->cd);
+ xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -2230,45 +2496,23 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
- struct arm_smmu_domain *smmu_domain,
- struct io_pgtable_cfg *pgtbl_cfg)
+ struct arm_smmu_domain *smmu_domain)
{
int ret;
- u32 asid;
+ u32 asid = 0;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
- typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
-
- refcount_set(&cd->refs, 1);
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
- ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
- if (ret)
- goto out_unlock;
-
cd->asid = (u16)asid;
- cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
- cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
- FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
- FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
- FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
- FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
- FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
- CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
- cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
-
- mutex_unlock(&arm_smmu_asid_lock);
- return 0;
-
-out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
- struct arm_smmu_domain *smmu_domain,
- struct io_pgtable_cfg *pgtbl_cfg)
+ struct arm_smmu_domain *smmu_domain)
{
int vmid;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -2284,50 +2528,51 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
}
static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_device *smmu)
+ struct arm_smmu_device *smmu, u32 flags)
{
int ret;
- unsigned long ias, oas;
enum io_pgtable_fmt fmt;
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
- struct arm_smmu_domain *smmu_domain,
- struct io_pgtable_cfg *pgtbl_cfg);
+ struct arm_smmu_domain *smmu_domain);
+ bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
- /* Restrict the stage to what we can actually support */
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ pgtbl_cfg = (struct io_pgtable_cfg) {
+ .pgsize_bitmap = smmu->pgsize_bitmap,
+ .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
+ .tlb = &arm_smmu_flush_ops,
+ .iommu_dev = smmu->dev,
+ };
switch (smmu_domain->stage) {
- case ARM_SMMU_DOMAIN_S1:
- ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
- ias = min_t(unsigned long, ias, VA_BITS);
- oas = smmu->ias;
+ case ARM_SMMU_DOMAIN_S1: {
+ unsigned long ias = (smmu->features &
+ ARM_SMMU_FEAT_VAX) ? 52 : 48;
+
+ pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
+ pgtbl_cfg.oas = smmu->ias;
+ if (enable_dirty)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
fmt = ARM_64_LPAE_S1;
finalise_stage_fn = arm_smmu_domain_finalise_s1;
break;
+ }
case ARM_SMMU_DOMAIN_S2:
- ias = smmu->ias;
- oas = smmu->oas;
+ if (enable_dirty)
+ return -EOPNOTSUPP;
+ pgtbl_cfg.ias = smmu->ias;
+ pgtbl_cfg.oas = smmu->oas;
fmt = ARM_64_LPAE_S2;
finalise_stage_fn = arm_smmu_domain_finalise_s2;
+ if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
break;
default:
return -EINVAL;
}
- pgtbl_cfg = (struct io_pgtable_cfg) {
- .pgsize_bitmap = smmu->pgsize_bitmap,
- .ias = ias,
- .oas = oas,
- .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
- .tlb = &arm_smmu_flush_ops,
- .iommu_dev = smmu->dev,
- };
-
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops)
return -ENOMEM;
@@ -2335,8 +2580,10 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
smmu_domain->domain.geometry.force_aperture = true;
+ if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
- ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu, smmu_domain);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
@@ -2353,25 +2600,28 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
- unsigned int idx1, idx2;
-
/* Two-level walk */
- idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
- idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
- return &cfg->l1_desc[idx1].l2ptr[idx2];
+ return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
+ ->stes[arm_smmu_strtab_l2_idx(sid)];
} else {
/* Simple linear lookup */
- return (struct arm_smmu_ste *)&cfg
- ->strtab[sid * STRTAB_STE_DWORDS];
+ return &cfg->linear.table[sid];
}
}
-static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
- const struct arm_smmu_ste *target)
+void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
+ const struct arm_smmu_ste *target)
{
int i, j;
struct arm_smmu_device *smmu = master->smmu;
+ master->cd_table.in_ste =
+ FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
+ STRTAB_STE_0_CFG_S1_TRANS;
+ master->ste_ats_enabled =
+ FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
+ STRTAB_STE_1_EATS_TRANS;
+
for (i = 0; i < master->num_streams; ++i) {
u32 sid = master->streams[i].id;
struct arm_smmu_ste *step =
@@ -2403,41 +2653,22 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
-static void arm_smmu_enable_ats(struct arm_smmu_master *master,
- struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
size_t stu;
struct pci_dev *pdev;
struct arm_smmu_device *smmu = master->smmu;
- /* Don't enable ATS at the endpoint if it's not enabled in the STE */
- if (!master->ats_enabled)
- return;
-
/* Smallest Translation Unit: log2 of the smallest supported granule */
stu = __ffs(smmu->pgsize_bitmap);
pdev = to_pci_dev(master->dev);
- atomic_inc(&smmu_domain->nr_ats_masters);
- arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
- if (pci_enable_ats(pdev, stu))
- dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
-}
-
-static void arm_smmu_disable_ats(struct arm_smmu_master *master,
- struct arm_smmu_domain *smmu_domain)
-{
- if (!master->ats_enabled)
- return;
-
- pci_disable_ats(to_pci_dev(master->dev));
/*
- * Ensure ATS is disabled at the endpoint before we issue the
- * ATC invalidation via the SMMU.
+ * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
*/
- wmb();
- arm_smmu_atc_inv_master(master);
- atomic_dec(&smmu_domain->nr_ats_masters);
+ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
+ if (pci_enable_ats(pdev, stu))
+ dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
@@ -2487,61 +2718,318 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
pci_disable_pasid(pdev);
}
-static void arm_smmu_detach_dev(struct arm_smmu_master *master)
+static struct arm_smmu_master_domain *
+arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
+ struct iommu_domain *domain,
+ struct arm_smmu_master *master,
+ ioasid_t ssid, bool nested_ats_flush)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
- struct arm_smmu_domain *smmu_domain;
+ struct arm_smmu_master_domain *master_domain;
+
+ lockdep_assert_held(&smmu_domain->devices_lock);
+
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ if (master_domain->master == master &&
+ master_domain->domain == domain &&
+ master_domain->ssid == ssid &&
+ master_domain->nested_ats_flush == nested_ats_flush)
+ return master_domain;
+ }
+ return NULL;
+}
+
+/*
+ * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
+ * structure, otherwise NULL. These domains track attached devices so they can
+ * issue invalidations.
+ */
+static struct arm_smmu_domain *
+to_smmu_domain_devices(struct iommu_domain *domain)
+{
+ /* The domain can be NULL only when processing the first attach */
+ if (!domain)
+ return NULL;
+ if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
+ domain->type == IOMMU_DOMAIN_SVA)
+ return to_smmu_domain(domain);
+ if (domain->type == IOMMU_DOMAIN_NESTED)
+ return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
+ return NULL;
+}
+
+static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ int ret;
+
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return -EOPNOTSUPP;
+
+ /*
+ * Drivers for devices supporting PRI or stall require iopf others have
+ * device-specific fault handlers and don't need IOPF, so this is not a
+ * failure.
+ */
+ if (!master->stall_enabled)
+ return 0;
+
+ /* We're not keeping track of SIDs in fault events */
+ if (master->num_streams != 1)
+ return -EOPNOTSUPP;
+
+ if (master->iopf_refcount) {
+ master->iopf_refcount++;
+ master_domain->using_iopf = true;
+ return 0;
+ }
+
+ ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
+ if (ret)
+ return ret;
+ master->iopf_refcount = 1;
+ master_domain->using_iopf = true;
+ return 0;
+}
+
+static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return;
+
+ if (!master_domain || !master_domain->using_iopf)
+ return;
+
+ master->iopf_refcount--;
+ if (master->iopf_refcount == 0)
+ iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
+}
+
+static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
+ struct iommu_domain *domain,
+ ioasid_t ssid)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
+ struct arm_smmu_master_domain *master_domain;
+ bool nested_ats_flush = false;
unsigned long flags;
- if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
+ if (!smmu_domain)
return;
- smmu_domain = to_smmu_domain(domain);
- arm_smmu_disable_ats(master, smmu_domain);
+ if (domain->type == IOMMU_DOMAIN_NESTED)
+ nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_del_init(&master->domain_head);
+ master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
+ ssid, nested_ats_flush);
+ if (master_domain) {
+ list_del(&master_domain->devices_elm);
+ if (master->ats_enabled)
+ atomic_dec(&smmu_domain->nr_ats_masters);
+ }
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- master->ats_enabled = false;
+ arm_smmu_disable_iopf(master, master_domain);
+ kfree(master_domain);
+}
+
+/*
+ * Start the sequence to attach a domain to a master. The sequence contains three
+ * steps:
+ * arm_smmu_attach_prepare()
+ * arm_smmu_install_ste_for_dev()
+ * arm_smmu_attach_commit()
+ *
+ * If prepare succeeds then the sequence must be completed. The STE installed
+ * must set the STE.EATS field according to state.ats_enabled.
+ *
+ * If the device supports ATS then this determines if EATS should be enabled
+ * in the STE, and starts sequencing EATS disable if required.
+ *
+ * The change of the EATS in the STE and the PCI ATS config space is managed by
+ * this sequence to be in the right order so that if PCI ATS is enabled then
+ * STE.ETAS is enabled.
+ *
+ * new_domain can be a non-paging domain. In this case ATS will not be enabled,
+ * and invalidations won't be tracked.
+ */
+int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
+ struct iommu_domain *new_domain)
+{
+ struct arm_smmu_master *master = state->master;
+ struct arm_smmu_master_domain *master_domain;
+ struct arm_smmu_domain *smmu_domain =
+ to_smmu_domain_devices(new_domain);
+ unsigned long flags;
+ int ret;
+
+ /*
+ * arm_smmu_share_asid() must not see two domains pointing to the same
+ * arm_smmu_master_domain contents otherwise it could randomly write one
+ * or the other to the CD.
+ */
+ lockdep_assert_held(&arm_smmu_asid_lock);
+
+ if (smmu_domain || state->cd_needs_ats) {
+ /*
+ * The SMMU does not support enabling ATS with bypass/abort.
+ * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
+ * Translation Requests and Translated transactions are denied
+ * as though ATS is disabled for the stream (STE.EATS == 0b00),
+ * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
+ * (IHI0070Ea 5.2 Stream Table Entry).
+ *
+ * However, if we have installed a CD table and are using S1DSS
+ * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
+ * skipping stage 1".
+ *
+ * Disable ATS if we are going to create a normal 0b100 bypass
+ * STE.
+ */
+ state->ats_enabled = !state->disable_ats &&
+ arm_smmu_ats_supported(master);
+ }
+
+ if (smmu_domain) {
+ if (new_domain->type == IOMMU_DOMAIN_NESTED) {
+ ret = arm_smmu_attach_prepare_vmaster(
+ state, to_smmu_nested_domain(new_domain));
+ if (ret)
+ return ret;
+ }
+
+ master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
+ if (!master_domain) {
+ kfree(state->vmaster);
+ return -ENOMEM;
+ }
+ master_domain->domain = new_domain;
+ master_domain->master = master;
+ master_domain->ssid = state->ssid;
+ if (new_domain->type == IOMMU_DOMAIN_NESTED)
+ master_domain->nested_ats_flush =
+ to_smmu_nested_domain(new_domain)->enable_ats;
+
+ if (new_domain->iopf_handler) {
+ ret = arm_smmu_enable_iopf(master, master_domain);
+ if (ret)
+ goto err_free_master_domain;
+ }
+
+ /*
+ * During prepare we want the current smmu_domain and new
+ * smmu_domain to be in the devices list before we change any
+ * HW. This ensures that both domains will send ATS
+ * invalidations to the master until we are done.
+ *
+ * It is tempting to make this list only track masters that are
+ * using ATS, but arm_smmu_share_asid() also uses this to change
+ * the ASID of a domain, unrelated to ATS.
+ *
+ * Notice if we are re-attaching the same domain then the list
+ * will have two identical entries and commit will remove only
+ * one of them.
+ */
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->enforce_cache_coherency &&
+ !arm_smmu_master_canwbs(master)) {
+ spin_unlock_irqrestore(&smmu_domain->devices_lock,
+ flags);
+ kfree(state->vmaster);
+ ret = -EINVAL;
+ goto err_iopf;
+ }
+
+ if (state->ats_enabled)
+ atomic_inc(&smmu_domain->nr_ats_masters);
+ list_add(&master_domain->devices_elm, &smmu_domain->devices);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ }
+
+ if (!state->ats_enabled && master->ats_enabled) {
+ pci_disable_ats(to_pci_dev(master->dev));
+ /*
+ * This is probably overkill, but the config write for disabling
+ * ATS should complete before the STE is configured to generate
+ * UR to avoid AER noise.
+ */
+ wmb();
+ }
+ return 0;
+
+err_iopf:
+ arm_smmu_disable_iopf(master, master_domain);
+err_free_master_domain:
+ kfree(master_domain);
+ return ret;
+}
+
+/*
+ * Commit is done after the STE/CD are configured with the EATS setting. It
+ * completes synchronizing the PCI device's ATC and finishes manipulating the
+ * smmu_domain->devices list.
+ */
+void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
+{
+ struct arm_smmu_master *master = state->master;
+
+ lockdep_assert_held(&arm_smmu_asid_lock);
+
+ arm_smmu_attach_commit_vmaster(state);
+
+ if (state->ats_enabled && !master->ats_enabled) {
+ arm_smmu_enable_ats(master);
+ } else if (state->ats_enabled && master->ats_enabled) {
+ /*
+ * The translation has changed, flush the ATC. At this point the
+ * SMMU is translating for the new domain and both the old&new
+ * domain will issue invalidations.
+ */
+ arm_smmu_atc_inv_master(master, state->ssid);
+ } else if (!state->ats_enabled && master->ats_enabled) {
+ /* ATS is being switched off, invalidate the entire ATC */
+ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
+ }
+ master->ats_enabled = state->ats_enabled;
+
+ arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
- unsigned long flags;
struct arm_smmu_ste target;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_attach_state state = {
+ .old_domain = iommu_get_domain_for_dev(dev),
+ .ssid = IOMMU_NO_PASID,
+ };
struct arm_smmu_master *master;
+ struct arm_smmu_cd *cdptr;
if (!fwspec)
return -ENOENT;
- master = dev_iommu_priv_get(dev);
+ state.master = master = dev_iommu_priv_get(dev);
smmu = master->smmu;
- /*
- * Checking that SVA is disabled ensures that this device isn't bound to
- * any mm, and can be safely detached from its old domain. Bonds cannot
- * be removed concurrently since we're holding the group mutex.
- */
- if (arm_smmu_master_sva_enabled(master)) {
- dev_err(dev, "cannot attach - SVA enabled\n");
- return -EBUSY;
- }
-
- mutex_lock(&smmu_domain->init_mutex);
-
- if (!smmu_domain->smmu) {
- ret = arm_smmu_domain_finalise(smmu_domain, smmu);
- } else if (smmu_domain->smmu != smmu)
- ret = -EINVAL;
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
- mutex_unlock(&smmu_domain->init_mutex);
- if (ret)
- return ret;
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
+ if (!cdptr)
+ return -ENOMEM;
+ } else if (arm_smmu_ssids_in_use(&master->cd_table))
+ return -EBUSY;
/*
* Prevent arm_smmu_share_asid() from trying to change the ASID
@@ -2551,67 +3039,176 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
*/
mutex_lock(&arm_smmu_asid_lock);
- arm_smmu_detach_dev(master);
-
- master->ats_enabled = arm_smmu_ats_supported(master);
-
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_add(&master->domain_head, &smmu_domain->devices);
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ ret = arm_smmu_attach_prepare(&state, domain);
+ if (ret) {
+ mutex_unlock(&arm_smmu_asid_lock);
+ return ret;
+ }
switch (smmu_domain->stage) {
- case ARM_SMMU_DOMAIN_S1:
- if (!master->cd_table.cdtab) {
- ret = arm_smmu_alloc_cd_tables(master);
- if (ret)
- goto out_list_del;
- } else {
- /*
- * arm_smmu_write_ctx_desc() relies on the entry being
- * invalid to work, clear any existing entry.
- */
- ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
- NULL);
- if (ret)
- goto out_list_del;
- }
-
- ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
- if (ret)
- goto out_list_del;
-
- arm_smmu_make_cdtable_ste(&target, master);
+ case ARM_SMMU_DOMAIN_S1: {
+ struct arm_smmu_cd target_cd;
+
+ arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+ arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
+ &target_cd);
+ arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
+ STRTAB_STE_1_S1DSS_SSID0);
arm_smmu_install_ste_for_dev(master, &target);
break;
+ }
case ARM_SMMU_DOMAIN_S2:
- arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
+ arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
+ state.ats_enabled);
arm_smmu_install_ste_for_dev(master, &target);
- if (master->cd_table.cdtab)
- arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
- NULL);
+ arm_smmu_clear_cd(master, IOMMU_NO_PASID);
break;
}
- arm_smmu_enable_ats(master, smmu_domain);
- goto out_unlock;
+ arm_smmu_attach_commit(&state);
+ mutex_unlock(&arm_smmu_asid_lock);
+ return 0;
+}
-out_list_del:
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_del_init(&master->domain_head);
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t id,
+ struct iommu_domain *old)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_cd target_cd;
+
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
+
+ if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+ return -EINVAL;
+
+ /*
+ * We can read cd.asid outside the lock because arm_smmu_set_pasid()
+ * will fix it
+ */
+ arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+ return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
+ &target_cd, old);
+}
+
+static void arm_smmu_update_ste(struct arm_smmu_master *master,
+ struct iommu_domain *sid_domain,
+ bool ats_enabled)
+{
+ unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
+ struct arm_smmu_ste ste;
+
+ if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
+ return;
+
+ if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
+ s1dss = STRTAB_STE_1_S1DSS_BYPASS;
+ else
+ WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
+
+ /*
+ * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
+ * using s1dss if necessary. If the cd_table is already installed then
+ * the S1DSS is correct and this will just update the EATS. Otherwise it
+ * installs the entire thing. This will be hitless.
+ */
+ arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
+ arm_smmu_install_ste_for_dev(master, &ste);
+}
+
+int arm_smmu_set_pasid(struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
+ struct arm_smmu_cd *cd, struct iommu_domain *old)
+{
+ struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
+ struct arm_smmu_attach_state state = {
+ .master = master,
+ .ssid = pasid,
+ .old_domain = old,
+ };
+ struct arm_smmu_cd *cdptr;
+ int ret;
+
+ /* The core code validates pasid */
+
+ if (smmu_domain->smmu != master->smmu)
+ return -EINVAL;
+
+ if (!master->cd_table.in_ste &&
+ sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
+ sid_domain->type != IOMMU_DOMAIN_BLOCKED)
+ return -EINVAL;
+
+ cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
+ if (!cdptr)
+ return -ENOMEM;
+
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * We don't want to obtain to the asid_lock too early, so fix up the
+ * caller set ASID under the lock in case it changed.
+ */
+ cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
+ cd->data[0] |= cpu_to_le64(
+ FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
+
+ arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
+ arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
+
+ arm_smmu_attach_commit(&state);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
-static int arm_smmu_attach_dev_ste(struct device *dev,
- struct arm_smmu_ste *ste)
+static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old_domain)
{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- if (arm_smmu_master_sva_enabled(master))
- return -EBUSY;
+ mutex_lock(&arm_smmu_asid_lock);
+ arm_smmu_clear_cd(master, pasid);
+ if (master->ats_enabled)
+ arm_smmu_atc_inv_master(master, pasid);
+ arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
+ mutex_unlock(&arm_smmu_asid_lock);
+
+ /*
+ * When the last user of the CD table goes away downgrade the STE back
+ * to a non-cd_table one.
+ */
+ if (!arm_smmu_ssids_in_use(&master->cd_table)) {
+ struct iommu_domain *sid_domain =
+ iommu_get_domain_for_dev(master->dev);
+
+ if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
+ sid_domain->type == IOMMU_DOMAIN_BLOCKED)
+ sid_domain->ops->attach_dev(sid_domain, dev);
+ }
+ return 0;
+}
+
+static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
+ struct device *dev,
+ struct arm_smmu_ste *ste,
+ unsigned int s1dss)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_attach_state state = {
+ .master = master,
+ .old_domain = iommu_get_domain_for_dev(dev),
+ .ssid = IOMMU_NO_PASID,
+ };
/*
* Do not allow any ASID to be changed while are working on the STE,
@@ -2620,15 +3217,25 @@ static int arm_smmu_attach_dev_ste(struct device *dev,
mutex_lock(&arm_smmu_asid_lock);
/*
- * The SMMU does not support enabling ATS with bypass/abort. When the
- * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
- * and Translated transactions are denied as though ATS is disabled for
- * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
- * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
+ * If the CD table is not in use we can use the provided STE, otherwise
+ * we use a cdtable STE with the provided S1DSS.
*/
- arm_smmu_detach_dev(master);
-
+ if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ /*
+ * If a CD table has to be present then we need to run with ATS
+ * on because we have to assume a PASID is using ATS. For
+ * IDENTITY this will setup things so that S1DSS=bypass which
+ * follows the explanation in "13.6.4 Full ATS skipping stage 1"
+ * and allows for ATS on the RID to work.
+ */
+ state.cd_needs_ats = true;
+ arm_smmu_attach_prepare(&state, domain);
+ arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
+ } else {
+ arm_smmu_attach_prepare(&state, domain);
+ }
arm_smmu_install_ste_for_dev(master, ste);
+ arm_smmu_attach_commit(&state);
mutex_unlock(&arm_smmu_asid_lock);
/*
@@ -2636,9 +3243,7 @@ static int arm_smmu_attach_dev_ste(struct device *dev,
* arm_smmu_domain->devices to avoid races updating the same context
* descriptor from arm_smmu_share_asid().
*/
- if (master->cd_table.cdtab)
- arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
- return 0;
+ arm_smmu_clear_cd(master, IOMMU_NO_PASID);
}
static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
@@ -2647,8 +3252,10 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
- return arm_smmu_attach_dev_ste(dev, &ste);
+ arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
+ return 0;
}
static const struct iommu_domain_ops arm_smmu_identity_ops = {
@@ -2664,13 +3271,18 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
- return arm_smmu_attach_dev_ste(dev, &ste);
+ arm_smmu_attach_dev_ste(domain, dev, &ste,
+ STRTAB_STE_1_S1DSS_TERMINATE);
+ return 0;
}
static const struct iommu_domain_ops arm_smmu_blocked_ops = {
.attach_dev = arm_smmu_attach_dev_blocked,
+ .set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};
static struct iommu_domain arm_smmu_blocked_domain = {
@@ -2678,6 +3290,69 @@ static struct iommu_domain arm_smmu_blocked_domain = {
.ops = &arm_smmu_blocked_ops,
};
+static struct iommu_domain *
+arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
+ const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID |
+ IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct arm_smmu_domain *smmu_domain;
+ int ret;
+
+ if (flags & ~PAGING_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smmu_domain = arm_smmu_domain_alloc();
+ if (IS_ERR(smmu_domain))
+ return ERR_CAST(smmu_domain);
+
+ switch (flags) {
+ case 0:
+ /* Prefer S1 if available */
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ break;
+ case IOMMU_HWPT_ALLOC_NEST_PARENT:
+ if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ smmu_domain->nest_parent = true;
+ break;
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
+ case IOMMU_HWPT_ALLOC_PASID:
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+
+ smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
+ smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
+ ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
+ if (ret)
+ goto err_free;
+ return &smmu_domain->domain;
+
+err_free:
+ kfree(smmu_domain);
+ return ERR_PTR(ret);
+}
+
static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -2740,20 +3415,17 @@ static struct platform_driver arm_smmu_driver;
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
- fwnode);
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
{
- unsigned long limit = smmu->strtab_cfg.num_l1_ents;
-
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
- limit *= 1UL << STRTAB_SPLIT;
-
- return sid < limit;
+ return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
+ return sid < smmu->strtab_cfg.linear.num_ents;
}
static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
@@ -2774,8 +3446,6 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
{
int i;
int ret = 0;
- struct arm_smmu_stream *new_stream, *cur_stream;
- struct rb_node **new_node, *parent_node = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
@@ -2786,9 +3456,10 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++) {
+ struct arm_smmu_stream *new_stream = &master->streams[i];
+ struct rb_node *existing;
u32 sid = fwspec->ids[i];
- new_stream = &master->streams[i];
new_stream->id = sid;
new_stream->master = master;
@@ -2797,28 +3468,23 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
break;
/* Insert into SID tree */
- new_node = &(smmu->streams.rb_node);
- while (*new_node) {
- cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
- node);
- parent_node = *new_node;
- if (cur_stream->id > new_stream->id) {
- new_node = &((*new_node)->rb_left);
- } else if (cur_stream->id < new_stream->id) {
- new_node = &((*new_node)->rb_right);
- } else {
- dev_warn(master->dev,
- "stream %u already in tree\n",
- cur_stream->id);
- ret = -EINVAL;
- break;
- }
- }
- if (ret)
- break;
+ existing = rb_find_add(&new_stream->node, &smmu->streams,
+ arm_smmu_streams_cmp_node);
+ if (existing) {
+ struct arm_smmu_master *existing_master =
+ rb_entry(existing, struct arm_smmu_stream, node)
+ ->master;
+
+ /* Bridged PCI devices may end up with duplicated IDs */
+ if (existing_master == master)
+ continue;
- rb_link_node(&new_stream->node, parent_node, new_node);
- rb_insert_color(&new_stream->node, &smmu->streams);
+ dev_warn(master->dev,
+ "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n",
+ sid, dev_name(existing_master->dev));
+ ret = -ENODEV;
+ break;
+ }
}
if (ret) {
@@ -2848,8 +3514,6 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
kfree(master->streams);
}
-static struct iommu_ops arm_smmu_ops;
-
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
@@ -2870,8 +3534,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
master->dev = dev;
master->smmu = smmu;
- INIT_LIST_HEAD(&master->bonds);
- INIT_LIST_HEAD(&master->domain_head);
dev_iommu_priv_set(dev, master);
ret = arm_smmu_insert_master(smmu, master);
@@ -2900,6 +3562,12 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
+ if (dev_is_pci(dev)) {
+ unsigned int stu = __ffs(smmu->pgsize_bitmap);
+
+ pci_prepare_ats(to_pci_dev(dev), stu);
+ }
+
return &smmu->iommu;
err_free_master:
@@ -2911,22 +3579,42 @@ static void arm_smmu_release_device(struct device *dev)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- if (WARN_ON(arm_smmu_master_sva_enabled(master)))
- iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+ WARN_ON(master->iopf_refcount);
/* Put the STE back to what arm_smmu_init_strtab() sets */
- if (disable_bypass && !dev->iommu->require_direct)
- arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
- else
+ if (dev->iommu->require_direct)
arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
+ else
+ arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
- if (master->cd_table.cdtab)
+ if (arm_smmu_cdtab_allocated(&master->cd_table))
arm_smmu_free_cd_tables(master);
kfree(master);
}
+static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+ return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
+static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enabled)
+{
+ /*
+ * Always enabled and the dirty bitmap is cleared prior to
+ * set_dirty_tracking().
+ */
+ return 0;
+}
+
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
struct iommu_group *group;
@@ -2944,21 +3632,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
return group;
}
-static int arm_smmu_enable_nesting(struct iommu_domain *domain)
-{
- struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- int ret = 0;
-
- mutex_lock(&smmu_domain->init_mutex);
- if (smmu_domain->smmu)
- ret = -EPERM;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
- mutex_unlock(&smmu_domain->init_mutex);
-
- return ret;
-}
-
static int arm_smmu_of_xlate(struct device *dev,
const struct of_phandle_args *args)
{
@@ -2981,58 +3654,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
-static int arm_smmu_dev_enable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return -ENODEV;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- if (!arm_smmu_master_iopf_supported(master))
- return -EINVAL;
- if (master->iopf_enabled)
- return -EBUSY;
- master->iopf_enabled = true;
- return 0;
- case IOMMU_DEV_FEAT_SVA:
- if (!arm_smmu_master_sva_supported(master))
- return -EINVAL;
- if (arm_smmu_master_sva_enabled(master))
- return -EBUSY;
- return arm_smmu_master_enable_sva(master);
- default:
- return -EINVAL;
- }
-}
-
-static int arm_smmu_dev_disable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return -EINVAL;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- if (!master->iopf_enabled)
- return -EINVAL;
- if (master->sva_enabled)
- return -EBUSY;
- master->iopf_enabled = false;
- return 0;
- case IOMMU_DEV_FEAT_SVA:
- if (!arm_smmu_master_sva_enabled(master))
- return -EINVAL;
- return arm_smmu_master_disable_sva(master);
- default:
- return -EINVAL;
- }
-}
-
/*
* HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
* PCIe link and save the data to memory by DMA. The hardware is restricted to
@@ -3053,54 +3674,47 @@ static int arm_smmu_def_domain_type(struct device *dev)
return 0;
}
-static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
-{
- struct iommu_domain *domain;
-
- domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
- if (WARN_ON(IS_ERR(domain)) || !domain)
- return;
-
- arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
-}
-
static struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
- .domain_alloc = arm_smmu_domain_alloc,
- .domain_alloc_paging = arm_smmu_domain_alloc_paging,
+ .hw_info = arm_smmu_hw_info,
+ .domain_alloc_sva = arm_smmu_sva_domain_alloc,
+ .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .remove_dev_pasid = arm_smmu_remove_dev_pasid,
- .dev_enable_feat = arm_smmu_dev_enable_feature,
- .dev_disable_feat = arm_smmu_dev_disable_feature,
.page_response = arm_smmu_page_response,
.def_domain_type = arm_smmu_def_domain_type,
+ .viommu_alloc = arm_vsmmu_alloc,
+ .user_pasid_table = 1,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = arm_smmu_attach_dev,
+ .enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
+ .set_dev_pasid = arm_smmu_s1_set_dev_pasid,
.map_pages = arm_smmu_map_pages,
.unmap_pages = arm_smmu_unmap_pages,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .enable_nesting = arm_smmu_enable_nesting,
- .free = arm_smmu_domain_free,
+ .free = arm_smmu_domain_free_paging,
}
};
+static struct iommu_dirty_ops arm_smmu_dirty_ops = {
+ .read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
+ .set_dirty_tracking = arm_smmu_set_dirty_tracking,
+};
+
/* Probing and initialisation functions */
-static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q,
- void __iomem *page,
- unsigned long prod_off,
- unsigned long cons_off,
- size_t dwords, const char *name)
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name)
{
size_t qsz;
@@ -3138,9 +3752,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0;
}
-static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
- struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_set(&cmdq->owner_prod, 0);
@@ -3165,7 +3779,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
- ret = arm_smmu_cmdq_init(smmu);
+ ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
if (ret)
return ret;
@@ -3192,109 +3806,71 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
PRIQ_ENT_DWORDS, "priq");
}
-static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
-{
- unsigned int i;
- struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
- void *strtab = smmu->strtab_cfg.strtab;
-
- cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
- sizeof(*cfg->l1_desc), GFP_KERNEL);
- if (!cfg->l1_desc)
- return -ENOMEM;
-
- for (i = 0; i < cfg->num_l1_ents; ++i) {
- arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
- strtab += STRTAB_L1_DESC_DWORDS << 3;
- }
-
- return 0;
-}
-
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
- void *strtab;
- u64 reg;
- u32 size, l1size;
+ u32 l1size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ unsigned int last_sid_idx =
+ arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
/* Calculate the L1 size, capped to the SIDSIZE. */
- size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
- size = min(size, smmu->sid_bits - STRTAB_SPLIT);
- cfg->num_l1_ents = 1 << size;
-
- size += STRTAB_SPLIT;
- if (size < smmu->sid_bits)
+ cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
+ if (cfg->l2.num_l1_ents <= last_sid_idx)
dev_warn(smmu->dev,
"2-level strtab only covers %u/%u bits of SID\n",
- size, smmu->sid_bits);
+ ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
+ smmu->sid_bits);
- l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
- strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
- GFP_KERNEL);
- if (!strtab) {
+ l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
+ cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
+ GFP_KERNEL);
+ if (!cfg->l2.l1tab) {
dev_err(smmu->dev,
"failed to allocate l1 stream table (%u bytes)\n",
l1size);
return -ENOMEM;
}
- cfg->strtab = strtab;
- /* Configure strtab_base_cfg for 2 levels */
- reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
- cfg->strtab_base_cfg = reg;
+ cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
+ sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
+ if (!cfg->l2.l2ptrs)
+ return -ENOMEM;
- return arm_smmu_init_l1_strtab(smmu);
+ return 0;
}
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
{
- void *strtab;
- u64 reg;
u32 size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
- size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
- strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
- GFP_KERNEL);
- if (!strtab) {
+ size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
+ cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
+ &cfg->linear.ste_dma,
+ GFP_KERNEL);
+ if (!cfg->linear.table) {
dev_err(smmu->dev,
"failed to allocate linear stream table (%u bytes)\n",
size);
return -ENOMEM;
}
- cfg->strtab = strtab;
- cfg->num_l1_ents = 1 << smmu->sid_bits;
-
- /* Configure strtab_base_cfg for a linear table covering all SIDs */
- reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
- cfg->strtab_base_cfg = reg;
+ cfg->linear.num_ents = 1 << smmu->sid_bits;
- arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
+ arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
return 0;
}
static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
- u64 reg;
int ret;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
ret = arm_smmu_init_strtab_2lvl(smmu);
else
ret = arm_smmu_init_strtab_linear(smmu);
-
if (ret)
return ret;
- /* Set the strtab base address */
- reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
- reg |= STRTAB_BASE_RA;
- smmu->strtab_cfg.strtab_base = reg;
-
ida_init(&smmu->vmid_map);
return 0;
@@ -3311,7 +3887,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
if (ret)
return ret;
- return arm_smmu_init_strtab(smmu);
+ ret = arm_smmu_init_strtab(smmu);
+ if (ret)
+ return ret;
+
+ if (smmu->impl_ops && smmu->impl_ops->init_structures)
+ return smmu->impl_ops->init_structures(smmu);
+
+ return 0;
}
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
@@ -3402,7 +3985,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
/* Add callback to free MSIs on teardown */
- devm_add_action(dev, arm_smmu_free_msis, dev);
+ devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
}
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
@@ -3503,7 +4086,31 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
return ret;
}
-static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
+static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ dma_addr_t dma;
+ u32 reg;
+
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_2LVL) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
+ ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
+ FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
+ dma = cfg->l2.l1_dma;
+ } else {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_LINEAR) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
+ dma = cfg->linear.ste_dma;
+ }
+ writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
+ smmu->base + ARM_SMMU_STRTAB_BASE);
+ writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
+}
+
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
int ret;
u32 reg, enables;
@@ -3513,7 +4120,6 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
if (reg & CR0_SMMUEN) {
dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
- WARN_ON(is_kdump_kernel() && !disable_bypass);
arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
}
@@ -3539,10 +4145,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
/* Stream table */
- writeq_relaxed(smmu->strtab_cfg.strtab_base,
- smmu->base + ARM_SMMU_STRTAB_BASE);
- writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
- smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
+ arm_smmu_write_strtab(smmu);
/* Command queue */
writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
@@ -3620,14 +4223,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
if (is_kdump_kernel())
enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
- /* Enable the SMMU interface, or ensure bypass */
- if (!bypass || disable_bypass) {
- enables |= CR0_SMMUEN;
- } else {
- ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
- if (ret)
- return ret;
- }
+ /* Enable the SMMU interface */
+ enables |= CR0_SMMUEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
if (ret) {
@@ -3635,6 +4232,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return ret;
}
+ if (smmu->impl_ops && smmu->impl_ops->device_reset) {
+ ret = smmu->impl_ops->device_reset(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed to reset impl\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -3676,6 +4281,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
}
}
+static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
+{
+ u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
+ u32 hw_features = 0;
+
+ switch (FIELD_GET(IDR0_HTTU, reg)) {
+ case IDR0_HTTU_ACCESS_DIRTY:
+ hw_features |= ARM_SMMU_FEAT_HD;
+ fallthrough;
+ case IDR0_HTTU_ACCESS:
+ hw_features |= ARM_SMMU_FEAT_HA;
+ }
+
+ if (smmu->dev->of_node)
+ smmu->features |= hw_features;
+ else if (hw_features != fw_features)
+ /* ACPI IORT sets the HTTU bits */
+ dev_warn(smmu->dev,
+ "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
+ hw_features, fw_features);
+}
+
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
u32 reg;
@@ -3736,13 +4363,15 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->features |= ARM_SMMU_FEAT_E2H;
}
+ arm_smmu_get_httu(smmu, reg);
+
/*
* The coherency feature as set by FW is used in preference to the ID
* register, but warn on mismatch.
*/
if (!!(reg & IDR0_COHACC) != coherent)
dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
- coherent ? "true" : "false");
+ str_true_false(coherent));
switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
case IDR0_STALL_MODEL_FORCE:
@@ -3825,6 +4454,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
if (FIELD_GET(IDR3_RIL, reg))
smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
+ if (FIELD_GET(IDR3_FWB, reg))
+ smmu->features |= ARM_SMMU_FEAT_S2FWB;
/* IDR5 */
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
@@ -3900,18 +4531,55 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
}
#ifdef CONFIG_ACPI
-static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+#ifdef CONFIG_TEGRA241_CMDQV
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+ const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
+ struct acpi_device *adev;
+
+ /* Look for an NVDA200C node whose _UID matches the SMMU node ID */
+ adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
+ if (adev) {
+ /* Tegra241 CMDQV driver is responsible for put_device() */
+ smmu->impl_dev = &adev->dev;
+ smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
+ dev_info(smmu->dev, "found companion CMDQV device: %s\n",
+ dev_name(smmu->impl_dev));
+ }
+ kfree(uid);
+}
+#else
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+}
+#endif
+
+static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
{
- switch (model) {
+ struct acpi_iort_smmu_v3 *iort_smmu =
+ (struct acpi_iort_smmu_v3 *)node->node_data;
+
+ switch (iort_smmu->model) {
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break;
case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break;
+ case ACPI_IORT_SMMU_V3_GENERIC:
+ /*
+ * Tegra241 implementation stores its SMMU options and impl_dev
+ * in DSDT. Thus, go through the ACPI tables unconditionally.
+ */
+ acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
+ break;
}
dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+ return 0;
}
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -3926,12 +4594,18 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
/* Retrieve SMMUv3 specific data */
iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
- acpi_smmu_get_options(iort_smmu->model, smmu);
-
if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
- return 0;
+ switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
+ case IDR0_HTTU_ACCESS_DIRTY:
+ smmu->features |= ARM_SMMU_FEAT_HD;
+ fallthrough;
+ case IDR0_HTTU_ACCESS:
+ smmu->features |= ARM_SMMU_FEAT_HA;
+ }
+
+ return acpi_smmu_iort_probe_model(node, smmu);
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -4012,6 +4686,39 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
+static void arm_smmu_impl_remove(void *data)
+{
+ struct arm_smmu_device *smmu = data;
+
+ if (smmu->impl_ops && smmu->impl_ops->device_remove)
+ smmu->impl_ops->device_remove(smmu);
+}
+
+/*
+ * Probe all the compiled in implementations. Each one checks to see if it
+ * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
+ * replaces the callers. Otherwise the original is returned or ERR_PTR.
+ */
+static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
+ int ret;
+
+ if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
+ new_smmu = tegra241_cmdqv_probe(smmu);
+
+ if (new_smmu == ERR_PTR(-ENODEV))
+ return smmu;
+ if (IS_ERR(new_smmu))
+ return new_smmu;
+
+ ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
+ new_smmu);
+ if (ret)
+ return ERR_PTR(ret);
+ return new_smmu;
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -4019,7 +4726,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
resource_size_t ioaddr;
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
- bool bypass;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu)
@@ -4030,12 +4736,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
ret = arm_smmu_device_dt_probe(pdev, smmu);
} else {
ret = arm_smmu_device_acpi_probe(pdev, smmu);
- if (ret == -ENODEV)
- return ret;
}
+ if (ret)
+ return ret;
- /* Set bypass mode according to firmware probing result */
- bypass = !!ret;
+ smmu = arm_smmu_impl_probe(smmu);
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -4090,7 +4797,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Initialise in-memory data structures */
ret = arm_smmu_init_structures(smmu);
if (ret)
- return ret;
+ goto err_free_iopf;
/* Record our private device structure */
platform_set_drvdata(pdev, smmu);
@@ -4099,24 +4806,31 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
arm_smmu_rmr_install_bypass_ste(smmu);
/* Reset the device */
- ret = arm_smmu_device_reset(smmu, bypass);
+ ret = arm_smmu_device_reset(smmu);
if (ret)
- return ret;
+ goto err_disable;
/* And we're up. Go go go! */
ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
"smmu3.%pa", &ioaddr);
if (ret)
- return ret;
+ goto err_disable;
ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
- iommu_device_sysfs_remove(&smmu->iommu);
- return ret;
+ goto err_free_sysfs;
}
return 0;
+
+err_free_sysfs:
+ iommu_device_sysfs_remove(&smmu->iommu);
+err_disable:
+ arm_smmu_device_disable(smmu);
+err_free_iopf:
+ iopf_queue_free(smmu->evtq.iopf);
+ return ret;
}
static void arm_smmu_device_remove(struct platform_device *pdev)
@@ -4156,7 +4870,7 @@ static struct platform_driver arm_smmu_driver = {
.suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
- .remove_new = arm_smmu_device_remove,
+ .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,