summaryrefslogtreecommitdiff
path: root/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c')
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c233
1 files changed, 222 insertions, 11 deletions
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 5c7cfc51b57c..62874b18f645 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -8,12 +8,48 @@
#include <linux/delay.h>
#include <linux/of_device.h>
#include <linux/firmware/qcom/qcom_scm.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include "arm-smmu.h"
#include "arm-smmu-qcom.h"
#define QCOM_DUMMY_VAL -1
+/*
+ * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
+ * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
+ * buffer). The remaining bits are implementation defined and vary across
+ * SoCs.
+ */
+
+#define CPRE (1 << 1)
+#define CMTLB (1 << 0)
+#define PREFETCH_SHIFT 8
+#define PREFETCH_DEFAULT 0
+#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT)
+#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT)
+#define PREFETCH_DEEP (3 << PREFETCH_SHIFT)
+#define GFX_ACTLR_PRR (1 << 5)
+
+static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
+ { .compatible = "qcom,adreno",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-gmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-smmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,fastrpc",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-mdss",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-venus",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sm8550-mdss",
+ .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+ { }
+};
+
static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
return container_of(smmu, struct qcom_smmu, smmu);
@@ -76,25 +112,80 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled)
{
struct arm_smmu_domain *smmu_domain = (void *)cookie;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ u32 mask = BIT(cfg->cbndx);
+ bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled;
+ unsigned long flags;
if (enabled)
- qsmmu->stall_enabled |= BIT(cfg->cbndx);
+ qsmmu->stall_enabled |= mask;
else
- qsmmu->stall_enabled &= ~BIT(cfg->cbndx);
+ qsmmu->stall_enabled &= ~mask;
+
+ /*
+ * If the device is on and we changed the setting, update the register.
+ * The spec pseudocode says that CFCFG is resampled after a fault, and
+ * we believe that no implementations cache it in the TLB, so it should
+ * be safe to change it without a TLB invalidation.
+ */
+ if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) {
+ u32 reg;
+
+ spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+ reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR);
+
+ if (enabled)
+ reg |= ARM_SMMU_SCTLR_CFCFG;
+ else
+ reg &= ~ARM_SMMU_SCTLR_CFCFG;
+
+ arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg);
+ spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+
+ pm_runtime_put_autosuspend(smmu->dev);
+ }
}
-static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate)
+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
{
struct arm_smmu_domain *smmu_domain = (void *)cookie;
- struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
u32 reg = 0;
+ int ret;
- if (terminate)
- reg |= ARM_SMMU_RESUME_TERMINATE;
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
+
+ reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+ reg &= ~GFX_ACTLR_PRR;
+ if (set)
+ reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+ arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+ pm_runtime_put_autosuspend(smmu->dev);
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+ struct arm_smmu_domain *smmu_domain = (void *)cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
- arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
+ writel_relaxed(lower_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+ writel_relaxed(upper_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+ pm_runtime_put_autosuspend(smmu->dev);
}
#define QCOM_ADRENO_SMMU_GPU_SID 0
@@ -205,13 +296,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
return true;
}
+static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx,
+ const struct of_device_id *client_match)
+{
+ const struct of_device_id *match =
+ of_match_device(client_match, dev);
+
+ if (!match) {
+ dev_dbg(dev, "no ACTLR settings present\n");
+ return;
+ }
+
+ arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data);
+}
+
static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ const struct device_node *np = smmu_domain->smmu->dev->of_node;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
struct adreno_smmu_priv *priv;
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
/* Only enable split pagetables for the GPU device (SID 0) */
if (!qcom_adreno_smmu_is_gpu_device(dev))
return 0;
@@ -236,7 +351,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg;
priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
priv->set_stall = qcom_adreno_smmu_set_stall;
- priv->resume_translation = qcom_adreno_smmu_resume_translation;
+ priv->set_prr_bit = NULL;
+ priv->set_prr_addr = NULL;
+
+ if (of_device_is_compatible(np, "qcom,smmu-500") &&
+ of_device_is_compatible(np, "qcom,adreno-smmu")) {
+ priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+ priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
+ }
return 0;
}
@@ -247,6 +369,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,mdp4" },
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,qcm2290-mdss" },
+ { .compatible = "qcom,sar2130p-mdss" },
{ .compatible = "qcom,sc7180-mdss" },
{ .compatible = "qcom,sc7180-mss-pil" },
{ .compatible = "qcom,sc7280-mdss" },
@@ -267,8 +390,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
+
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
return 0;
}
@@ -281,6 +414,20 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
int i;
/*
+ * MSM8998 LPASS SMMU reports 13 context banks, but accessing
+ * the last context bank crashes the system.
+ */
+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") &&
+ smmu->num_context_banks == 13) {
+ smmu->num_context_banks = 12;
+ } else if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2")) {
+ if (smmu->num_context_banks == 21) /* SDM630 / SDM660 A2NOC SMMU */
+ smmu->num_context_banks = 7;
+ else if (smmu->num_context_banks == 14) /* SDM630 / SDM660 LPASS SMMU */
+ smmu->num_context_banks = 13;
+ }
+
+ /*
* Some platforms support more than the Arm SMMU architected maximum of
* 128 stream matching groups. For unknown reasons, the additional
* groups don't exhibit the same behavior as the architected registers,
@@ -336,6 +483,19 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
return 0;
}
+static int qcom_adreno_smmuv2_cfg_probe(struct arm_smmu_device *smmu)
+{
+ /* Support for 16K pages is advertised on some SoCs, but it doesn't seem to work */
+ smmu->features &= ~ARM_SMMU_FEAT_FMT_AARCH64_16K;
+
+ /* TZ protects several last context banks, hide them from Linux */
+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2") &&
+ smmu->num_context_banks == 5)
+ smmu->num_context_banks = 2;
+
+ return 0;
+}
+
static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
@@ -413,6 +573,10 @@ static const struct arm_smmu_impl qcom_smmu_500_impl = {
.reset = arm_mmu500_reset,
.write_s2cr = qcom_smmu_write_s2cr,
.tlb_sync = qcom_smmu_tlb_sync,
+#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
+ .context_fault = qcom_smmu_context_fault,
+ .context_fault_needs_threaded_irq = true,
+#endif
};
static const struct arm_smmu_impl sdm845_smmu_500_impl = {
@@ -422,14 +586,20 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = {
.reset = qcom_sdm845_smmu500_reset,
.write_s2cr = qcom_smmu_write_s2cr,
.tlb_sync = qcom_smmu_tlb_sync,
+#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
+ .context_fault = qcom_smmu_context_fault,
+ .context_fault_needs_threaded_irq = true,
+#endif
};
static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
.init_context = qcom_adreno_smmu_init_context,
+ .cfg_probe = qcom_adreno_smmuv2_cfg_probe,
.def_domain_type = qcom_smmu_def_domain_type,
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
.tlb_sync = qcom_smmu_tlb_sync,
+ .context_fault_needs_threaded_irq = true,
};
static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
@@ -439,6 +609,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
.tlb_sync = qcom_smmu_tlb_sync,
+ .context_fault_needs_threaded_irq = true,
};
static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
@@ -461,14 +632,15 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
/* Check to make sure qcom_scm has finished probing */
if (!qcom_scm_is_available())
- return ERR_PTR(-EPROBE_DEFER);
+ return ERR_PTR(dev_err_probe(smmu->dev, -EPROBE_DEFER,
+ "qcom_scm not ready\n"));
qsmmu = devm_krealloc(smmu->dev, smmu, sizeof(*qsmmu), GFP_KERNEL);
if (!qsmmu)
return ERR_PTR(-ENOMEM);
qsmmu->smmu.impl = impl;
- qsmmu->cfg = data->cfg;
+ qsmmu->data = data;
return &qsmmu->smmu;
}
@@ -511,6 +683,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
.impl = &qcom_smmu_500_impl,
.adreno_impl = &qcom_adreno_smmu_500_impl,
.cfg = &qcom_smmu_impl0_cfg,
+ .client_match = qcom_smmu_actlr_client_of_match,
};
/*
@@ -528,6 +701,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data },
+ { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
@@ -553,10 +727,47 @@ static struct acpi_platform_list qcom_acpi_platlist[] = {
};
#endif
+static int qcom_smmu_tbu_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ int ret;
+
+ if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM_DEBUG)) {
+ ret = qcom_tbu_probe(pdev);
+ if (ret)
+ return ret;
+ }
+
+ if (dev->pm_domain) {
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ }
+
+ return 0;
+}
+
+static const struct of_device_id qcom_smmu_tbu_of_match[] = {
+ { .compatible = "qcom,sc7280-tbu" },
+ { .compatible = "qcom,sdm845-tbu" },
+ { }
+};
+
+static struct platform_driver qcom_smmu_tbu_driver = {
+ .driver = {
+ .name = "qcom_tbu",
+ .of_match_table = qcom_smmu_tbu_of_match,
+ },
+ .probe = qcom_smmu_tbu_probe,
+};
+
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
{
const struct device_node *np = smmu->dev->of_node;
const struct of_device_id *match;
+ static u8 tbu_registered;
+
+ if (!tbu_registered++)
+ platform_driver_register(&qcom_smmu_tbu_driver);
#ifdef CONFIG_ACPI
if (np == NULL) {