summaryrefslogtreecommitdiff
path: root/drivers/iommu/arm
diff options
context:
space:
mode:
author Jason Gunthorpe <jgg@nvidia.com> 2024-10-30 21:20:54 -0300
committer Jason Gunthorpe <jgg@nvidia.com> 2024-11-12 14:11:03 -0400
commit 67e4fe3985138325c9b21193be52266750616182 (patch)
tree ad950f381769eb131b082d649d716db2fa1a33c1 /drivers/iommu/arm
parent 1e8be08d1c91d52a9b51d424db78ddbf88660bbb (diff)
iommu/arm-smmu-v3: Use S2FWB for NESTED domains
Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field works. When S2FWB is supported and enabled the IOPTE will force cachable access to IOMMU_CACHE memory when nesting with a S1 and deny cachable access when !IOMMU_CACHE. When using a single stage of translation, a simple S2 domain, it doesn't change things for PCI devices as it is just a different encoding for the existing mapping of the IOMMU protection flags to cachability attributes. For non-PCI it also changes the combining rules when incoming transactions have inconsistent attributes. However, when used with a nested S1, FWB has the effect of preventing the guest from choosing a MemAttr in its S1 that would cause ordinary DMA to bypass the cache. Consistent with KVM we wish to deny the guest the ability to become incoherent with cached memory the hypervisor believes is cachable so we don't have to flush it. Allow NESTED domains to be created if the SMMU has S2FWB support and use S2FWB for NESTING_PARENTS. This is an additional option to CANWBS. Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen <nicolinc@nvidia.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com> Reviewed-by: Donald Dutile <ddutile@redhat.com> Tested-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/iommu/arm')
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c 7
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 8
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 3
3 files changed, 15 insertions, 3 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index 91247a2a2d2c..a1c8fcd4797c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -220,9 +220,12 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
* Must support some way to prevent the VM from bypassing the cache
* because VFIO currently does not do any cache maintenance. canwbs
* indicates the device is fully coherent and no cache maintenance is
- * ever required, even for PCI No-Snoop.
+ * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
+ * things non-coherent using the memattr, but No-Snoop behavior is not
+ * affected.
*/
- if (!arm_smmu_master_canwbs(master))
+ if (!arm_smmu_master_canwbs(master) &&
+ !(smmu->features & ARM_SMMU_FEAT_S2FWB))
return ERR_PTR(-EOPNOTSUPP);
vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 53f12b9d78ab..de598d66b5c2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1046,7 +1046,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
/* S2 translates */
if (cfg & BIT(1)) {
used_bits[1] |=
- cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
+ cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
+ STRTAB_STE_1_SHCFG);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
@@ -1654,6 +1655,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
FIELD_PREP(STRTAB_STE_1_EATS,
ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
@@ -2472,6 +2475,9 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
pgtbl_cfg.oas = smmu->oas;
fmt = ARM_64_LPAE_S2;
finalise_stage_fn = arm_smmu_domain_finalise_s2;
+ if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
break;
default:
return -EINVAL;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 3fabe187ea78..5a025d310dbe 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -58,6 +58,7 @@ struct arm_smmu_device;
#define IDR1_SIDSIZE GENMASK(5, 0)
#define ARM_SMMU_IDR3 0xc
+#define IDR3_FWB (1 << 8)
#define IDR3_RIL (1 << 10)
#define ARM_SMMU_IDR5 0x14
@@ -265,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
+#define STRTAB_STE_1_S2FWB (1UL << 25)
#define STRTAB_STE_1_S1STALLD (1UL << 27)
#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
@@ -740,6 +742,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20)
#define ARM_SMMU_FEAT_HA (1 << 21)
#define ARM_SMMU_FEAT_HD (1 << 22)
+#define ARM_SMMU_FEAT_S2FWB (1 << 23)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)