diff options
Diffstat (limited to 'drivers/iommu/io-pgtable-arm.c')
| -rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 1205 |
1 files changed, 636 insertions, 569 deletions
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 237cacd4a62b..e6626004b323 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * CPU-agnostic ARM page table allocator. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * * Copyright (C) 2014 ARM Limited * * Author: Will Deacon <will.deacon@arm.com> @@ -22,8 +11,7 @@ #include <linux/atomic.h> #include <linux/bitops.h> -#include <linux/iommu.h> -#include <linux/kernel.h> +#include <linux/io-pgtable.h> #include <linux/sizes.h> #include <linux/slab.h> #include <linux/types.h> @@ -31,7 +19,8 @@ #include <asm/barrier.h> -#include "io-pgtable.h" +#include "io-pgtable-arm.h" +#include "iommu-pages.h" #define ARM_LPAE_MAX_ADDR_BITS 52 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 @@ -45,39 +34,34 @@ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) /* - * For consistency with the architecture, we always consider - * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0 - */ -#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels) - -/* * Calculate the right shift amount to get to the portion describing level l * in a virtual address mapped by the pagetable in d. */ #define ARM_LPAE_LVL_SHIFT(l,d) \ - ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ - * (d)->bits_per_level) + (d)->pg_shift) + (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \ + ilog2(sizeof(arm_lpae_iopte))) -#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift) +#define ARM_LPAE_GRANULE(d) \ + (sizeof(arm_lpae_iopte) << (d)->bits_per_level) +#define ARM_LPAE_PGD_SIZE(d) \ + (sizeof(arm_lpae_iopte) << (d)->pgd_bits) -#define ARM_LPAE_PAGES_PER_PGD(d) \ - DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d)) +#define ARM_LPAE_PTES_PER_TABLE(d) \ + (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte))) /* * Calculate the index at level l used to map virtual address a using the * pagetable in d. */ #define ARM_LPAE_PGD_IDX(l,d) \ - ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) + ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0) #define ARM_LPAE_LVL_IDX(a,l,d) \ (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) /* Calculate the block/page mapping size at level l for pagetable in d. */ -#define ARM_LPAE_BLOCK_SIZE(l,d) \ - (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \ - ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) +#define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d)) /* Page table bits */ #define ARM_LPAE_PTE_TYPE_SHIFT 0 @@ -91,6 +75,7 @@ #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) @@ -98,17 +83,16 @@ #define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) -#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) -/* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) -#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ - ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ #define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) -#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ + ARM_LPAE_PTE_AP_RDONLY_BIT) +#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ + ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) @@ -116,89 +100,88 @@ #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +/* + * For !FWB these code to: + * 1111 = Normal outer write back cachable / Inner Write Back Cachable + * Permit S1 to override + * 0101 = Normal Non-cachable / Inner Non-cachable + * 0001 = Device / Device-nGnRE + * For S2FWB these code: + * 0110 Force Normal Write Back + * 0101 Normal* is forced Normal-NC, Device unchanged + * 0001 Force Device-nGnRE + */ +#define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2) #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) /* Register bits */ -#define ARM_32_LPAE_TCR_EAE (1 << 31) -#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) - -#define ARM_LPAE_TCR_EPD1 (1 << 23) - -#define ARM_LPAE_TCR_TG0_4K (0 << 14) -#define ARM_LPAE_TCR_TG0_64K (1 << 14) -#define ARM_LPAE_TCR_TG0_16K (2 << 14) - -#define ARM_LPAE_TCR_SH0_SHIFT 12 -#define ARM_LPAE_TCR_SH0_MASK 0x3 -#define ARM_LPAE_TCR_SH_NS 0 -#define ARM_LPAE_TCR_SH_OS 2 -#define ARM_LPAE_TCR_SH_IS 3 - -#define ARM_LPAE_TCR_ORGN0_SHIFT 10 -#define ARM_LPAE_TCR_IRGN0_SHIFT 8 -#define ARM_LPAE_TCR_RGN_MASK 0x3 -#define ARM_LPAE_TCR_RGN_NC 0 -#define ARM_LPAE_TCR_RGN_WBWA 1 -#define ARM_LPAE_TCR_RGN_WT 2 -#define ARM_LPAE_TCR_RGN_WB 3 - -#define ARM_LPAE_TCR_SL0_SHIFT 6 -#define ARM_LPAE_TCR_SL0_MASK 0x3 +#define ARM_LPAE_VTCR_SL0_MASK 0x3 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 -#define ARM_LPAE_TCR_SZ_MASK 0xf - -#define ARM_LPAE_TCR_PS_SHIFT 16 -#define ARM_LPAE_TCR_PS_MASK 0x7 - -#define ARM_LPAE_TCR_IPS_SHIFT 32 -#define ARM_LPAE_TCR_IPS_MASK 0x7 -#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL -#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL -#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL -#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL -#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL -#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL -#define ARM_LPAE_TCR_PS_52_BIT 0x6ULL +#define ARM_LPAE_VTCR_PS_SHIFT 16 +#define ARM_LPAE_VTCR_PS_MASK 0x7 #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) #define ARM_LPAE_MAIR_ATTR_MASK 0xff #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 #define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff #define ARM_LPAE_MAIR_ATTR_IDX_NC 0 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 +#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3 + +#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0) +#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) +#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) + +#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL +#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) -#define iopte_type(pte,l) \ +#define iopte_type(pte) \ (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) -#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) +#define iopte_writeable_dirty(pte) \ + (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) -#define iopte_leaf(pte,l) \ - (l == (ARM_LPAE_MAX_LEVELS - 1) ? \ - (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \ - (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK)) +#define iopte_set_writeable_clean(ptep) \ + set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) struct arm_lpae_io_pgtable { struct io_pgtable iop; - int levels; - size_t pgd_size; - unsigned long pg_shift; - unsigned long bits_per_level; + int pgd_bits; + int start_level; + int bits_per_level; void *pgd; }; typedef u64 arm_lpae_iopte; +static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, + enum io_pgtable_fmt fmt) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE) + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE; + + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; +} + +static inline bool iopte_table(arm_lpae_iopte pte, int lvl) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return false; + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -213,14 +196,52 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte, { u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK; - if (data->pg_shift < 16) + if (ARM_LPAE_GRANULE(data) < SZ_64K) return paddr; /* Rotate the packed high-order bits back to the top */ return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4); } -static bool selftest_running = false; +/* + * Convert an index returned by ARM_LPAE_PGD_IDX(), which can point into + * a concatenated PGD, into the maximum number of entries that can be + * mapped in the same table page. + */ +static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data) +{ + int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data); + + return ptes_per_table - (i & (ptes_per_table - 1)); +} + +/* + * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a) + * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0. + * 2) R_SRKBC: After de-ciphering the table for PA size and valid initial lookup + * a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas) + * b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas) + * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas) + * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas) + */ +static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg, + struct arm_lpae_io_pgtable *data) +{ + unsigned int ias = cfg->ias; + unsigned int oas = cfg->oas; + + /* Covers 1 and 2.d */ + if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0)) + return (oas == 48) || (ias == 48); + + /* Covers 2.a and 2.c */ + if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0)) + return (oas == 40) || (oas == 42); + + /* Case 2.b */ + return (ARM_LPAE_GRANULE(data) == SZ_16K) && + (data->start_level == 1) && (oas == 40); +} static dma_addr_t __arm_lpae_dma_addr(void *pages) { @@ -228,22 +249,29 @@ static dma_addr_t __arm_lpae_dma_addr(void *pages) } static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, - struct io_pgtable_cfg *cfg) + struct io_pgtable_cfg *cfg, + void *cookie) { struct device *dev = cfg->iommu_dev; - int order = get_order(size); - struct page *p; + size_t alloc_size; dma_addr_t dma; void *pages; - VM_BUG_ON((gfp & __GFP_HIGHMEM)); - p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE, - gfp | __GFP_ZERO, order); - if (!p) + /* + * For very small starting-level translation tables the HW requires a + * minimum alignment of at least 64 to cover all cases. + */ + alloc_size = max(size, 64); + if (cfg->alloc) + pages = cfg->alloc(cookie, alloc_size, gfp); + else + pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp, + alloc_size); + + if (!pages) return NULL; - pages = page_address(p); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -261,96 +289,113 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, out_unmap: dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n"); dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); + out_free: - __free_pages(p, order); + if (cfg->free) + cfg->free(cookie, pages, size); + else + iommu_free_pages(pages); + return NULL; } static void __arm_lpae_free_pages(void *pages, size_t size, - struct io_pgtable_cfg *cfg) + struct io_pgtable_cfg *cfg, + void *cookie) { - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); - free_pages((unsigned long)pages, get_order(size)); + + if (cfg->free) + cfg->free(cookie, pages, size); + else + iommu_free_pages(pages); } -static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, +static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), - sizeof(*ptep), DMA_TO_DEVICE); + sizeof(*ptep) * num_entries, DMA_TO_DEVICE); } -static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, - struct io_pgtable_cfg *cfg) +static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries) { - *ptep = pte; + for (int i = 0; i < num_entries; i++) + ptep[i] = 0; - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) - __arm_lpae_sync_pte(ptep, cfg); + if (!cfg->coherent_walk && num_entries) + __arm_lpae_sync_pte(ptep, num_entries, cfg); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, - unsigned long iova, size_t size, int lvl, - arm_lpae_iopte *ptep); + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size, size_t pgcount, + int lvl, arm_lpae_iopte *ptep); static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, phys_addr_t paddr, arm_lpae_iopte prot, - int lvl, arm_lpae_iopte *ptep) + int lvl, int num_entries, arm_lpae_iopte *ptep) { arm_lpae_iopte pte = prot; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + int i; - if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; - - if (lvl == ARM_LPAE_MAX_LEVELS - 1) + if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else pte |= ARM_LPAE_PTE_TYPE_BLOCK; - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; - pte |= paddr_to_iopte(paddr, data); + for (i = 0; i < num_entries; i++) + ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data); - __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); + if (!cfg->coherent_walk) + __arm_lpae_sync_pte(ptep, num_entries, cfg); } static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, - arm_lpae_iopte prot, int lvl, + arm_lpae_iopte prot, int lvl, int num_entries, arm_lpae_iopte *ptep) { - arm_lpae_iopte pte = *ptep; - - if (iopte_leaf(pte, lvl)) { - /* We require an unmap first */ - WARN_ON(!selftest_running); - return -EEXIST; - } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { - /* - * We need to unmap and free the old table before - * overwriting it with a block entry. - */ - arm_lpae_iopte *tblp; - size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); - - tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); - if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz)) - return -EINVAL; - } + int i; + + for (i = 0; i < num_entries; i++) + if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) { + /* We require an unmap first */ + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); + return -EEXIST; + } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) { + /* + * We need to unmap and free the old table before + * overwriting it with a block entry. + */ + arm_lpae_iopte *tblp; + size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + + tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); + if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1, + lvl, tblp) != sz) { + WARN_ON(1); + return -EINVAL; + } + } - __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep); return 0; } static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, arm_lpae_iopte *ptep, arm_lpae_iopte curr, - struct io_pgtable_cfg *cfg) + struct arm_lpae_io_pgtable *data) { arm_lpae_iopte old, new; + struct io_pgtable_cfg *cfg = &data->iop.cfg; - new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; + new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; @@ -363,12 +408,11 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, old = cmpxchg64_relaxed(ptep, curr, new); - if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || - (old & ARM_LPAE_PTE_SW_SYNC)) + if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) return old; /* Even if it's not ours, there's no point waiting; just kick it */ - __arm_lpae_sync_pte(ptep, cfg); + __arm_lpae_sync_pte(ptep, 1, cfg); if (old == curr) WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); @@ -376,20 +420,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, } static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, - phys_addr_t paddr, size_t size, arm_lpae_iopte prot, - int lvl, arm_lpae_iopte *ptep) + phys_addr_t paddr, size_t size, size_t pgcount, + arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep, + gfp_t gfp, size_t *mapped) { arm_lpae_iopte *cptep, pte; size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); size_t tblsz = ARM_LPAE_GRANULE(data); struct io_pgtable_cfg *cfg = &data->iop.cfg; + int ret = 0, num_entries, max_entries, map_idx_start; /* Find our entry at the current level */ - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); + ptep += map_idx_start; /* If we can install a leaf entry at this level, then do so */ - if (size == block_size && (size & cfg->pgsize_bitmap)) - return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); + if (size == block_size) { + max_entries = arm_lpae_max_entries(map_idx_start, data); + num_entries = min_t(int, pgcount, max_entries); + ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep); + if (!ret) + *mapped += num_entries * size; + + return ret; + } /* We can't allocate tables at the final level */ if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) @@ -398,28 +452,28 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, /* Grab a pointer to the next level */ pte = READ_ONCE(*ptep); if (!pte) { - cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg); + cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie); if (!cptep) return -ENOMEM; - pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + pte = arm_lpae_install_table(cptep, ptep, 0, data); if (pte) - __arm_lpae_free_pages(cptep, tblsz, cfg); - } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && - !(pte & ARM_LPAE_PTE_SW_SYNC)) { - __arm_lpae_sync_pte(ptep, cfg); + __arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie); + } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { + __arm_lpae_sync_pte(ptep, 1, cfg); } - if (pte && !iopte_leaf(pte, lvl)) { + if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) { cptep = iopte_deref(pte, data); } else if (pte) { /* We require an unmap first */ - WARN_ON(!selftest_running); + WARN_ON(!(cfg->quirks & IO_PGTABLE_QUIRK_NO_WARN)); return -EEXIST; } /* Rinse, repeat */ - return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep); + return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1, + cptep, gfp, mapped); } static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, @@ -430,57 +484,93 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, if (data->iop.fmt == ARM_64_LPAE_S1 || data->iop.fmt == ARM_32_LPAE_S1) { pte = ARM_LPAE_PTE_nG; - if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; - + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) + pte |= ARM_LPAE_PTE_DBM; if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; - - if (prot & IOMMU_MMIO) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV - << ARM_LPAE_PTE_ATTRINDX_SHIFT); - else if (prot & IOMMU_CACHE) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE - << ARM_LPAE_PTE_ATTRINDX_SHIFT); } else { pte = ARM_LPAE_PTE_HAP_FAULT; if (prot & IOMMU_READ) pte |= ARM_LPAE_PTE_HAP_READ; if (prot & IOMMU_WRITE) pte |= ARM_LPAE_PTE_HAP_WRITE; - if (prot & IOMMU_MMIO) + } + + /* + * Note that this logic is structured to accommodate Mali LPAE + * having stage-1-like attributes but stage-2-like permissions. + */ + if (data->iop.fmt == ARM_64_LPAE_S2 || + data->iop.fmt == ARM_32_LPAE_S2) { + if (prot & IOMMU_MMIO) { pte |= ARM_LPAE_PTE_MEMATTR_DEV; - else if (prot & IOMMU_CACHE) - pte |= ARM_LPAE_PTE_MEMATTR_OIWB; - else + } else if (prot & IOMMU_CACHE) { + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB; + else + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + } else { pte |= ARM_LPAE_PTE_MEMATTR_NC; + } + } else { + if (prot & IOMMU_MMIO) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); } + /* + * Also Mali has its own notions of shareability wherein its Inner + * domain covers the cores within the GPU, and its Outer domain is + * "outside the GPU" (i.e. either the Inner or System domain in CPU + * terms, depending on coherency). + */ + if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_SH_IS; + else + pte |= ARM_LPAE_PTE_SH_OS; + if (prot & IOMMU_NOEXEC) pte |= ARM_LPAE_PTE_XN; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_AF; + return pte; } -static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int iommu_prot) +static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int iommu_prot, gfp_t gfp, size_t *mapped) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; - int ret, lvl = ARM_LPAE_START_LVL(data); + int ret, lvl = data->start_level; arm_lpae_iopte prot; + long iaext = (s64)iova >> cfg->ias; - /* If no access, then nothing to do */ - if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) - return 0; + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize)) + return -EINVAL; - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || - paddr >= (1ULL << data->iop.cfg.oas))) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext || paddr >> cfg->oas)) return -ERANGE; + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return -EINVAL; + prot = arm_lpae_prot_to_pte(data, iommu_prot); - ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); + ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl, + ptep, gfp, mapped); /* * Synchronise all PTE updates for the new mapping before there's * a chance for anything to kick off a table walk for the new iova. @@ -496,8 +586,8 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - if (lvl == ARM_LPAE_START_LVL(data)) - table_size = data->pgd_size; + if (lvl == data->start_level) + table_size = ARM_LPAE_PGD_SIZE(data); else table_size = ARM_LPAE_GRANULE(data); @@ -512,177 +602,272 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, while (ptep != end) { arm_lpae_iopte pte = *ptep++; - if (!pte || iopte_leaf(pte, lvl)) + if (!pte || iopte_leaf(pte, lvl, data->iop.fmt)) continue; __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); } - __arm_lpae_free_pages(start, table_size, &data->iop.cfg); + __arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie); } static void arm_lpae_free_pgtable(struct io_pgtable *iop) { struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); - __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd); + __arm_lpae_free_pgtable(data, data->start_level, data->pgd); kfree(data); } -static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, - unsigned long iova, size_t size, - arm_lpae_iopte blk_pte, int lvl, - arm_lpae_iopte *ptep) -{ - struct io_pgtable_cfg *cfg = &data->iop.cfg; - arm_lpae_iopte pte, *tablep; - phys_addr_t blk_paddr; - size_t tablesz = ARM_LPAE_GRANULE(data); - size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); - int i, unmap_idx = -1; - - if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) - return 0; - - tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); - if (!tablep) - return 0; /* Bytes unmapped */ - - if (size == split_sz) - unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data); - - blk_paddr = iopte_to_paddr(blk_pte, data); - pte = iopte_prot(blk_pte); - - for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) { - /* Unmap! */ - if (i == unmap_idx) - continue; - - __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); - } - - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); - if (pte != blk_pte) { - __arm_lpae_free_pages(tablep, tablesz, cfg); - /* - * We may race against someone unmapping another part of this - * block, but anything else is invalid. We can't misinterpret - * a page entry here since we're never at the last level. - */ - if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE) - return 0; - - tablep = iopte_deref(pte, data); - } else if (unmap_idx >= 0) { - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - io_pgtable_tlb_sync(&data->iop); - return size; - } - - return __arm_lpae_unmap(data, iova, size, lvl, tablep); -} - static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, - unsigned long iova, size_t size, int lvl, - arm_lpae_iopte *ptep) + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size, size_t pgcount, + int lvl, arm_lpae_iopte *ptep) { arm_lpae_iopte pte; struct io_pgtable *iop = &data->iop; + int i = 0, num_entries, max_entries, unmap_idx_start; /* Something went horribly wrong and we ran out of page table */ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) return 0; - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); + ptep += unmap_idx_start; pte = READ_ONCE(*ptep); - if (WARN_ON(!pte)) - return 0; + if (!pte) { + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); + return -ENOENT; + } /* If the size matches this level, we're in the right place */ if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { - __arm_lpae_set_pte(ptep, 0, &iop->cfg); - - if (!iopte_leaf(pte, lvl)) { - /* Also flush any partial walks */ - io_pgtable_tlb_add_flush(iop, iova, size, - ARM_LPAE_GRANULE(data), false); - io_pgtable_tlb_sync(iop); - ptep = iopte_deref(pte, data); - __arm_lpae_free_pgtable(data, lvl + 1, ptep); - } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { - /* - * Order the PTE update against queueing the IOVA, to - * guarantee that a flush callback from a different CPU - * has observed it before the TLBIALL can be issued. - */ - smp_wmb(); - } else { - io_pgtable_tlb_add_flush(iop, iova, size, size, true); + max_entries = arm_lpae_max_entries(unmap_idx_start, data); + num_entries = min_t(int, pgcount, max_entries); + + /* Find and handle non-leaf entries */ + for (i = 0; i < num_entries; i++) { + pte = READ_ONCE(ptep[i]); + if (!pte) { + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); + break; + } + + if (!iopte_leaf(pte, lvl, iop->fmt)) { + __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1); + + /* Also flush any partial walks */ + io_pgtable_tlb_flush_walk(iop, iova + i * size, size, + ARM_LPAE_GRANULE(data)); + __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); + } } - return size; - } else if (iopte_leaf(pte, lvl)) { - /* - * Insert a table at the next level to map the old region, - * minus the part we want to unmap - */ - return arm_lpae_split_blk_unmap(data, iova, size, pte, - lvl + 1, ptep); + /* Clear the remaining entries */ + __arm_lpae_clear_pte(ptep, &iop->cfg, i); + + if (gather && !iommu_iotlb_gather_queued(gather)) + for (int j = 0; j < i; j++) + io_pgtable_tlb_add_page(iop, gather, iova + j * size, size); + + return i * size; + } else if (iopte_leaf(pte, lvl, iop->fmt)) { + WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed"); + return 0; } /* Keep on walkin' */ ptep = iopte_deref(pte, data); - return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); + return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep); } -static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size) +static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; - int lvl = ARM_LPAE_START_LVL(data); + long iaext = (s64)iova >> cfg->ias; - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) return 0; - return __arm_lpae_unmap(data, iova, size, lvl, ptep); + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) + return 0; + + return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount, + data->start_level, ptep); +} + +struct io_pgtable_walk_data { + struct io_pgtable *iop; + void *data; + int (*visit)(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size); + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +struct iova_to_phys_data { + arm_lpae_iopte pte; + int lvl; +}; + +static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iova_to_phys_data *data = walk_data->data; + data->pte = *ptep; + data->lvl = lvl; + return 0; } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); - arm_lpae_iopte pte, *ptep = data->pgd; - int lvl = ARM_LPAE_START_LVL(data); + struct iova_to_phys_data d; + struct io_pgtable_walk_data walk_data = { + .data = &d, + .visit = visit_iova_to_phys, + .addr = iova, + .end = iova + 1, + }; + int ret; + + ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); + if (ret) + return 0; + + iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1); + return iopte_to_paddr(d.pte, data) | iova; +} + +static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct arm_lpae_io_pgtable_walk_data *data = walk_data->data; + data->ptes[lvl] = *ptep; + return 0; +} + +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, + void *wd) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_walk_data walk_data = { + .data = wd, + .visit = visit_pgtable_walk, + .addr = iova, + .end = iova + 1, + }; + + return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); +} + +static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) +{ + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte pte = READ_ONCE(*ptep); + + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + int ret = walk_data->visit(walk_data, lvl, ptep, size); + if (ret) + return ret; + + if (iopte_leaf(pte, lvl, iop->fmt)) { + walk_data->addr += size; + return 0; + } + + if (!iopte_table(pte, lvl)) { + return -EINVAL; + } - do { - /* Valid IOPTE pointer? */ - if (!ptep) - return 0; + ptep = iopte_deref(pte, data); + return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1); +} - /* Grab the IOPTE we're interested in */ - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = READ_ONCE(*ptep); +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) +{ + u32 idx; + int max_entries, ret; - /* Valid entry? */ - if (!pte) - return 0; + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; - /* Leaf entry? */ - if (iopte_leaf(pte,lvl)) - goto found_translation; + if (lvl == data->start_level) + max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); + else + max_entries = ARM_LPAE_PTES_PER_TABLE(data); - /* Take it to the next level */ - ptep = iopte_deref(pte, data); - } while (++lvl < ARM_LPAE_MAX_LEVELS); + for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); + (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { + ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl); + if (ret) + return ret; + } - /* Ran out of page tables to walk */ return 0; +} + +static int visit_dirty(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iommu_dirty_bitmap *dirty = walk_data->data; -found_translation: - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); - return iopte_to_paddr(pte, data) | iova; + if (!iopte_leaf(*ptep, lvl, walk_data->iop->fmt)) + return 0; + + if (iopte_writeable_dirty(*ptep)) { + iommu_dirty_bitmap_record(dirty, walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + + return 0; +} + +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct io_pgtable_walk_data walk_data = { + .iop = &data->iop, + .data = dirty, + .visit = visit_dirty, + .flags = flags, + .addr = iova, + .end = iova + size, + }; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + + if (WARN_ON(!size)) + return -EINVAL; + if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) + return -EINVAL; + if (data->iop.fmt != ARM_64_LPAE_S1) + return -EINVAL; + + return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl); } static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) @@ -731,8 +916,8 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) static struct arm_lpae_io_pgtable * arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) { - unsigned long va_bits, pgd_bits; struct arm_lpae_io_pgtable *data; + int levels, va_bits, pg_shift; arm_lpae_restrict_pgsizes(cfg); @@ -745,29 +930,26 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) return NULL; - if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) { - dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n"); - return NULL; - } - data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; - data->pg_shift = __ffs(cfg->pgsize_bitmap); - data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte)); + pg_shift = __ffs(cfg->pgsize_bitmap); + data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte)); - va_bits = cfg->ias - data->pg_shift; - data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + va_bits = cfg->ias - pg_shift; + levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + data->start_level = ARM_LPAE_MAX_LEVELS - levels; /* Calculate the actual size of our pgd (without concatenation) */ - pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1)); - data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte))); + data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); data->iop.ops = (struct io_pgtable_ops) { - .map = arm_lpae_map, - .unmap = arm_lpae_unmap, + .map_pages = arm_lpae_map_pages, + .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, + .pgtable_walk = arm_lpae_pgtable_walk, }; return data; @@ -778,9 +960,14 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { u64 reg; struct arm_lpae_io_pgtable *data; - - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; + bool tg1; + + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | + IO_PGTABLE_QUIRK_ARM_TTBR1 | + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | + IO_PGTABLE_QUIRK_ARM_HD | + IO_PGTABLE_QUIRK_NO_WARN)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -788,53 +975,61 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) return NULL; /* TCR */ - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + if (cfg->coherent_walk) { + tcr->sh = ARM_LPAE_TCR_SH_IS; + tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA) + goto out_free_data; + } else { + tcr->sh = ARM_LPAE_TCR_SH_OS; + tcr->irgn = ARM_LPAE_TCR_RGN_NC; + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) + tcr->orgn = ARM_LPAE_TCR_RGN_NC; + else + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + } + tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K; break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - - /* Disable speculative walks through TTBR1 */ - reg |= ARM_LPAE_TCR_EPD1; - cfg->arm_lpae_s1_cfg.tcr = reg; + tcr->tsz = 64ULL - cfg->ias; /* MAIRs */ reg = (ARM_LPAE_MAIR_ATTR_NC @@ -842,22 +1037,23 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_MAIR_ATTR_WBRWA << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | (ARM_LPAE_MAIR_ATTR_DEVICE - << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) | + (ARM_LPAE_MAIR_ATTR_INC_OWBRWA + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE)); - cfg->arm_lpae_s1_cfg.mair[0] = reg; - cfg->arm_lpae_s1_cfg.mair[1] = 0; + cfg->arm_lpae_s1_cfg.mair = reg; /* Looking good; allocate a pgd */ - data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), + GFP_KERNEL, cfg, cookie); if (!data->pgd) goto out_free_data; /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); - cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd); return &data->iop; out_free_data: @@ -868,85 +1064,84 @@ out_free_data: static struct io_pgtable * arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - u64 reg, sl; + u64 sl; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; - /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB | + IO_PGTABLE_QUIRK_NO_WARN)) return NULL; data = arm_lpae_alloc_pgtable(cfg); if (!data) return NULL; - /* - * Concatenate PGDs at level 1 if possible in order to reduce - * the depth of the stage-2 walk. - */ - if (data->levels == ARM_LPAE_MAX_LEVELS) { - unsigned long pgd_pages; - - pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte)); - if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { - data->pgd_size = pgd_pages << data->pg_shift; - data->levels--; - } + if (arm_lpae_concat_mandatory(cfg, data)) { + if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) > + ARM_LPAE_S2_MAX_CONCAT_PAGES)) + return NULL; + data->pgd_bits += data->bits_per_level; + data->start_level++; } /* VTCR */ - reg = ARM_64_LPAE_S2_TCR_RES1 | - (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + if (cfg->coherent_walk) { + vtcr->sh = ARM_LPAE_TCR_SH_IS; + vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + } else { + vtcr->sh = ARM_LPAE_TCR_SH_OS; + vtcr->irgn = ARM_LPAE_TCR_RGN_NC; + vtcr->orgn = ARM_LPAE_TCR_RGN_NC; + } - sl = ARM_LPAE_START_LVL(data); + sl = data->start_level; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + vtcr->tg = ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + vtcr->tg = ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + vtcr->tg = ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; - cfg->arm_lpae_s2_cfg.vtcr = reg; + vtcr->tsz = 64ULL - cfg->ias; + vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK; /* Allocate pgd pages */ - data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), + GFP_KERNEL, cfg, cookie); if (!data->pgd) goto out_free_data; @@ -965,236 +1160,108 @@ out_free_data: static struct io_pgtable * arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 32 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; - cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; - } - - return iop; + return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); } static struct io_pgtable * arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 40 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); - if (iop) - cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; + return arm_64_lpae_alloc_pgtable_s2(cfg, cookie); +} + +static struct io_pgtable * +arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct arm_lpae_io_pgtable *data; - return iop; + /* No quirks for Mali (hopefully) */ + if (cfg->quirks) + return NULL; + + if (cfg->ias > 48 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; + + /* Mali seems to need a full 4-level table regardless of IAS */ + if (data->start_level > 0) { + data->start_level = 0; + data->pgd_bits = 0; + } + /* + * MEMATTR: Mali has no actual notion of a non-cacheable type, so the + * best we can do is mimic the out-of-tree driver and hope that the + * "implementation-defined caching policy" is good enough. Similarly, + * we'll use it for the sake of a valid attribute for our 'device' + * index, although callers should never request that in practice. + */ + cfg->arm_mali_lpae_cfg.memattr = + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, + cfg, cookie); + if (!data->pgd) + goto out_free_data; + + /* Ensure the empty pgd is visible before TRANSTAB can be written */ + wmb(); + + cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | + ARM_MALI_LPAE_TTBR_READ_INNER | + ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + if (cfg->coherent_walk) + cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER; + + return &data->iop; + +out_free_data: + kfree(data); + return NULL; } struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_64_lpae_alloc_pgtable_s2, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_32_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, }; struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, .alloc = arm_32_lpae_alloc_pgtable_s2, .free = arm_lpae_free_pgtable, }; -#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST - -static struct io_pgtable_cfg *cfg_cookie; - -static void dummy_tlb_flush_all(void *cookie) -{ - WARN_ON(cookie != cfg_cookie); -} - -static void dummy_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) -{ - WARN_ON(cookie != cfg_cookie); - WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); -} - -static void dummy_tlb_sync(void *cookie) -{ - WARN_ON(cookie != cfg_cookie); -} - -static const struct iommu_gather_ops dummy_tlb_ops __initconst = { - .tlb_flush_all = dummy_tlb_flush_all, - .tlb_add_flush = dummy_tlb_add_flush, - .tlb_sync = dummy_tlb_sync, +struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { + .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, + .alloc = arm_mali_lpae_alloc_pgtable, + .free = arm_lpae_free_pgtable, }; - -static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) -{ - struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); - struct io_pgtable_cfg *cfg = &data->iop.cfg; - - pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n", - cfg->pgsize_bitmap, cfg->ias); - pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n", - data->levels, data->pgd_size, data->pg_shift, - data->bits_per_level, data->pgd); -} - -#define __FAIL(ops, i) ({ \ - WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \ - arm_lpae_dump_ops(ops); \ - selftest_running = false; \ - -EFAULT; \ -}) - -static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) -{ - static const enum io_pgtable_fmt fmts[] = { - ARM_64_LPAE_S1, - ARM_64_LPAE_S2, - }; - - int i, j; - unsigned long iova; - size_t size; - struct io_pgtable_ops *ops; - - selftest_running = true; - - for (i = 0; i < ARRAY_SIZE(fmts); ++i) { - cfg_cookie = cfg; - ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg); - if (!ops) { - pr_err("selftest: failed to allocate io pgtable ops\n"); - return -ENOMEM; - } - - /* - * Initial sanity checks. - * Empty page tables shouldn't provide any translations. - */ - if (ops->iova_to_phys(ops, 42)) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, SZ_1G + 42)) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, SZ_2G + 42)) - return __FAIL(ops, i); - - /* - * Distinct mappings of different granule sizes. - */ - iova = 0; - for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { - size = 1UL << j; - - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE)) - return __FAIL(ops, i); - - /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC)) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) - return __FAIL(ops, i); - - iova += SZ_1G; - } - - /* Partial unmap */ - size = 1UL << __ffs(cfg->pgsize_bitmap); - if (ops->unmap(ops, SZ_1G + size, size) != size) - return __FAIL(ops, i); - - /* Remap of partial unmap */ - if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ)) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) - return __FAIL(ops, i); - - /* Full unmap */ - iova = 0; - for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { - size = 1UL << j; - - if (ops->unmap(ops, iova, size) != size) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, iova + 42)) - return __FAIL(ops, i); - - /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE)) - return __FAIL(ops, i); - - if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) - return __FAIL(ops, i); - - iova += SZ_1G; - } - - free_io_pgtable_ops(ops); - } - - selftest_running = false; - return 0; -} - -static int __init arm_lpae_do_selftests(void) -{ - static const unsigned long pgsize[] = { - SZ_4K | SZ_2M | SZ_1G, - SZ_16K | SZ_32M, - SZ_64K | SZ_512M, - }; - - static const unsigned int ias[] = { - 32, 36, 40, 42, 44, 48, - }; - - int i, j, pass = 0, fail = 0; - struct io_pgtable_cfg cfg = { - .tlb = &dummy_tlb_ops, - .oas = 48, - .quirks = IO_PGTABLE_QUIRK_NO_DMA, - }; - - for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { - for (j = 0; j < ARRAY_SIZE(ias); ++j) { - cfg.pgsize_bitmap = pgsize[i]; - cfg.ias = ias[j]; - pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", - pgsize[i], ias[j]); - if (arm_lpae_run_tests(&cfg)) - fail++; - else - pass++; - } - } - - pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); - return fail ? -EFAULT : 0; -} -subsys_initcall(arm_lpae_do_selftests); -#endif |
