Diffstat (limited to 'arch/arc/mm')
-rw-r--r--   arch/arc/mm/Makefile         |    8
-rw-r--r--   arch/arc/mm/cache.c          | 1094
-rw-r--r--   arch/arc/mm/cache_arc700.c   |  767
-rw-r--r--   arch/arc/mm/dma.c            |  147
-rw-r--r--   arch/arc/mm/extable.c        |   45
-rw-r--r--   arch/arc/mm/fault.c          |  242
-rw-r--r--   arch/arc/mm/highmem.c        |   73
-rw-r--r--   arch/arc/mm/init.c           |  188
-rw-r--r--   arch/arc/mm/ioremap.c        |   85
-rw-r--r--   arch/arc/mm/mmap.c           |   56
-rw-r--r--   arch/arc/mm/tlb.c            |  782
-rw-r--r--   arch/arc/mm/tlbex.S          |  397
12 files changed, 2138 insertions, 1746 deletions
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
index ac95cc239c1e..633a773369ca 100644
--- a/arch/arc/mm/Makefile
+++ b/arch/arc/mm/Makefile
@@ -1,10 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 #
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
 obj-y := extable.o ioremap.o dma.o fault.o init.o
-obj-y += tlb.o tlbex.o cache_arc700.o mmap.o
+obj-y += tlb.o tlbex.o cache.o mmap.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
new file mode 100644
index 000000000000..7d2f93dc1e91
--- /dev/null
+++ b/arch/arc/mm/cache.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARC Cache Management
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_ISA_ARCV2
+#define USE_RGN_FLSH    1
+#endif
+
+static int l2_line_sz;
+static int ioc_exists;
+int slc_enable = 1, ioc_enable = 1;
+unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
+unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
+
+static struct cpuinfo_arc_cache {
+        unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
+void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
+                               unsigned long sz, const int op, const int full_page);
+
+void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
+void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
+void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
+
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
+{
+        struct cpuinfo_arc_cache *p_slc = &slc_info;
+        struct bcr_identity ident;
+        struct bcr_generic sbcr;
+        struct bcr_clust_cfg cbcr;
+        struct bcr_volatile vol;
+        int n = 0;
+
+        READ_BCR(ARC_REG_SLC_BCR, sbcr);
+        if (sbcr.ver) {
+                struct bcr_slc_cfg slc_cfg;
+                READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+                p_slc->sz_k = 128 << slc_cfg.sz;
+                l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+                n += scnprintf(buf + n, len - n,
+                               "SLC\t\t: %uK, %uB Line%s\n",
+                               p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
+        }
+
+        READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+        if (cbcr.c) {
+                ioc_exists = 1;
+
+                /*
+                 * As of today we don't support both IOC and ZONE_HIGHMEM
+                 * enabled simultaneously: the IOC aperture covers only
+                 * ZONE_NORMAL (low mem), so any DMA transactions outside
+                 * this region won't be HW coherent.
+                 * If we want to use both IOC and ZONE_HIGHMEM we could use
+                 * bounce buffers to handle DMA transactions to HIGHMEM.
+                 * It is also possible to modify the dma_direct cache ops or
+                 * to increase the IOC aperture size if we plan to use
+                 * HIGHMEM without PAE.
+                 */
+                if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled())
+                        ioc_enable = 0;
+        } else {
+                ioc_enable = 0;
+        }
+
+        READ_BCR(AUX_IDENTITY, ident);
+
+        /* HS 2.0 didn't have AUX_VOL */
+        if (ident.family > 0x51) {
+                READ_BCR(AUX_VOL, vol);
+                perip_base = vol.start << 28;
+                /* HS 3.0 has limit and strict-ordering fields */
+                if (ident.family > 0x52)
+                        perip_end = (vol.limit << 28) - 1;
+        }
+
+        n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+                       perip_base,
+                       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+        return n;
+}
+
+int arc_cache_mumbojumbo(int c, char *buf, int len)
+{
+        struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+        struct bcr_cache ibcr, dbcr;
+        int vipt, assoc;
+        int n = 0;
+
+        READ_BCR(ARC_REG_IC_BCR, ibcr);
+        if (!ibcr.ver)
+                goto dc_chk;
+
+        if (is_isa_arcompact() && (ibcr.ver <= 3)) {
+                BUG_ON(ibcr.config != 3);
+                assoc = 2;              /* Fixed to 2w set assoc */
+        } else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+                assoc = 1 << ibcr.config;       /* 1,2,4,8 */
+        }
+
+        p_ic->line_len = 8 << ibcr.line_len;
+        p_ic->sz_k = 1 << (ibcr.sz - 1);
+        p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+        n += scnprintf(buf + n, len - n,
+                        "I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+                        p_ic->sz_k, assoc, p_ic->line_len,
+                        p_ic->colors > 1 ? " aliasing" : "",
+                        IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
+
+dc_chk:
+        READ_BCR(ARC_REG_DC_BCR, dbcr);
+        if (!dbcr.ver)
+                goto slc_chk;
+
+        if (is_isa_arcompact() && (dbcr.ver <= 3)) {
+                BUG_ON(dbcr.config != 2);
+                vipt = 1;
+                assoc = 4;              /* Fixed to 4w set assoc */
+                p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+        } else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+                vipt = 0;
+                assoc = 1 << dbcr.config;       /* 1,2,4,8 */
+                p_dc->colors = 1;               /* PIPT so can't VIPT alias */
+        }
+
+        p_dc->line_len = 16 << dbcr.line_len;
+        p_dc->sz_k = 1 << (dbcr.sz - 1);
+
+        n += scnprintf(buf + n, len - n,
+                        "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n",
+                        p_dc->sz_k, assoc, p_dc->line_len,
+                        vipt ? "VIPT" : "PIPT",
+                        IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
+slc_chk:
+        if (is_isa_arcv2())
+                n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+        return n;
+}
+
+/*
+ * Line Operation on {I,D}-Cache
+ */
+
+#define OP_INV          0x1
+#define OP_FLUSH        0x2
+#define OP_FLUSH_N_INV  0x3
+#define OP_INV_IC       0x4
+
+/*
+ * Cache Flush programming model
+ *
+ * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
+ * Programming model requires both paddr and vaddr irrespective of aliasing
+ * considerations:
+ *  - vaddr in {I,D}C_IV?L
+ *  - paddr in {I,D}C_PTAG
+ *
+ * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
+ * Programming model is different for aliasing vs. non-aliasing I$
+ *  - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
+ *  - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
+ *
+ * - If PAE40 is enabled, independent of aliasing considerations, the higher
+ *   bits need to be written into PTAG_HI
+ */
+
+static inline
+void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
+                          unsigned long sz, const int op, const int full_page)
+{
+        unsigned int aux_cmd, aux_tag;
+        int num_lines;
+
+        if (op == OP_INV_IC) {
+                aux_cmd = ARC_REG_IC_IVIL;
+                aux_tag = ARC_REG_IC_PTAG;
+        } else {
+                aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+                aux_tag = ARC_REG_DC_PTAG;
+        }
+
+        /* Ensure we properly floor/ceil the non-line aligned/sized requests
+         * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) + write_aux_reg(aux_tag, paddr); + + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuration with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + + while (num_lines-- > 0) { + if (!full_page) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; + } +} + +#ifndef USE_RGN_FLSH + +/* + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (op == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + +#else + +/* + * optimized flush operation which takes a region as opposed to iterating per line + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int s, e; + + /* Only for Non aliasing I-cache in HS38 */ + if (op == OP_INV_IC) { + s = ARC_REG_IC_IVIR; + e = ARC_REG_IC_ENDR; + } else { + s = ARC_REG_DC_STARTR; + e = ARC_REG_DC_ENDR; + } + + if (!full_page) { + /* for any leading gap between @paddr and start of cache line */ + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + + /* + * account for any trailing gap to end of cache line + * this is equivalent to DIV_ROUND_UP() in line ops above + */ + sz += L1_CACHE_BYTES - 1; + } + + if (is_pae40_enabled()) { + /* TBD: check if crossing 4TB boundary */ + if (op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + /* ENDR needs to be set ahead of START */ + write_aux_reg(e, paddr + sz); /* ENDR is exclusive */ + write_aux_reg(s, paddr); + + /* caller waits on DC_CTRL.FS */ +} + +#endif + +#ifdef CONFIG_ARC_MMU_V3 +#define __cache_line_loop __cache_line_loop_v3 +#else +#define __cache_line_loop __cache_line_loop_v4 +#endif + +#ifdef CONFIG_ARC_HAS_DCACHE + +/*************************************************************** + * Machine specific helpers for Entire D-Cache or Per Line ops + */ + +#ifndef USE_RGN_FLSH +/* + * this version avoids extra read/write of DC_CTRL for flush or invalid ops + * in the non region flush regime (such as for ARCompact) + */ +static inline void __before_dc_op(const int op) +{ + if (op == OP_FLUSH_N_INV) { + /* Dcache provides 2 cmd: FLUSH or INV + * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE + * flush-n-inv is achieved by INV cmd but with IM=1 + * So toggle INV sub-mode depending on op request and default + */ + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); + } +} + +#else + +static inline void __before_dc_op(const int op) +{ + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int val = read_aux_reg(ctl); + + if (op == OP_FLUSH_N_INV) { + val |= DC_CTRL_INV_MODE_FLUSH; + } + + if (op != OP_INV_IC) { + /* + * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 + * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above + */ + val &= ~DC_CTRL_RGN_OP_MSK; + if (op & OP_INV) + val |= DC_CTRL_RGN_OP_INV; + } + write_aux_reg(ctl, val); +} + +#endif + + +static inline void __after_dc_op(const int op) +{ + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; + + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS) + ; + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + 
write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } +} + +/* + * Operation on Entire D-Cache + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * Note that constant propagation ensures all the checks are gone + * in generated code + */ +static inline void __dc_entire_op(const int op) +{ + int aux; + + __before_dc_op(op); + + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + aux = ARC_REG_DC_IVDC; + else + aux = ARC_REG_DC_FLSH; + + write_aux_reg(aux, 0x1); + + __after_dc_op(op); +} + +static inline void __dc_disable(void) +{ + const int r = ARC_REG_DC_CTRL; + + __dc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(r, read_aux_reg(r) | DC_CTRL_DIS); +} + +static void __dc_enable(void) +{ + const int r = ARC_REG_DC_CTRL; + + write_aux_reg(r, read_aux_reg(r) & ~DC_CTRL_DIS); +} + +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + +/* + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) + */ +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + + __before_dc_op(op); + + __cache_line_loop(paddr, vaddr, sz, op, full_page); + + __after_dc_op(op); + + local_irq_restore(flags); +} + +#else + +#define __dc_entire_op(op) +#define __dc_disable() +#define __dc_enable() +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) + +#endif /* CONFIG_ARC_HAS_DCACHE */ + +#ifdef CONFIG_ARC_HAS_ICACHE + +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + +static inline void +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page); + local_irq_restore(flags); +} + +#ifndef CONFIG_SMP + +#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) + +#else + +struct ic_inv_args { + phys_addr_t paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_inv_args *ic_inv = info; + + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_inv_args ic_inv = { + .paddr = paddr, + .vaddr = vaddr, + .sz = sz + }; + + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} + +#endif /* CONFIG_SMP */ + +#else /* !CONFIG_ARC_HAS_ICACHE */ + +#define __ic_entire_inv() +#define __ic_line_inv_vaddr(pstart, vstart, sz) + +#endif /* CONFIG_ARC_HAS_ICACHE */ + +static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + unsigned long flags; + unsigned int ctrl; + phys_addr_t end; + + spin_lock_irqsave(&lock, flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is 
Flush-n-Invalidate if CTRL.IM == 1
+         */
+        ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+        /* Don't rely on default value of IM bit */
+        if (!(op & OP_FLUSH))           /* i.e. OP_INV */
+                ctrl &= ~SLC_CTRL_IM;   /* clear IM: Disable flush before Inv */
+        else
+                ctrl |= SLC_CTRL_IM;
+
+        if (op & OP_INV)
+                ctrl |= SLC_CTRL_RGN_OP_INV;    /* Inv or flush-n-inv */
+        else
+                ctrl &= ~SLC_CTRL_RGN_OP_INV;
+
+        write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+        /*
+         * Lower bits are ignored, no need to clip
+         * END needs to be setup before START (latter triggers the operation)
+         * END can't be same as START, so add (l2_line_sz - 1) to sz
+         */
+        end = paddr + sz + l2_line_sz - 1;
+        if (is_pae40_enabled())
+                write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end));
+
+        write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end));
+
+        if (is_pae40_enabled())
+                write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr));
+
+        write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr));
+
+        /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+        read_aux_reg(ARC_REG_SLC_CTRL);
+
+        while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+        spin_unlock_irqrestore(&lock, flags);
+#endif
+}
+
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+        /*
+         * SLC is shared between all cores and concurrent aux operations from
+         * multiple cores need to be serialized using a spinlock
+         * A concurrent operation can be silently ignored and/or the old/new
+         * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+         * below)
+         */
+        static DEFINE_SPINLOCK(lock);
+
+        const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1);
+        unsigned int ctrl, cmd;
+        unsigned long flags;
+        int num_lines;
+
+        spin_lock_irqsave(&lock, flags);
+
+        ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+        /* Don't rely on default value of IM bit */
+        if (!(op & OP_FLUSH))           /* i.e. OP_INV */
+                ctrl &= ~SLC_CTRL_IM;   /* clear IM: Disable flush before Inv */
+        else
+                ctrl |= SLC_CTRL_IM;
+
+        write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+        cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL;
+
+        sz += paddr & ~SLC_LINE_MASK;
+        paddr &= SLC_LINE_MASK;
+
+        num_lines = DIV_ROUND_UP(sz, l2_line_sz);
+
+        while (num_lines-- > 0) {
+                write_aux_reg(cmd, paddr);
+                paddr += l2_line_sz;
+        }
+
+        /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+        read_aux_reg(ARC_REG_SLC_CTRL);
+
+        while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+        spin_unlock_irqrestore(&lock, flags);
+#endif
+}
+
+#define slc_op(paddr, sz, op)   slc_op_rgn(paddr, sz, op)
+
+noinline static void slc_entire_op(const int op)
+{
+        unsigned int ctrl, r = ARC_REG_SLC_CTRL;
+
+        ctrl = read_aux_reg(r);
+
+        if (!(op & OP_FLUSH))           /* i.e. OP_INV */
+                ctrl &= ~SLC_CTRL_IM;   /* clear IM: Disable flush before Inv */
+        else
+                ctrl |= SLC_CTRL_IM;
+
+        write_aux_reg(r, ctrl);
+
+        if (op & OP_INV)        /* Inv or flush-n-inv use same cmd reg */
+                write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1);
+        else
+                write_aux_reg(ARC_REG_SLC_FLUSH, 0x1);
+
+        /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+        read_aux_reg(r);
+
+        /* Important to wait for flush to complete */
+        while (read_aux_reg(r) & SLC_CTRL_BUSY);
+}
+
+static inline void arc_slc_disable(void)
+{
+        const int r = ARC_REG_SLC_CTRL;
+
+        slc_entire_op(OP_FLUSH_N_INV);
+        write_aux_reg(r, read_aux_reg(r) | SLC_CTRL_DIS);
+}
+
+static inline void arc_slc_enable(void)
+{
+        const int r = ARC_REG_SLC_CTRL;
+
+        write_aux_reg(r, read_aux_reg(r) & ~SLC_CTRL_DIS);
+}
+
+/***********************************************************
+ * Exported APIs
+ */
+
+void flush_dcache_folio(struct folio *folio)
+{
+        clear_bit(PG_dc_clean, &folio->flags.f);
+        return;
+}
+EXPORT_SYMBOL(flush_dcache_folio);
+
+void flush_dcache_page(struct page *page)
+{
+        return flush_dcache_folio(page_folio(page));
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_l1(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_l1(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicitly flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+        slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_INV);
+        slc_op(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
+{
+        __dc_line_op_k(start, sz, OP_FLUSH);
+        slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(phys_addr_t start, unsigned long sz)
+{
+        __dma_cache_wback_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(phys_addr_t start, unsigned long sz)
+{
+        __dma_cache_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(phys_addr_t start, unsigned long sz)
+{
+        __dma_cache_wback(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback);
+
+/*
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
+ * This is called on insmod, with kernel virtual address for CODE of
+ * the module. ARC cache maintenance ops require PHY address thus we
+ * need to convert vmalloc addr to PHY addr
+ */
+void flush_icache_range(unsigned long kstart, unsigned long kend)
+{
+        unsigned int tot_sz;
+
+        WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
+
+        /* Shortcut for bigger flush ranges.
+ * Here we don't care if this was kernel virtual or phy addr + */ + tot_sz = kend - kstart; + if (tot_sz > PAGE_SIZE) { + flush_cache_all(); + return; + } + + /* Case: Kernel Phy addr (0x8000_0000 onwards) */ + if (likely(kstart > PAGE_OFFSET)) { + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); + return; + } + + /* + * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) + * (1) ARC Cache Maintenance ops only take Phy addr, hence special + * handling of kernel vaddr. + * + * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), + * it still needs to handle a 2 page scenario, where the range + * straddles across 2 virtual pages and hence need for loop + */ + while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + + off = kstart % PAGE_SIZE; + pfn = vmalloc_to_pfn((void *)kstart); + phy = (pfn << PAGE_SHIFT) + off; + sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); + __sync_icache_dcache(phy, kstart, sz); + kstart += sz; + tot_sz -= sz; + } +} +EXPORT_SYMBOL(flush_icache_range); + +/* + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. + */ +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) +{ + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr) +{ + __ic_line_inv_vaddr(paddr, vaddr, nr * PAGE_SIZE); +} + +/* + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr + */ +void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr) +{ + __dc_line_op(paddr, vaddr & PAGE_MASK, nr * PAGE_SIZE, OP_FLUSH_N_INV); +} + +noinline void flush_cache_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); + + local_irq_restore(flags); + +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + struct folio *src = page_folio(from); + struct folio *dst = page_folio(to); + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); + + copy_page(kto, kfrom); + + clear_bit(PG_dc_clean, &dst->flags.f); + clear_bit(PG_dc_clean, &src->flags.f); + + kunmap_atomic(kto); + kunmap_atomic(kfrom); +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + struct folio *folio = page_folio(page); + clear_page(to); + clear_bit(PG_dc_clean, &folio->flags.f); +} +EXPORT_SYMBOL(clear_user_page); + +/********************************************************************** + * Explicit Cache flush request from user space via syscall + * Needed for JITs which generate code on the fly + */ +SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) +{ + /* TBD: optimize this */ + 
flush_cache_all(); + return 0; +} + +/* + * IO-Coherency (IOC) setup rules: + * + * 1. Needs to be at system level, so only once by Master core + * Non-Masters need not be accessing caches at that time + * - They are either HALT_ON_RESET and kick started much later or + * - if run on reset, need to ensure that arc_platform_smp_wait_to_boot() + * doesn't perturb caches or coherency unit + * + * 2. caches (L1 and SLC) need to be purged (flush+inv) before setting up IOC, + * otherwise any straggler data might behave strangely post IOC enabling + * + * 3. All Caches need to be disabled when setting up IOC to elide any in-flight + * Coherency transactions + */ +static noinline void __init arc_ioc_setup(void) +{ + unsigned int ioc_base, mem_sz; + + /* + * If IOC was already enabled (due to bootloader) it technically needs to + * be reconfigured with aperture base,size corresponding to Linux memory map + * which will certainly be different than uboot's. But disabling and + * reenabling IOC when DMA might be potentially active is tricky business. + * To avoid random memory issues later, just panic here and ask user to + * upgrade bootloader to one which doesn't enable IOC + */ + if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT) + panic("IOC already enabled, please upgrade bootloader!\n"); + + if (!ioc_enable) + return; + + /* Flush + invalidate + disable L1 dcache */ + __dc_disable(); + + /* Flush + invalidate SLC */ + if (read_aux_reg(ARC_REG_SLC_BCR)) + slc_entire_op(OP_FLUSH_N_INV); + + /* + * currently IOC Aperture covers entire DDR + * TBD: fix for PGU + 1GB of low mem + * TBD: fix for PAE + */ + mem_sz = arc_get_mem_sz(); + + if (!is_power_of_2(mem_sz) || mem_sz < 4096) + panic("IOC Aperture size must be power of 2 larger than 4KB"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512MB, 0x12 implies 1GB... + */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2); + + /* for now assume kernel base is start of IOC aperture */ + ioc_base = CONFIG_LINUX_RAM_BASE; + + if (ioc_base % mem_sz != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); + write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT); + write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT); + + /* Re-enable L1 dcache */ + __dc_enable(); +} + +/* + * Cache related boot time checks/setups only needed on master CPU: + * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES) + * Assume SMP only, so all cores will have same cache config. 
A check on + * one core suffices for all + * - IOC setup / dma callbacks only need to be done once + */ +static noinline void __init arc_cache_init_master(void) +{ + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &ic_info; + + if (!ic->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + /* + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->colors > 1) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &dc_info; + + if (!dc->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact() && dc->colors > 1) { + panic("Aliasing VIPT cache not supported\n"); + } + } + + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. + */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + + /* Note that SLC disable not formally supported till HS 3.0 */ + if (is_isa_arcv2() && l2_line_sz && !slc_enable) + arc_slc_disable(); + + if (is_isa_arcv2() && ioc_exists) + arc_ioc_setup(); + + if (is_isa_arcv2() && l2_line_sz && slc_enable) { + __dma_cache_wback_inv = __dma_cache_wback_inv_slc; + __dma_cache_inv = __dma_cache_inv_slc; + __dma_cache_wback = __dma_cache_wback_slc; + } else { + __dma_cache_wback_inv = __dma_cache_wback_inv_l1; + __dma_cache_inv = __dma_cache_inv_l1; + __dma_cache_wback = __dma_cache_wback_l1; + } + /* + * In case of IOC (say IOC+SLC case), pointers above could still be set + * but end up not being relevant as the first function in chain is not + * called at all for devices using coherent DMA. + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() + */ +} + +void __ref arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + + if (!cpu) + arc_cache_init_master(); + + /* + * In PAE regime, TLB and cache maintenance ops take wider addresses + * And even if PAE is not enabled in kernel, the upper 32-bits still need + * to be zeroed to keep the ops sane. + * As an optimization for more common !PAE enabled case, zero them out + * once at init, rather than checking/setting to 0 for every runtime op + */ + if (is_isa_arcv2() && pae40_exist_but_not_enab()) { + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) + write_aux_reg(ARC_REG_IC_PTAG_HI, 0); + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) + write_aux_reg(ARC_REG_DC_PTAG_HI, 0); + + if (l2_line_sz) { + write_aux_reg(ARC_REG_SLC_RGN_END1, 0); + write_aux_reg(ARC_REG_SLC_RGN_START1, 0); + } + } +} diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c deleted file mode 100644 index f415d851b765..000000000000 --- a/arch/arc/mm/cache_arc700.c +++ /dev/null @@ -1,767 +0,0 @@ -/* - * ARC700 VIPT Cache Management - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs - * -flush_cache_dup_mm (fork) - * -likewise for flush_cache_mm (exit/execve) - * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) - * - * vineetg: Apr 2011 - * -Now that MMU can support larger pg sz (16K), the determiniation of - * aliasing shd not be based on assumption of 8k pg - * - * vineetg: Mar 2011 - * -optimised version of flush_icache_range( ) for making I/D coherent - * when vaddr is available (agnostic of num of aliases) - * - * vineetg: Mar 2011 - * -Added documentation about I-cache aliasing on ARC700 and the way it - * was handled up until MMU V2. - * -Spotted a three year old bug when killing the 4 aliases, which needs - * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} - * instead of paddr | {0x00, 0x01, 0x10, 0x11} - * (Rajesh you owe me one now) - * - * vineetg: Dec 2010 - * -Off-by-one error when computing num_of_lines to flush - * This broke signal handling with bionic which uses synthetic sigret stub - * - * vineetg: Mar 2010 - * -GCC can't generate ZOL for core cache flush loops. - * Conv them into iterations based as opposed to while (start < end) types - * - * Vineetg: July 2009 - * -In I-cache flush routine we used to chk for aliasing for every line INV. - * Instead now we setup routines per cache geometry and invoke them - * via function pointers. - * - * Vineetg: Jan 2009 - * -Cache Line flush routines used to flush an extra line beyond end addr - * because check was while (end >= start) instead of (end > start) - * =Some call sites had to work around by doing -1, -4 etc to end param - * =Some callers didnt care. This was spec bad in case of INV routines - * which would discard valid data (cause of the horrible ext2 bug - * in ARC IDE driver) - * - * vineetg: June 11th 2008: Fixed flush_icache_range( ) - * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need - * to be flushed, which it was not doing. - * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, - * however ARC cache maintenance OPs require PHY addr. Thus need to do - * vmalloc_to_phy. - * -Also added optimisation there, that for range > PAGE SIZE we flush the - * entire cache in one shot rather than line by line. For e.g. a module - * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), - * while cache is only 16 or 32k. 
- */ - -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/mmu_context.h> -#include <linux/syscalls.h> -#include <linux/uaccess.h> -#include <linux/pagemap.h> -#include <asm/cacheflush.h> -#include <asm/cachectl.h> -#include <asm/setup.h> - -/* Instruction cache related Auxiliary registers */ -#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ -#define ARC_REG_IC_IVIC 0x10 -#define ARC_REG_IC_CTRL 0x11 -#define ARC_REG_IC_IVIL 0x19 -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_IC_PTAG 0x1E -#endif - -/* Bit val in IC_CTRL */ -#define IC_CTRL_CACHE_DISABLE 0x1 - -/* Data cache related Auxiliary registers */ -#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ -#define ARC_REG_DC_IVDC 0x47 -#define ARC_REG_DC_CTRL 0x48 -#define ARC_REG_DC_IVDL 0x4A -#define ARC_REG_DC_FLSH 0x4B -#define ARC_REG_DC_FLDL 0x4C -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_DC_PTAG 0x5C -#endif - -/* Bit val in DC_CTRL */ -#define DC_CTRL_INV_MODE_FLUSH 0x40 -#define DC_CTRL_FLUSH_STATUS 0x100 - -char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) -{ - int n = 0; - unsigned int c = smp_processor_id(); - -#define PR_CACHE(p, enb, str) \ -{ \ - if (!(p)->ver) \ - n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ - else \ - n += scnprintf(buf + n, len - n, \ - str"\t\t: (%uK) VIPT, %dway set-asc, %ub Line %s\n", \ - TO_KB((p)->sz), (p)->assoc, (p)->line_len, \ - enb ? "" : "DISABLED (kernel-build)"); \ -} - - PR_CACHE(&cpuinfo_arc700[c].icache, IS_ENABLED(CONFIG_ARC_HAS_ICACHE), - "I-Cache"); - PR_CACHE(&cpuinfo_arc700[c].dcache, IS_ENABLED(CONFIG_ARC_HAS_DCACHE), - "D-Cache"); - - return buf; -} - -/* - * Read the Cache Build Confuration Registers, Decode them and save into - * the cpuinfo structure for later use. - * No Validation done here, simply read/convert the BCRs - */ -void read_decode_cache_bcr(void) -{ - struct cpuinfo_arc_cache *p_ic, *p_dc; - unsigned int cpu = smp_processor_id(); - struct bcr_cache { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif - } ibcr, dbcr; - - p_ic = &cpuinfo_arc700[cpu].icache; - READ_BCR(ARC_REG_IC_BCR, ibcr); - - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ - p_ic->line_len = 8 << ibcr.line_len; - p_ic->sz = 0x200 << ibcr.sz; - p_ic->ver = ibcr.ver; - - p_dc = &cpuinfo_arc700[cpu].dcache; - READ_BCR(ARC_REG_DC_BCR, dbcr); - - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ - p_dc->line_len = 16 << dbcr.line_len; - p_dc->sz = 0x200 << dbcr.sz; - p_dc->ver = dbcr.ver; -} - -/* - * 1. Validate the Cache Geomtery (compile time config matches hardware) - * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) - * (aliasing D-cache configurations are not supported YET) - * 3. Enable the Caches, setup default flush mode for D-Cache - * 3. Calculate the SHMLBA used by user space - */ -void arc_cache_init(void) -{ - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - unsigned int dcache_does_alias, temp; - char str[256]; - - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - - if (!ic->ver) - goto chk_dc; - -#ifdef CONFIG_ARC_HAS_ICACHE - /* 1. 
Confirm some of I-cache params which Linux assumes */ - if (ic->line_len != ARC_ICACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); - - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver doesn't match MMU ver\n"); -#endif - - /* Enable/disable I-Cache */ - temp = read_aux_reg(ARC_REG_IC_CTRL); - -#ifdef CONFIG_ARC_HAS_ICACHE - temp &= ~IC_CTRL_CACHE_DISABLE; -#else - temp |= IC_CTRL_CACHE_DISABLE; -#endif - - write_aux_reg(ARC_REG_IC_CTRL, temp); - -chk_dc: - if (!dc->ver) - return; - -#ifdef CONFIG_ARC_HAS_DCACHE - if (dc->line_len != ARC_DCACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); - - /* check for D-Cache aliasing */ - dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; - - if (dcache_does_alias && !cache_is_vipt_aliasing()) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dcache_does_alias && cache_is_vipt_aliasing()) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); -#endif - - /* Set the default Invalidate Mode to "simpy discard dirty lines" - * as this is more frequent then flush before invalidate - * Ofcourse we toggle this default behviour when desired - */ - temp = read_aux_reg(ARC_REG_DC_CTRL); - temp &= ~DC_CTRL_INV_MODE_FLUSH; - -#ifdef CONFIG_ARC_HAS_DCACHE - /* Enable D-Cache: Clear Bit 0 */ - write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE); -#else - /* Flush D cache */ - write_aux_reg(ARC_REG_DC_FLSH, 0x1); - /* Disable D cache */ - write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE); -#endif - - return; -} - -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_FLUSH_N_INV 0x3 - -#ifdef CONFIG_ARC_HAS_DCACHE - -/*************************************************************** - * Machine specific helpers for Entire D-Cache or Per Line ops - */ - -static inline void wait_for_flush(void) -{ - while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS) - ; -} - -/* - * Operation on Entire D-Cache - * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} - * Note that constant propagation ensures all the checks are gone - * in generated code - */ -static inline void __dc_entire_op(const int cacheop) -{ - unsigned int tmp = tmp; - int aux; - - if (cacheop == OP_FLUSH_N_INV) { - /* Dcache provides 2 cmd: FLUSH or INV - * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE - * flush-n-inv is achieved by INV cmd but with IM=1 - * Default INV sub-mode is DISCARD, which needs to be toggled - */ - tmp = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH); - } - - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDC; - else - aux = ARC_REG_DC_FLSH; - - write_aux_reg(aux, 0x1); - - if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ - wait_for_flush(); - - /* Switch back the DISCARD ONLY Invalidate mode */ - if (cacheop == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); -} - -/* - * Per Line Operation on D-Cache - * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete - * It's sole purpose is to help gcc generate ZOL - * (aliasing VIPT dcache flushing needs both vaddr and paddr) - */ -static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int aux_reg) -{ - int num_lines; - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. 
- * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~DCACHE_LINE_MASK; - paddr &= DCACHE_LINE_MASK; - vaddr &= DCACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif - - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* - * Just as for I$, in MMU v3, D$ ops also require - * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops - */ - write_aux_reg(ARC_REG_DC_PTAG, paddr); - - write_aux_reg(aux_reg, vaddr); - vaddr += ARC_DCACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(aux_reg, paddr); -#endif - paddr += ARC_DCACHE_LINE_LEN; - } -} - -/* For kernel mappings cache operation: index is same as paddr */ -#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) - -/* - * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) - */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) -{ - unsigned long flags, tmp = tmp; - int aux; - - local_irq_save(flags); - - if (cacheop == OP_FLUSH_N_INV) { - /* - * Dcache provides 2 cmd: FLUSH or INV - * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE - * flush-n-inv is achieved by INV cmd but with IM=1 - * Default INV sub-mode is DISCARD, which needs to be toggled - */ - tmp = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH); - } - - if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDL; - else - aux = ARC_REG_DC_FLDL; - - __dc_line_loop(paddr, vaddr, sz, aux); - - if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ - wait_for_flush(); - - /* Switch back the DISCARD ONLY Invalidate mode */ - if (cacheop == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); - - local_irq_restore(flags); -} - -#else - -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, vaddr, sz, cacheop) -#define __dc_line_op_k(paddr, sz, cacheop) - -#endif /* CONFIG_ARC_HAS_DCACHE */ - - -#ifdef CONFIG_ARC_HAS_ICACHE - -/* - * I-Cache Aliasing in ARC700 VIPT caches - * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. - * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. - * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. 
- * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs - */ - -/*********************************************************** - * Machine specific helper for per line I-Cache invalidate. - */ -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, - unsigned long sz) -{ - unsigned long flags; - int num_lines; - - /* - * Ensure we properly floor/ceil the non-line aligned/sized requests: - * However page sized flushes can be compile time optimised. - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~ICACHE_LINE_MASK; - paddr &= ICACHE_LINE_MASK; - vaddr &= ICACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - /* bits 17:13 of vaddr go as bits 4:0 of paddr */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif - - local_irq_save(flags); - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* tag comes from phy addr */ - write_aux_reg(ARC_REG_IC_PTAG, paddr); - - /* index bits come from vaddr */ - write_aux_reg(ARC_REG_IC_IVIL, vaddr); - vaddr += ARC_ICACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(ARC_REG_IC_IVIL, paddr); -#endif - paddr += ARC_ICACHE_LINE_LEN; - } - local_irq_restore(flags); -} - -static inline void __ic_entire_inv(void) -{ - write_aux_reg(ARC_REG_IC_IVIC, 1); - read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ -} - -#else - -#define __ic_entire_inv() -#define __ic_line_inv_vaddr(pstart, vstart, sz) - -#endif /* CONFIG_ARC_HAS_ICACHE */ - - -/*********************************************************** - * Exported APIs - */ - -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to to K-mapping, former needs flushing. 
- */ -void flush_dcache_page(struct page *page) -{ - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &page->flags); - return; - } - - /* don't handle anon pages here */ - mapping = page_mapping(page); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &page->flags); - } else if (page_mapped(page)) { - - /* kernel reading from page with U-mapping */ - void *paddr = page_address(page); - unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; - - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_page(paddr, vaddr); - } -} -EXPORT_SYMBOL(flush_dcache_page); - - -void dma_cache_wback_inv(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_FLUSH_N_INV); -} -EXPORT_SYMBOL(dma_cache_wback_inv); - -void dma_cache_inv(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_INV); -} -EXPORT_SYMBOL(dma_cache_inv); - -void dma_cache_wback(unsigned long start, unsigned long sz) -{ - __dc_line_op_k(start, sz, OP_FLUSH); -} -EXPORT_SYMBOL(dma_cache_wback); - -/* - * This is API for making I/D Caches consistent when modifying - * kernel code (loadable modules, kprobes, kgdb...) - * This is called on insmod, with kernel virtual address for CODE of - * the module. ARC cache maintenance ops require PHY address thus we - * need to convert vmalloc addr to PHY addr - */ -void flush_icache_range(unsigned long kstart, unsigned long kend) -{ - unsigned int tot_sz, off, sz; - unsigned long phy, pfn; - - /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */ - - /* This is not the right API for user virtual address */ - if (kstart < TASK_SIZE) { - BUG_ON("Flush icache range for user virtual addr space"); - return; - } - - /* Shortcut for bigger flush ranges. - * Here we don't care if this was kernel virtual or phy addr - */ - tot_sz = kend - kstart; - if (tot_sz > PAGE_SIZE) { - flush_cache_all(); - return; - } - - /* Case: Kernel Phy addr (0x8000_0000 onwards) */ - if (likely(kstart > PAGE_OFFSET)) { - /* - * The 2nd arg despite being paddr will be used to index icache - * This is OK since no alternate virtual mappings will exist - * given the callers for this case: kprobe/kgdb in built-in - * kernel code only. - */ - __sync_icache_dcache(kstart, kstart, kend - kstart); - return; - } - - /* - * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) - * (1) ARC Cache Maintenance ops only take Phy addr, hence special - * handling of kernel vaddr. - * - * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), - * it still needs to handle a 2 page scenario, where the range - * straddles across 2 virtual pages and hence need for loop - */ - while (tot_sz > 0) { - off = kstart % PAGE_SIZE; - pfn = vmalloc_to_pfn((void *)kstart); - phy = (pfn << PAGE_SHIFT) + off; - sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); - __sync_icache_dcache(phy, kstart, sz); - kstart += sz; - tot_sz -= sz; - } -} - -/* - * General purpose helper to make I and D cache lines consistent. - * @paddr is phy addr of region - * @vaddr is typically user or kernel vaddr (vmalloc) - * Howver in one instance, flush_icache_range() by kprobe (for a breakpt in - * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will - * use a paddr to index the cache (despite VIPT). 
This is fine since since a - * built-in kernel page will not have any virtual mappings (not even kernel) - * kprobe on loadable module is different as it will have kvaddr. - */ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) -{ - unsigned long flags; - - local_irq_save(flags); - __ic_line_inv_vaddr(paddr, vaddr, len); - __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); - local_irq_restore(flags); -} - -/* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) -{ - __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); -} - -/* - * wrapper to clearout kernel or userspace mappings of a page - * For kernel mappings @vaddr == @paddr - */ -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) -{ - __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); -} - -noinline void flush_cache_all(void) -{ - unsigned long flags; - - local_irq_save(flags); - - __ic_entire_inv(); - __dc_entire_op(OP_FLUSH_N_INV); - - local_irq_restore(flags); - -} - -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - unsigned int paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - ___flush_dcache_page(paddr, u_vaddr); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_page(paddr, u_vaddr); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); - -} - -#endif - -void copy_user_highpage(struct page *to, struct page *from, - unsigned long u_vaddr, struct vm_area_struct *vma) -{ - void *kfrom = page_address(from); - void *kto = page_address(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - */ - if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); - clean_src_k_mappings = 1; - } - - copy_page(kto, kfrom); - - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). 
- */ - clear_bit(PG_dc_clean, &to->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); - set_bit(PG_dc_clean, &from->flags); - } else { - clear_bit(PG_dc_clean, &from->flags); - } -} - -void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) -{ - clear_page(to); - clear_bit(PG_dc_clean, &page->flags); -} - - -/********************************************************************** - * Explicit Cache flush request from user space via syscall - * Needed for JITs which generate code on the fly - */ -SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) -{ - /* TBD: optimize this */ - flush_cache_all(); - return 0; -} diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 12cc6485b218..6b85e94f3275 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -1,94 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * DMA Coherent API Notes - * - * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessintg it using a kernel virtual address, with - * Cache bit off in the TLB entry. - * - * The default DMA address == Phy address which is 0x8000_0000 based. - * A platform/device can make it zero based, by over-riding - * plat_{dma,kernel}_addr_to_{kernel,dma} */ -#include <linux/dma-mapping.h> -#include <linux/dma-debug.h> -#include <linux/export.h> +#include <linux/dma-map-ops.h> +#include <asm/cache.h> #include <asm/cacheflush.h> /* - * Helpers for Coherent DMA API. + * ARCH specific callbacks for generic noncoherent DMA ops + * - hardware IOC not available (or "dma-coherent" not set for device in DT) + * - But still handle both coherent and non-coherent requests from caller + * + * For DMA coherent hardware (IOC) generic code suffices */ -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *paddr; - - /* This is linear addr (0x8000_0000 based) */ - paddr = alloc_pages_exact(size, gfp); - if (!paddr) - return NULL; - /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); - - return paddr; -} -EXPORT_SYMBOL(dma_alloc_noncoherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) +void arch_dma_prep_coherent(struct page *page, size_t size) { - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. 
+ * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv(page_to_phys(page), size); } -EXPORT_SYMBOL(dma_free_noncoherent); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *paddr, *kvaddr; +/* + * Cache operations depending on function and direction argument, inspired by + * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ - /* This is linear addr (0x8000_0000 based) */ - paddr = alloc_pages_exact(size, gfp); - if (!paddr) - return NULL; +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; - /* This is kernel Virtual address (0x7000_0000 based) */ - kvaddr = ioremap_nocache((unsigned long)paddr, size); - if (kvaddr != NULL) - memset(kvaddr, 0, size); + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; - /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + break; - return kvaddr; + default: + break; + } } -EXPORT_SYMBOL(dma_alloc_coherent); -void dma_free_coherent(struct device *dev, size_t size, void *kvaddr, - dma_addr_t dma_handle) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { - iounmap((void __force __iomem *)kvaddr); + switch (dir) { + case DMA_TO_DEVICE: + break; - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } } -EXPORT_SYMBOL(dma_free_coherent); /* - * Helper for streaming DMA... + * Plug in direct dma map ops. */ -void __arc_dma_cache_sync(unsigned long paddr, size_t size, - enum dma_data_direction dir) +void arch_setup_dma_ops(struct device *dev, bool coherent) { - __inline_dma_cache_sync(paddr, size, dir); + /* + * IOC hardware snoops all DMA traffic keeping the caches consistent + * with memory - eliding need for any explicit cache maintenance of + * DMA buffers. + */ + if (is_isa_arcv2() && ioc_enable && coherent) + dev->dma_coherent = true; + + dev_info(dev, "use %scoherent DMA ops\n", + dev->dma_coherent ? "" : "non"); } -EXPORT_SYMBOL(__arc_dma_cache_sync); diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c index aa652e281324..88fa3a4d4906 100644 --- a/arch/arc/mm/extable.c +++ b/arch/arc/mm/extable.c @@ -1,14 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Borrowed heavily from MIPS */ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/extable.h> #include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) @@ -24,40 +22,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE - -long arc_copy_from_user_noinline(void *to, const void __user *from, - unsigned long n) -{ - return __arc_copy_from_user(to, from, n); -} -EXPORT_SYMBOL(arc_copy_from_user_noinline); - -long arc_copy_to_user_noinline(void __user *to, const void *from, - unsigned long n) -{ - return __arc_copy_to_user(to, from, n); -} -EXPORT_SYMBOL(arc_copy_to_user_noinline); - -unsigned long arc_clear_user_noinline(void __user *to, - unsigned long n) -{ - return __arc_clear_user(to, n); -} -EXPORT_SYMBOL(arc_clear_user_noinline); - -long arc_strncpy_from_user_noinline(char *dst, const char __user *src, - long count) -{ - return __arc_strncpy_from_user(dst, src, count); -} -EXPORT_SYMBOL(arc_strncpy_from_user_noinline); - -long arc_strnlen_user_noinline(const char __user *src, long n) -{ - return __arc_strnlen_user(src, n); -} -EXPORT_SYMBOL(arc_strnlen_user_noinline); -#endif diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 0fd1f0d515ff..95119a5e7761 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -1,49 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Page Fault Handling for ARC (TLB Miss / ProtV) * * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/signal.h> #include <linux/interrupt.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/uaccess.h> #include <linux/kdebug.h> -#include <asm/pgalloc.h> +#include <linux/perf_event.h> +#include <linux/mm_types.h> +#include <asm/entry.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) { /* * Synchronize this task's top level page-table * with the 'reference' page table. 
*/ pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_k = pgd_offset_k(address); - if (!pgd_present(*pgd_k)) + if (pgd_none (*pgd_k)) goto bad_area; + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) + p4d = p4d_offset(pgd, address); + p4d_k = p4d_offset(pgd_k, address); + if (p4d_none(*p4d_k)) goto bad_area; + if (!p4d_present(*p4d)) + set_p4d(p4d, *p4d_k); + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) + if (pmd_none(*pmd_k)) goto bad_area; - - set_pmd(pmd, *pmd_k); + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); /* XXX: create the TLB entry here */ return 0; @@ -52,178 +70,124 @@ bad_area: return 1; } -void do_page_fault(struct pt_regs *regs, unsigned long address) +void do_page_fault(unsigned long address, struct pt_regs *regs) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - siginfo_t info; - int fault, ret; - int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + int sig, si_code = SEGV_MAPERR; + unsigned int write = 0, exec = 0, mask; + vm_fault_t fault = VM_FAULT_SIGSEGV; /* handle_mm_fault() output */ + unsigned int flags; /* handle_mm_fault() input */ /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); - if (unlikely(ret)) - goto bad_area_nosemaphore; + if (address >= VMALLOC_START && !user_mode(regs)) { + if (unlikely(handle_kernel_vaddr_fault(address))) + goto no_context; else return; } - info.si_code = SEGV_MAPERR; - /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; + if (regs->ecr.cause & ECR_C_PROTV_STORE) /* ST/EX */ + write = 1; + else if ((regs->ecr.vec == ECR_V_PROTV) && + (regs->ecr.cause == ECR_C_PROTV_INST_FETCH)) + exec = 1; + + flags = FAULT_FLAG_DEFAULT; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = lock_mm_and_find_vma(mm, address, regs); if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; + goto bad_area_nosemaphore; /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. 
+ * vm_area is good, now check permissions for this memory access */ -good_area: - info.si_code = SEGV_ACCERR; - - /* Handle protection violation, execute on heap or stack */ - - if ((regs->ecr_vec == ECR_V_PROTV) && - (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) + mask = VM_READ; + if (write) + mask = VM_WRITE; + if (exec) + mask = VM_EXEC; + + if (!(vma->vm_flags & mask)) { + si_code = SEGV_ACCERR; goto bad_area; - - if (write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; } -survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); - /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ - if (unlikely(fatal_signal_pending(current))) { - if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY)) - up_read(&mm->mmap_sem); - if (user_mode(regs)) - return; - } - - if (likely(!(fault & VM_FAULT_ERROR))) { - if (flags & FAULT_FLAG_ALLOW_RETRY) { - /* To avoid updating stats twice for retry case */ - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - - if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; - flags |= FAULT_FLAG_TRIED; - goto retry; - } - } - - /* Fault Handled Gracefully */ - up_read(&mm->mmap_sem); + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; return; } - /* TBD: switch to pagefault_out_of_memory() */ - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - - /* no man's land */ - BUG(); + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; /* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. + * Fault retry nuances, mmap_lock already relinquished by core mm */ + if (unlikely(fault & VM_FAULT_RETRY)) { + flags |= FAULT_FLAG_TRIED; + goto retry; + } + bad_area: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) { - tsk->thread.fault_address = address; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - /* Are we prepared to handle this kernel fault? - * - * (The kernel has valid exception-points in the source - * when it acesses user-memory. 
When it fails in one - * of those points, we find it in a table and do a jump - * to some fixup code that loads an appropriate error - * code) + /* + * Major/minor page fault accounting + * (in case of retry we only land here once) */ - if (fixup_exception(regs)) + if (likely(!(fault & VM_FAULT_ERROR))) + /* Normal return path: fault Handled Gracefully */ return; - die("Oops", regs, address); - -out_of_memory: - if (is_global_init(tsk)) { - yield(); - goto survive; - } - up_read(&mm->mmap_sem); + if (!user_mode(regs)) + goto no_context; - if (user_mode(regs)) { + if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); return; } - goto no_context; + if (fault & VM_FAULT_SIGBUS) { + sig = SIGBUS; + si_code = BUS_ADRERR; + } + else { + sig = SIGSEGV; + } -do_sigbus: - up_read(&mm->mmap_sem); + tsk->thread.fault_address = address; + force_sig_fault(sig, si_code, (void __user *)address); + return; - if (!user_mode(regs)) - goto no_context; +no_context: + if (fixup_exception(regs)) + return; - tsk->thread.fault_address = address; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); + die("Oops", regs, address); } diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 000000000000..c79912a6b196 --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + */ + +#include <linux/memblock.h> +#include <linux/export.h> +#include <linux/highmem.h> +#include <linux/pgtable.h> +#include <asm/processor.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence referred to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per + * CPU. So the number of CPUs sharing a single PTE page is limited. + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. 
However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; + +static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) +{ + pmd_t *pmd_k = pmd_off_k(kvaddr); + pte_t *pte_k; + + pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pte_k) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void __init kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + BUILD_BUG_ON(FIX_KMAP_SLOTS > PTRS_PER_PTE); + + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index a08ce7185423..a73cc94f806e 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -1,36 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/bootmem.h> #include <linux/memblock.h> +#ifdef CONFIG_BLK_DEV_INITRD +#include <linux/initrd.h> +#endif +#include <linux/of_fdt.h> #include <linux/swap.h> #include <linux/module.h> +#include <linux/highmem.h> #include <asm/page.h> -#include <asm/pgalloc.h> #include <asm/sections.h> +#include <asm/setup.h> #include <asm/arcregs.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -/* Default tot mem from .config */ -static unsigned long arc_mem_sz = 0x20000000; /* some default */ +static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn, max_high_pfn; +static phys_addr_t high_mem_start; +static phys_addr_t high_mem_sz; +unsigned long arch_pfn_offset; +EXPORT_SYMBOL(arch_pfn_offset); +#endif + +long __init arc_get_mem_sz(void) +{ + return low_mem_sz; +} /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { - arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + low_mem_sz = memparse(str, NULL) & PAGE_MASK; /* early console might not be setup yet - it will show up later */ - pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); return 0; } @@ -38,8 +52,27 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - arc_mem_sz = size & PAGE_MASK; - pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + int in_use = 0; + + if (!low_mem_sz) { + if (base != low_mem_start) + panic("CONFIG_LINUX_RAM_BASE != DT memory { }"); + + low_mem_sz = size; + in_use = 1; + memblock_add_node(base, size, 0, MEMBLOCK_NONE); + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; + memblock_add_node(base, size, 1, MEMBLOCK_NONE); + memblock_reserve(base, size); +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? 
"Not used":""); } /* @@ -50,86 +83,97 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) */ void __init setup_arch_memory(void) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 }; - unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; + unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; - init_mm.start_code = (unsigned long)_text; - init_mm.end_code = (unsigned long)_etext; - init_mm.end_data = (unsigned long)_edata; - init_mm.brk = (unsigned long)_end; - - /* - * We do it here, so that memory is correctly instantiated - * even if "mem=xxx" cmline over-ride is given and/or - * DT has memory node. Each causes an update to @arc_mem_sz - * and we finally add memory one here - */ - memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); - - /*------------- externs in mm need setting up ---------------*/ + setup_initial_init_mm(_text, _etext, _edata, _end); /* first page of system - kernel .vector starts here */ - min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE); + min_low_pfn = virt_to_pfn((void *)CONFIG_LINUX_RAM_BASE); - /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ - max_low_pfn = max_pfn = PFN_DOWN(end_mem); + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); - max_mapnr = max_low_pfn - min_low_pfn; + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ - /*------------- reserve kernel image -----------------------*/ memblock_reserve(CONFIG_LINUX_LINK_BASE, __pa(_end) - CONFIG_LINUX_LINK_BASE); +#ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size) { + memblock_reserve(phys_initrd_start, phys_initrd_size); + initrd_start = (unsigned long)__va(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; + } +#endif + + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + memblock_dump_all(); - /*-------------- node setup --------------------------------*/ - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + /*----------------- node/zones setup --------------------------*/ + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; +#ifdef CONFIG_HIGHMEM /* - * We can't use the helper free_area_init(zones[]) because it uses - * PAGE_OFFSET to compute the @min_low_pfn which would be wrong - * when our kernel doesn't start at PAGE_OFFSET, i.e. - * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE + * On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based) + * than addresses in normal aka low memory (0x8000_0000 based). + * Even with PAE, the huge peripheral space hole would waste a lot of + * mem with single contiguous mem_map[]. + * Thus when HIGHMEM on ARC is enabled the memory map corresponding + * to the hole is freed and ARC specific version of pfn_valid() + * handles the hole in the memory map. 
*/ - free_area_init_node(0, /* node-id */ - zones_size, /* num pages per zone */ - min_low_pfn, /* first pfn of node */ - NULL); /* NO holes */ -} -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Calculates and displays memory available/used - */ -void __init mem_init(void) -{ - high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); - free_all_bootmem(); - mem_init_print_info(NULL); -} + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); -/* - * free_initmem: Free all the __init memory. - */ -void __init_refok free_initmem(void) -{ - free_initmem_default(-1); + /* + * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. + * For HIGHMEM without PAE max_high_pfn should be less than + * min_low_pfn to guarantee that these two regions don't overlap. + * For PAE case highmem is greater than lowmem, so it is natural + * to use max_high_pfn. + * + * In both cases, holes should be handled by pfn_valid(). + */ + max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; + + arch_pfn_offset = min(min_low_pfn, min_high_pfn); + kmap_init(); +#endif /* CONFIG_HIGHMEM */ + + free_area_init(max_zone_pfn); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) +void __init arch_mm_preinit(void) { - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} +#ifdef CONFIG_HIGHMEM + memblock_phys_free(high_mem_start, high_mem_sz); #endif -#ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); +} + +#ifdef CONFIG_HIGHMEM +int pfn_valid(unsigned long pfn) { - pr_err("%s(%lx, %lx)\n", __func__, start, end); + return (pfn >= min_high_pfn && pfn <= max_high_pfn) || + (pfn >= min_low_pfn && pfn <= max_low_pfn); } -#endif /* CONFIG_OF_FLATTREE */ +EXPORT_SYMBOL(pfn_valid); +#endif diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 739e65f355de..fd8897a0e52c 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/vmalloc.h> @@ -11,81 +8,55 @@ #include <linux/module.h> #include <linux/io.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/cache.h> -void __iomem *ioremap(unsigned long paddr, unsigned long size) +static inline bool arc_uncached_addr_space(phys_addr_t paddr) { - unsigned long end; + if (is_isa_arcompact()) { + if (paddr >= ARC_UNCACHED_ADDR_SPACE) + return true; + } else if (paddr >= perip_base && paddr <= perip_end) { + return true; + } - /* Don't allow wraparound or zero size */ - end = paddr + size - 1; - if (!size || (end < paddr)) - return NULL; + return false; +} - /* If the region is h/w uncached, avoid MMU mappings */ - if (paddr >= ARC_UNCACHED_ADDR_SPACE) - return (void __iomem *)paddr; +void __iomem *ioremap(phys_addr_t paddr, unsigned long size) +{ + /* + * If the region is h/w uncached, MMU mapping can be elided as optim + * The cast to u32 is fine as this region can only be inside 4GB + */ + if (arc_uncached_addr_space(paddr)) + return (void __iomem *)(u32)paddr; - return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); + return ioremap_prot(paddr, size, + pgprot_noncached(PAGE_KERNEL)); } EXPORT_SYMBOL(ioremap); /* * ioremap with access flags * Cache semantics wise it is same as ioremap - "forced" uncached. - * However unline vanilla ioremap which bypasses ARC MMU for addresses in + * However unlike vanilla ioremap which bypasses ARC MMU for addresses in * ARC hardware uncached region, this one still goes thru the MMU as caller * might need finer access control (R/W/X) */ -void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, - unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t paddr, size_t size, + pgprot_t prot) { - void __iomem *vaddr; - struct vm_struct *area; - unsigned long off, end; - pgprot_t prot = __pgprot(flags); - - /* Don't allow wraparound, zero size */ - end = paddr + size - 1; - if ((!size) || (end < paddr)) - return NULL; - - /* An early platform driver might end up here */ - if (!slab_is_available()) - return NULL; - /* force uncached */ - prot = pgprot_noncached(prot); - - /* Mappings have to be page-aligned */ - off = paddr & ~PAGE_MASK; - paddr &= PAGE_MASK; - size = PAGE_ALIGN(end + 1) - paddr; - - /* - * Ok, go for it.. - */ - area = get_vm_area(size, VM_IOREMAP); - if (!area) - return NULL; - area->phys_addr = paddr; - vaddr = (void __iomem *)area->addr; - if (ioremap_page_range((unsigned long)vaddr, - (unsigned long)vaddr + size, paddr, prot)) { - vunmap((void __force *)vaddr); - return NULL; - } - return (void __iomem *)(off + (char __iomem *)vaddr); + return generic_ioremap_prot(paddr, size, pgprot_noncached(prot)); } EXPORT_SYMBOL(ioremap_prot); - -void iounmap(const void __iomem *addr) +void iounmap(volatile void __iomem *addr) { - if (addr >= (void __force __iomem *)ARC_UNCACHED_ADDR_SPACE) + /* weird double cast to handle phys_addr_t > 32 bits */ + if (arc_uncached_addr_space((phys_addr_t)(u32)addr)) return; - vfree((void *)(PAGE_MASK & (unsigned long __force)addr)); + generic_iounmap(addr); } EXPORT_SYMBOL(iounmap); diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 2e06d56e987b..2185afe8d59f 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -1,24 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * ARC700 mmap * * (started from arm version - for VIPT alias handling) * * Copyright (C) 2013 Synopsys, Inc. 
(www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/sched.h> -#include <asm/cacheflush.h> +#include <linux/sched/mm.h> -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ - (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) +#include <asm/cacheflush.h> /* * Ensure that shared mappings are correctly aligned to @@ -29,25 +23,18 @@ */ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int do_align = 0; - int aliasing = cache_is_vipt_aliasing(); - struct vm_unmapped_area_info info; - - /* - * We only need to do colour alignment if D cache aliases. - */ - if (aliasing) - do_align = filp || (flags & MAP_SHARED); + struct vm_unmapped_area_info info = {}; /* * We enforce the MAP_FIXED case. */ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && + if (flags & MAP_SHARED && (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; @@ -57,22 +44,37 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return -ENOMEM; if (addr) { - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } - info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } + +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_U_NONE, + [VM_READ] = PAGE_U_R, + [VM_WRITE] = PAGE_U_R, + [VM_WRITE | VM_READ] = PAGE_U_R, + [VM_EXEC] = PAGE_U_X_R, + [VM_EXEC | VM_READ] = PAGE_U_X_R, + [VM_EXEC | VM_WRITE] = PAGE_U_X_R, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_R, + [VM_SHARED] = PAGE_U_NONE, + [VM_SHARED | VM_READ] = PAGE_U_R, + [VM_SHARED | VM_WRITE] = PAGE_U_W_R, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_U_W_R, + [VM_SHARED | VM_EXEC] = PAGE_U_X_R, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_U_X_R, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_U_X_W_R, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_W_R +}; +DECLARE_VM_GET_PAGE_PROT diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 7957dc4e4d4a..ed6915ba76ec 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -1,207 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * TLB Management (flush/create/diagnostics) for ARC700 + * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4 * * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * vineetg: Aug 2011 - * -Reintroduce duplicate PD fixup - some customer chips still have the issue - * - * vineetg: May 2011 - * -No need to flush_cache_page( ) for each call to update_mmu_cache() - * some of the LMBench tests improved amazingly - * = page-fault thrice as fast (75 usec to 28 usec) - * = mmap twice as fast (9.6 msec to 4.6 msec), - * = fork (5.3 msec to 3.7 msec) - * - * vineetg: April 2011 : - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, - * helps avoid a shift when preparing PD0 from PTE - * - * vineetg: April 2011 : Preparing for MMU V3 - * -MMU v2/v3 BCRs decoded differently - * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 - * -tlb_entry_erase( ) can be void - * -local_flush_tlb_range( ): - * = need not "ceil" @end - * = walks MMU only if range spans < 32 entries, as opposed to 256 - * - * Vineetg: Sept 10th 2008 - * -Changes related to MMU v2 (Rel 4.8) - * - * Vineetg: Aug 29th 2008 - * -In TLB Flush operations (Metal Fix MMU) there is a explict command to - * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, - * it fails. Thus need to load it with ANY valid value before invoking - * TLBIVUTLB cmd - * - * Vineetg: Aug 21th 2008: - * -Reduced the duration of IRQ lockouts in TLB Flush routines - * -Multiple copies of TLB erase code seperated into a "single" function - * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID - * in interrupt-safe region. - * - * Vineetg: April 23rd Bug #93131 - * Problem: tlb_flush_kernel_range() doesnt do anything if the range to - * flush is more than the size of TLB itself. - * - * Rahul Trivedi : Codito Technologies 2004 */ #include <linux/module.h> +#include <linux/bug.h> +#include <linux/mm_types.h> + #include <asm/arcregs.h> #include <asm/setup.h> #include <asm/mmu_context.h> #include <asm/mmu.h> -/* Need for ARC MMU v2 - * - * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. - * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages - * map into same set, there would be contention for the 2 ways causing severe - * Thrashing. - * - * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has - * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrasing problem should never happen because once the 3 - * J-TLB entries are created (even though 3rd will knock out one of the prev - * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy - * - * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. - * This is a simple design for keeping them in sync. So what do we do? - * The solution which James came up was pretty neat. It utilised the assoc - * of uTLBs by not invalidating always but only when absolutely necessary. - * - * - Existing TLB commands work as before - * - New command (TLBWriteNI) for TLB write without clearing uTLBs - * - New command (TLBIVUTLB) to invalidate uTLBs. - * - * The uTLBs need only be invalidated when pages are being removed from the - * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB - * as a result of a miss, the removed entry is still allowed to exist in the - * uTLBs as it is still valid and present in the OS page table. This allows the - * full associativity of the uTLBs to hide the limited associativity of the main - * TLB. - * - * During a miss handler, the new "TLBWriteNI" command is used to load - * entries without clearing the uTLBs. 
- * - * When the OS page table is updated, TLB entries that may be associated with a - * removed page are removed (flushed) from the TLB using TLBWrite. In this - * circumstance, the uTLBs must also be cleared. This is done by using the - * existing TLBWrite command. An explicit IVUTLB is also required for those - * corner cases when TLBWrite was not executed at all because the corresp - * J-TLB entry got evicted/replaced. - */ - - /* A copy of the ASID from the PID reg is kept in asid_cache */ -int asid_cache = FIRST_ASID; +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; -/* ASID to mm struct mapping. We have one extra entry corresponding to - * NO_ASID to save us a compare when clearing the mm entry for old asid - * see get_new_mmu_context (asm-arc/mmu_context.h) - */ -struct mm_struct *asid_mm_map[NUM_ASID + 1]; +static struct cpuinfo_arc_mmu { + unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways; +} mmuinfo; /* * Utility Routine to erase a J-TLB entry - * The procedure is to look it up in the MMU. If found, ERASE it by - * issuing a TlbWrite CMD with PD0 = PD1 = 0 + * Caller needs to setup Index Reg (manually or via getIndex) */ - -static void __tlb_entry_erase(void) +static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } -static void tlb_entry_erase(unsigned int vaddr_n_asid) +static void utlb_invalidate(void) +{ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); +} + +#ifdef CONFIG_ARC_MMU_V3 + +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; - /* Locate the TLB entry for this vaddr + ASID */ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); idx = read_aux_reg(ARC_REG_TLBINDEX); + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + /* No error means entry found, zero it out */ if (likely(!(idx & TLB_LKUP_ERR))) { __tlb_entry_erase(); - } else { /* Some sort of Error */ - + } else { /* Duplicate entry error */ - if (idx & 0x1) { - /* TODO we need to handle this case too */ - pr_emerg("unhandled Duplicate flush for %x\n", - vaddr_n_asid); - } - /* else entry not found so nothing to do */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); } } -/**************************************************************************** - * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs) - * - * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB - * - * utlb_invalidate ( ) - * -For v2 MMU calls Flush uTLB Cmd - * -For v1 MMU does nothing (except for Metal Fix v1 MMU) - * This is because in v1 TLBWrite itself invalidate uTLBs - ***************************************************************************/ - -static void utlb_invalidate(void) +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) { -#if (CONFIG_ARC_MMU_VER >= 2) - -#if (CONFIG_ARC_MMU_VER < 3) - /* MMU v2 introduced the uTLB Flush command. - * There was however an obscure hardware bug, where uTLB flush would - * fail when a prior probe for J-TLB (both totally unrelated) would - * return lkup err - because the entry didnt exist in MMU. - * The Workround was to set Index reg with some valid value, prior to - * flush. 
This was fixed in MMU v3 hence not needed any more - */ unsigned int idx; - /* make sure INDEX Reg is valid */ - idx = read_aux_reg(ARC_REG_TLBINDEX); + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); - /* If not write some dummy val */ - if (unlikely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBINDEX, 0xa); -#endif + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); - write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); -#endif + /* setup the other half of TLB entry (pfn, rwx..) */ + write_aux_reg(ARC_REG_TLBPD1, pd1); + /* + * Commit the Entry to MMU + * It doesn't sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. + */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +#else /* MMUv4 */ + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + + if (!is_pae40_enabled()) { + write_aux_reg(ARC_REG_TLBPD1, pd1); + } else { + write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF); + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + } + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + /* * Un-conditionally (without lookup) erase the entire MMU contents */ noinline void local_flush_tlb_all(void) { + struct cpuinfo_arc_mmu *mmu = &mmuinfo; unsigned long flags; unsigned int entry; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + int num_tlb = mmu->sets * mmu->ways; local_irq_save(flags); /* Load PD0 and PD1 with template for a Blank Entry */ write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); - for (entry = 0; entry < mmu->num_tlb; entry++) { + for (entry = 0; entry < num_tlb; entry++) { /* write this entry to the TLB */ write_aux_reg(ARC_REG_TLBINDEX, entry); - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); + } + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); + } } utlb_invalidate(); @@ -210,7 +172,7 @@ noinline void local_flush_tlb_all(void) } /* - * Flush the entrie MM for userland. The fastest way is to move to Next ASID + * Flush the entire MM for userland. 
The fastest way is to move to Next ASID */ noinline void local_flush_tlb_mm(struct mm_struct *mm) { @@ -224,13 +186,14 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) return; /* - * Workaround for Android weirdism: - * A binder VMA could end up in a task such that vma->mm != tsk->mm - * old code would cause h/w - s/w ASID to get out of sync + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first */ - if (current->mm != mm) - destroy_context(mm); - else + destroy_context(mm); + if (current->mm == mm) get_new_mmu_context(mm); } @@ -240,16 +203,16 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) * Difference between this and Kernel Range Flush is * -Here the fastest way (if range is too large) is to move to next ASID * without doing any explicit Shootdown - * -In case of kernel Flush, entry has to be shot down explictly + * -In case of kernel Flush, entry has to be shot down explicitly */ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; - unsigned int asid; /* If range @start to @end is more than 32 TLB entries deep, - * its better to move to a new ASID rather than searching for + * it's better to move to a new ASID rather than searching for * individual entries and then shooting them down * * The calc above is rough, doesn't account for unaligned parts, @@ -268,17 +231,14 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, start &= PAGE_MASK; local_irq_save(flags); - asid = vma->vm_mm->context.asid; - if (asid != NO_ASID) { + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { while (start < end) { - tlb_entry_erase(start | (asid & 0xff)); + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); start += PAGE_SIZE; } } - utlb_invalidate(); - local_irq_restore(flags); } @@ -307,8 +267,6 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) start += PAGE_SIZE; } - utlb_invalidate(); - local_irq_restore(flags); } @@ -319,6 +277,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* Note that it is critical that interrupts are DISABLED between @@ -326,23 +285,118 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) */ local_irq_save(flags); - if (vma->vm_mm->context.asid != NO_ASID) { - tlb_entry_erase((page & PAGE_MASK) | - (vma->vm_mm->context.asid & 0xff)); - utlb_invalidate(); + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); } local_irq_restore(flags); } +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + 
local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; - unsigned int idx, asid_or_sasid; - unsigned long pd0_flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0; + phys_addr_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -351,11 +405,10 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) * * Removing the assumption involves * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. - * -Fix the TLB paranoid debug code to not trigger false negatives. * -More importantly it makes this handler inconsistent with fast-path * TLB Refill handler which always deals with "current" * - * Lets see the use cases when current->mm != vma->mm and we land here + * Let's see the use cases when current->mm != vma->mm and we land here * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault * Here VM wants to pre-install a TLB entry for user stack while * current->mm still points to pre-execve mm (hence the condition). 
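[Editorial note] The SMP flush wrappers a little above all reuse one marshalling pattern: pack the arguments into a struct tlb_args on the caller's stack, then run the corresponding local_* handler on every CPU where the mm is resident. The following is a minimal kernel-context sketch of that pattern (it compiles only in-kernel); my_flush_args, my_ipi_flush, my_local_flush and my_flush_range are hypothetical names for illustration, not identifiers from this patch.

#include <linux/smp.h>
#include <linux/mm.h>

/* marshals the arguments through the void * payload of the cross-CPU call */
struct my_flush_args {
	struct vm_area_struct *vma;
	unsigned long start, end;
};

/* the per-CPU local flush this sketch assumes exists */
static void my_local_flush(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end);

/* runs on each target CPU, with IRQs disabled, via the IPI */
static void my_ipi_flush(void *arg)
{
	struct my_flush_args *a = arg;

	my_local_flush(a->vma, a->start, a->end);
}

static void my_flush_range(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end)
{
	struct my_flush_args a = { .vma = vma, .start = start, .end = end };

	/* wait == 1: @a is on our stack, so every CPU must finish before we return */
	on_each_cpu_mask(mm_cpumask(vma->vm_mm), my_ipi_flush, &a, 1);
}

The wait argument is the load-bearing detail: because the argument block lives on the caller's stack, the wrappers in the patch likewise pass 1, so no CPU can dereference it after the caller unwinds.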
@@ -374,47 +427,35 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(vma->vm_mm->context.asid, address); - - address &= PAGE_MASK; + vaddr &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); - /* Create HW TLB entry Flags (in PD0) from PTE Flags */ -#if (CONFIG_ARC_MMU_VER <= 2) - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1); -#else - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0)); -#endif + /* Create HW TLB(PD0,PD1) from PTE */ /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid); - - /* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */ - write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1)); - - /* First verify if entry for this vaddr+ASID already exists */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); - idx = read_aux_reg(ARC_REG_TLBINDEX); + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* - * If Not already present get a free slot from MMU. - * Otherwise, Probe would have located the entry and set INDEX Reg - * with existing location. This will cause Write CMD to over-write - * existing entry with new PD0 and PD1 + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits */ - if (likely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + rwx = pte_val(*ptep) & PTE_BITS_RWX; - /* - * Commit the Entry to MMU - * It doesnt sound safe to use the TLBWriteNI cmd here - * which doesn't flush uTLBs. I'd rather be safe than sorry. - */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); local_irq_restore(flags); } @@ -428,146 +469,199 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) * Note that flush (when done) involves both WBACK - so physical page is * in sync as well as INV - so any non-congruent aliases don't remain */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, - pte_t *ptep) +void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, + unsigned long vaddr_unaligned, pte_t *ptep, unsigned int nr) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); - if (page == ZERO_PAGE(0)) { + if (page == ZERO_PAGE(0)) return; - } /* - * Exec page : Independent of aliasing/page-color considerations, - * since icache doesn't snoop dcache on ARC, any dirty - * K-mapping of a code page needs to be wback+inv so that - * icache fetch by userspace sees code correctly. - * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it - * so userspace sees the right data. - * (Avoids the flush for Non-exec + congruent mapping case) + * For executable pages, since icache doesn't snoop dcache, any + * dirty K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. 
*/ - if ((vma->vm_flags & VM_EXEC) || - addr_not_cache_congruent(paddr, vaddr)) { - - int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + if (vma->vm_flags & VM_EXEC) { + struct folio *folio = page_folio(page); + int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags.f); if (dirty) { - /* wback + inv dcache lines */ - __flush_dcache_page(paddr, paddr); - - /* invalidate any existing icache lines */ + unsigned long offset = offset_in_folio(folio, paddr); + nr = folio_nr_pages(folio); + paddr -= offset; + vaddr -= offset; + /* wback + inv dcache lines (K-mapping) */ + __flush_dcache_pages(paddr, paddr, nr); + + /* invalidate any existing icache lines (U-mapping) */ if (vma->vm_flags & VM_EXEC) - __inv_icache_page(paddr, vaddr); + __inv_icache_pages(paddr, vaddr, nr); } } } -/* Read the Cache Build Confuration Registers, Decode them and save into - * the cpuinfo structure for later use. - * No Validation is done here, simply read/convert the BCRs +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are the basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (of course not overlap) in TLB with a + * new bit "SZ" in TLB page descriptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * the RTL is built. + * + * The exact THP size a Linux configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD).
+ * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) */ -void read_decode_mmu_bcr(void) + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) { - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int tmp; - struct bcr_mmu_1_2 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; -#else - unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; -#endif - } *mmu2; - - struct bcr_mmu_3 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, - u_itlb:4, u_dtlb:4; -#else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, - ways:4, ver:8; -#endif - } *mmu3; - - tmp = read_aux_reg(ARC_REG_MMU_BCR); - mmu->ver = (tmp >> 24); - - if (mmu->ver <= 2) { - mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz = PAGE_SIZE; - mmu->sets = 1 << mmu2->sets; - mmu->ways = 1 << mmu2->ways; - mmu->u_dtlb = mmu2->u_dtlb; - mmu->u_itlb = mmu2->u_itlb; - } else { - mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz = 512 << mmu3->pg_sz; - mmu->sets = 1 << mmu3->sets; - mmu->ways = 1 << mmu3->ways; - mmu->u_dtlb = mmu3->u_dtlb; - mmu->u_itlb = mmu3->u_itlb; + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache_range(NULL, vma, addr, &pte, HPAGE_PMD_NR); +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); } - mmu->num_tlb = mmu->sets * mmu->ways; + local_irq_restore(flags); } -char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) +#endif + +/* Read the Cache Build Configuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation is done here, simply read/convert the BCRs + */ +int arc_mmu_mumbojumbo(int c, char *buf, int len) { + struct cpuinfo_arc_mmu *mmu = &mmuinfo; + unsigned int bcr, u_dtlb, u_itlb, sasid; + struct bcr_mmu_3 *mmu3; + struct bcr_mmu_4 *mmu4; + char super_pg[64] = ""; int n = 0; - struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; - n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ", - p_mmu->ver, TO_KB(p_mmu->pg_sz)); + bcr = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (bcr >> 24); + + if (is_isa_arcompact() && mmu->ver == 3) { + mmu3 = (struct bcr_mmu_3 *)&bcr; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + u_dtlb = mmu3->u_dtlb; + u_itlb = mmu3->u_itlb; + sasid = mmu3->sasid; + } else { + mmu4 = (struct bcr_mmu_4 *)&bcr; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + u_dtlb = mmu4->u_dtlb * 4; + u_itlb = mmu4->u_itlb * 4; + sasid = mmu4->sasid; + mmu->pae = mmu4->pae; + } + + if (mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "/%dM%s", + mmu->s_pg_sz_m, + IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":""); n += scnprintf(buf + n, len - n, - "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n", - p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, - p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? 
"SASID" : ""); + "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n", + mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, + mmu->sets, mmu->ways, + u_dtlb, u_itlb, + IS_AVAIL1(sasid, ", SASID"), + IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); + + return n; +} - return buf; +int pae40_exist_but_not_enab(void) +{ + return mmuinfo.pae && !is_pae40_enabled(); } void arc_mmu_init(void) { - char str[256]; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - - printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); - - /* For efficiency sake, kernel is compile time built for a MMU ver - * This must match the hardware it is running on. - * Linux built for MMU V2, if run on MMU V1 will break down because V1 - * hardware doesn't understand cmds such as WriteNI, or IVUTLB - * On the other hand, Linux built for V1 if run on MMU V2 will do - * un-needed workarounds to prevent memcpy thrashing. - * Similarly MMU V3 has new features which won't work on older MMU + struct cpuinfo_arc_mmu *mmu = &mmuinfo; + int compat = 0; + + /* + * Can't be done in processor.h due to header include dependencies */ - if (mmu->ver != CONFIG_ARC_MMU_VER) { - panic("MMU ver %d doesn't match kernel built for %d...\n", - mmu->ver, CONFIG_ARC_MMU_VER); - } + BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); - if (mmu->pg_sz != PAGE_SIZE) - panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + /* + * stack top size sanity check, + * Can't be done in processor.h due to header include dependencies + */ + BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); /* - * ASID mgmt data structures are compile time init - * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes + * Ensure that MMU features assumed by kernel exist in hardware. + * - For older ARC700 cpus, only v3 supported + * - For HS cpus, v4 was baseline and v5 is backwards compatible + * (will run older software). */ + if (is_isa_arcompact() && mmu->ver == 3) + compat = 1; + else if (is_isa_arcv2() && mmu->ver >= 4) + compat = 1; - local_flush_tlb_all(); + if (!compat) + panic("MMU ver %d doesn't match kernel built for\n", mmu->ver); - /* Enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE); + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) + panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); - /* In smp we use this reg for interrupt 1 scratch */ -#ifndef CONFIG_SMP - /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ - write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); -#endif + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + + /* Enable the MMU with ASID 0 */ + mmu_setup_asid(NULL, 0); + + /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */ + mmu_setup_pgd(NULL, swapper_pg_dir); + + if (pae40_exist_but_not_enab()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); } /* @@ -595,32 +689,34 @@ void arc_mmu_init(void) * the duplicate one. 
* -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be silent abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - int set, way, n; - unsigned int pd0[4], pd1[4]; /* assume max 4 ways */ - unsigned long flags, is_valid; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + struct cpuinfo_arc_mmu *mmu = &mmuinfo; + unsigned long flags; + int set, n_ways = mmu->ways; - local_irq_save(flags); + n_ways = min(n_ways, 4); + BUG_ON(mmu->ways > 4); - /* re-enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); + local_irq_save(flags); /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { + int is_valid, way; + unsigned int pd0[4]; + /* read out all the ways of current set */ - for (way = 0, is_valid = 0; way < mmu->ways; way++) { + for (way = 0, is_valid = 0; way < n_ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); pd0[way] = read_aux_reg(ARC_REG_TLBPD0); - pd1[way] = read_aux_reg(ARC_REG_TLBPD1); is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; } /* If all the WAYS in SET are empty, skip to next SET */ @@ -628,68 +724,32 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, continue; /* Scan the set for duplicate ways: needs a nested loop */ - for (way = 0; way < mmu->ways; way++) { + for (way = 0; way < n_ways - 1; way++) { + + int n; + if (!pd0[way]) continue; - for (n = way + 1; n < mmu->ways; n++) { - if ((pd0[way] & PAGE_MASK) == - (pd0[n] & PAGE_MASK)) { - - if (dup_pd_verbose) { - pr_info("Duplicate PD's @" - "[%d:%d]/[%d:%d]\n", - set, way, set, n); - pr_info("TLBPD0[%u]: %08x\n", - way, pd0[way]); - } - - /* - * clear entry @way and not @n. This is - * critical to our optimised loop - */ - pd0[way] = pd1[way] = 0; - write_aux_reg(ARC_REG_TLBINDEX, + for (n = way + 1; n < n_ways; n++) { + if (pd0[way] != pd0[n]) + continue; + + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); + + /* + * clear entry @way and not @n. + * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); - __tlb_entry_erase(); - } + __tlb_entry_erase(); } } } local_irq_restore(flags); } - -/*********************************************************************** - * Diagnostic Routines - * -Called from Low Level TLB Hanlders if things don;t look good - **********************************************************************/ - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - -/* - * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS - * don't match - */ -void print_asid_mismatch(int is_fast_path) -{ - int pid_sw, pid_hw; - pid_sw = current->active_mm->context.asid; - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; - - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", - is_fast_path ? 
"Fast" : "Slow", pid_sw, pid_hw); - - __asm__ __volatile__("flag 1"); -} - -void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr) -{ - unsigned int pid_hw; - - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; - - if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID))) - print_asid_mismatch(0); -} -#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 5c5bb23001b0..dc65e87a531f 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -1,26 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TLB Exception Handling for ARC * * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Vineetg: April 2011 : - * -MMU v1: moved out legacy code into a seperate file + * -MMU v1: moved out legacy code into a separate file * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, * helps avoid a shift when preparing PD0 from PTE * * Vineetg: July 2009 - * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB - * entry, so that it doesn't knock out it's I-TLB entry + * -For MMU V2, we need not do heuristics at the time of committing a D-TLB + * entry, so that it doesn't knock out its I-TLB entry * -Some more fine tuning: * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc * * Vineetg: July 2009 * -Practically rewrote the I/D TLB Miss handlers - * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Now 40 and 135 instructions apiece as compared to 131 and 449 resp. * Hence Leaner by 1.5 K * Used Conditional arithmetic to replace excessive branching * Also used short instructions wherever possible @@ -35,26 +32,43 @@ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 */ - .cpu A7 - #include <linux/linkage.h> +#include <linux/pgtable.h> #include <asm/entry.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/arcregs.h> #include <asm/cache.h> #include <asm/processor.h> -#if (CONFIG_ARC_MMU_VER == 1) -#include <asm/tlb-mmu1.h> -#endif -;-------------------------------------------------------------------------- -; scratch memory to save the registers (r0-r3) used to code TLB refill Handler -; For details refer to comments before TLBMISS_FREEUP_REGS below +#ifdef CONFIG_ISA_ARCOMPACT +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). +; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. 
+; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... ;-------------------------------------------------------------------------- +; scratch memory to save [r0-r3] used to code TLB refill Handler ARCFP_DATA ex_saved_reg1 - .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned + .align 1 << L1_CACHE_SHIFT .type ex_saved_reg1, @object #ifdef CONFIG_SMP .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) @@ -66,54 +80,79 @@ ex_saved_reg1: .zero 16 #endif -;============================================================================ -; Troubleshooting Stuff -;============================================================================ - -; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid -; When Creating TLB Entries, instead of doing 3 dependent loads from memory, -; we use the MMU PID Reg to get current ASID. -; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. -; So we try to detect this in TLB Mis shandler - - -.macro DBG_ASID_MISMATCH - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - - ; make sure h/w ASID is same as s/w ASID - - GET_CURR_TASK_ON_CPU r3 - ld r0, [r3, TASK_ACT_MM] - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] - - lr r1, [ARC_REG_PID] - and r1, r1, 0xFF - breq r1, r0, 5f - - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r0, [erstatus] - bbit0 r0, STATUS_U_BIT, 5f - - ; We sure are in troubled waters, Flag the error, but to do so - ; need to switch to kernel mode stack to call error routine - GET_TSK_STACK_BASE r3, sp +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] +.endm - ; Call printk to shoutout aloud - mov r0, 1 - j print_asid_mismatch +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm -5: ; ASIDs match so proceed normally - nop +#else /* ARCv2 */ +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_ARC_HAS_LL64 + std r0, [sp, -16] + std r2, [sp, -8] +#else + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 #endif +.endm +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_ARC_HAS_LL64 + ldd r0, [sp, -16] + ldd r2, [sp, -8] +#else + POP r3 + POP r2 + POP r1 + POP r0 +#endif .endm +#endif + ;============================================================================ ;TLB Miss handling Code ;============================================================================ +#ifndef PMD_SHIFT +#define PMD_SHIFT PUD_SHIFT +#endif + +#ifndef PUD_SHIFT +#define PUD_SHIFT PGDIR_SHIFT +#endif + ;----------------------------------------------------------------------------- ; This macro does the page-table lookup for the faulting address. 
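[Aside: in C terms, the lookup this macro performs reduces to computing one index per table level from the faulting address. A worked sketch of the two-level case, under assumed ARC defaults (8K pages, so a PGD entry spans 2M):]

    #include <stdio.h>

    /* Assumed geometry: 8K pages, 2-level tables => PGDIR_SHIFT = 21 */
    #define PAGE_SHIFT_EX    13
    #define PGDIR_SHIFT_EX   21
    #define PTRS_PER_PTE_EX  (1u << (PGDIR_SHIFT_EX - PAGE_SHIFT_EX))  /* 256 */

    int main(void)
    {
        unsigned long efa = 0x12345678UL;   /* example faulting address */

        printf("pgd index %lu, pte index %lu\n",
               efa >> PGDIR_SHIFT_EX,                            /* level 1 */
               (efa >> PAGE_SHIFT_EX) & (PTRS_PER_PTE_EX - 1));  /* level 2 */
        return 0;
    }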
; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address @@ -121,7 +160,7 @@ ex_saved_reg1: lr r2, [efa] -#ifndef CONFIG_SMP +#ifdef CONFIG_ISA_ARCV2 lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd #else GET_CURR_TASK_ON_CPU r1 @@ -130,44 +169,80 @@ ex_saved_reg1: #endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr - and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags - ; contains Ptr to Page Table - bz.d do_slow_path_pf ; if no Page Table, do page fault + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#if CONFIG_PGTABLE_LEVELS > 3 + lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD + and r0, r0, (PTRS_PER_PUD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 + lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD + and r0, r0, (PTRS_PER_PMD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = pgtbl[y] - ; To avoid the multiply by in end, we do the -2, <<2 below - - lsr r0, r2, (PAGE_SHIFT - 2) - and r0, r0, ( (PTRS_PER_PTE - 1) << 2) - ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - and.f 0, r0, _PAGE_PRESENT - bz 1f - ld r3, [num_pte_not_present] - add r3, r3, 1 - st r3, [num_pte_not_present] -1: + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ #endif + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: + .endm ;----------------------------------------------------------------- ; Convert Linux PTE entry into TLB entry ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + and r3, r0, PTE_BITS_RWX ; r w x + asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb -#if (CONFIG_ARC_MMU_VER <= 2) /* Neednot be done with v3 onwards */ - lsr r2, r2 ; shift PTE flags to match layout in PD0 -#endif lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid @@ -179,80 +254,21 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU +#ifdef CONFIG_ARC_MMU_V3 /* Get free TLB slot: Set = computed from vaddr, way = random */ sr 
TLBGetIndex, [ARC_REG_TLBCOMMAND] /* Commit the Write */ -#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ sr TLBWriteNI, [ARC_REG_TLBCOMMAND] -#else - sr TLBWrite, [ARC_REG_TLBCOMMAND] -#endif -.endm - -;----------------------------------------------------------------- -; ARC700 Exception Handling doesn't auto-switch stack and it only provides -; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" -; -; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a -; "global" is used to free-up FIRST core reg to be able to code the rest of -; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). -; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 -; need to be saved as well by extending the "global" to be 4 words. Hence -; ".size ex_saved_reg1, 16" -; [All of this dance is to avoid stack switching for each TLB Miss, since we -; only need to save only a handful of regs, as opposed to complete reg file] -; -; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST -; core reg as it will not be SMP safe. -; Thus scratch AUX reg is used (and no longer used to cache task PGD). -; To save the rest of 3 regs - per cpu, the global is made "per-cpu". -; Epilogue thus has to locate the "per-cpu" storage for regs. -; To avoid cache line bouncing the per-cpu global is aligned/sized per -; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence -; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" - -; As simple as that.... -.macro TLBMISS_FREEUP_REGS -#ifdef CONFIG_SMP - sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with - GET_CPU_ID r0 ; get to per cpu scratch mem, - lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu - add r0, @ex_saved_reg1, r0 #else - st r0, [@ex_saved_reg1] - mov_s r0, @ex_saved_reg1 + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] #endif - st_s r1, [r0, 4] - st_s r2, [r0, 8] - st_s r3, [r0, 12] - ; VERIFY if the ASID in MMU-PID Reg is same as - ; one in Linux data structures - - DBG_ASID_MISMATCH +88: .endm -;----------------------------------------------------------------- -.macro TLBMISS_RESTORE_REGS -#ifdef CONFIG_SMP - GET_CPU_ID r0 ; get to per cpu scratch mem - lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide - add r0, @ex_saved_reg1, r0 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - lr r0, [ARC_REG_SCRATCH_DATA0] -#else - mov_s r0, @ex_saved_reg1 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - ld_s r0, [r0] -#endif -.endm ARCFP_CODE ;Fast Path Code, candidate for ICCM @@ -260,16 +276,10 @@ ARCFP_CODE ;Fast Path Code, candidate for ICCM ; I-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissI +ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numitlb] - add r0, r0, 1 - st r0, [@numitlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE @@ -277,8 +287,8 @@ ARC_ENTRY EV_TLBMissI ;---------------------------------------------------------------- ; VERIFY_PTE: Check if PTE permissions approp for executing code cmp_s r2, VMALLOC_START - mov.lo r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE) - mov.hs r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE) + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL and r3, r0, r2 ; Mask out NON Flag bits from PTE xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) @@ -291,24 +301,19 @@ ARC_ENTRY 
EV_TLBMissI CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation rtie -ARC_EXIT EV_TLBMissI +END(EV_TLBMissI) ;----------------------------------------------------------------------------- ; D-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissD +ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numdtlb] - add r0, r0, 1 - st r0, [@numdtlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA @@ -317,26 +322,21 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) - mov_s r2, 0 + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + lr r3, [ecr] btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access - or.nz r2, r2, _PAGE_U_READ ; chk for Read flag in PTE + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access - or.nz r2, r2, _PAGE_U_WRITE ; chk for Write flag in PTE - ; Above laddering takes care of XCHG access - ; which is both Read and Write - - ; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx - ; For copy_(to|from)_user, despite exception taken in kernel mode, - ; this code is not hit, because EFA would still be the user mode - ; address (EFA < 0x6000_0000). - ; This code is for legit kernel mode faults, vmalloc specifically - ; (EFA: 0x7000_0000 to 0x7FFF_FFFF) - - lr r3, [efa] - cmp r3, VMALLOC_START - 1 ; If kernel mode access - asl.hi r2, r2, 3 ; make _PAGE_xx flags as _PAGE_K_xx - or r2, r2, _PAGE_PRESENT ; Common flag for K/U mode + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access (both R and W) ; By now, r2 setup with all the Flags we need to check in PTE and r3, r0, r2 ; Mask out NON Flag bits from PTE @@ -344,54 +344,35 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty - lr r3, [ecr] or r0, r0, _PAGE_ACCESSED ; Accessed bit always - btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB -#if (CONFIG_ARC_MMU_VER == 1) - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. - ; But only for old MMU or one with Metal Fix - TLB_WRITE_HEURISTICS -#endif - COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation rtie ;-------- Common routine to call Linux Page Fault Handler ----------- do_slow_path_pf: +#ifdef CONFIG_ISA_ARCV2 + ; Set Z flag if exception in U mode. 
Hardware micro-ops do this on any + taken interrupt/exception, and thus is already the case at the entry + above, but ensuing code would have already clobbered. + ; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set + + lr r2, [erstatus] + and r2, r2, STATUS_U_MASK + bxor.f 0, r2, STATUS_U_BIT +#endif + + ; Restore the 4-scratch regs saved by fast path miss handler TLBMISS_RESTORE_REGS ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - ; That requires freeing up r9 - EXCPN_PROLOG_FREEUP_REG r9 - - lr r9, [erstatus] - - SWITCH_TO_KERNEL_STK - SAVE_ALL_SYS - - ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r0, sp - lr r1, [efa] - - ; We don't want exceptions to be disabled while the fault is handled. - ; Now that we have saved the context we return from exception hence - ; exceptions get re-enable - - FAKE_RET_FROM_EXCPN r9 - - bl do_page_fault - b ret_from_exception - -ARC_EXIT EV_TLBMissD - -ARC_ENTRY EV_TLBMissB ; Bogus entry to measure sz of DTLBMiss hdlr + b call_do_page_fault +END(EV_TLBMissD)
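[Aside: two details from the fast path above reward a closer look. First, CONV_PTE_TO_TLB folds Linux's single set of RWX bits into the kernel/user permission pairs that PD1 wants. In C the transform is roughly the following; the mask arguments are placeholders, and the shift-by-3 is taken from the asl in the macro:]

    #include <stdint.h>

    /* Sketch of the PD1 permission folding; mask values are placeholders */
    static uint32_t fold_rwx(uint32_t pte, uint32_t rwx_mask, uint32_t global_bit)
    {
        uint32_t rwx = pte & rwx_mask;   /* r w x as Linux tracks them      */
        uint32_t pd1 = rwx << 3;         /* Kr Kw Kx: kernel-mode copy      */

        if (!(pte & global_bit))         /* user page: grant user mode the  */
            pd1 |= rwx;                  /* same rights (Ur Uw Ux)          */

        return pd1;                      /* PFN/cache bits get OR'd in after */
    }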

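[Second, the PTE fetch in LOAD_FAULT_PTE avoids an index-times-entry-size multiply by folding PTE_SIZE_LOG into the shift and pre-shifting the index mask. The equivalence, with the same assumed geometry as before:]

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT_EX    13    /* assumed 8K pages           */
    #define PTRS_PER_PTE_EX  256   /* assumed 2-level geometry   */
    #define PTE_SIZE_LOG_EX  2     /* 4B PTEs (3 under PAE40)    */

    int main(void)
    {
        uint32_t vaddr = 0x12345678;

        /* naive byte offset: index first, then scale by sizeof(pte) */
        uint32_t naive = ((vaddr >> PAGE_SHIFT_EX) & (PTRS_PER_PTE_EX - 1))
                             << PTE_SIZE_LOG_EX;

        /* fast path: shift less, mask with a pre-shifted constant mask */
        uint32_t fast = (vaddr >> (PAGE_SHIFT_EX - PTE_SIZE_LOG_EX))
                            & ((PTRS_PER_PTE_EX - 1) << PTE_SIZE_LOG_EX);

        assert(naive == fast);     /* same offset, one instruction saved */
        return 0;
    }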