From cfd9d70a855edf6adb37d0ed88be9e35274dbe49 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Nov 2014 19:27:24 +0530 Subject: ARCv2: mm: TLB Miss optim: SMP builds can cache pgd pointer in mmu scratch reg ARC700 exception (and intr handling) didn't have auto stack switching thus had to rely on stashing a reg temporarily (to free it up) at a known place in memory, allowing to code up the low level stack switching. This however was not re-entrant in SMP which thus had to repurpose the per-cpu MMU SCRATCH DATA register otherwise used to "cache" the task pdg pointer (vs. reading it from mm struct) The newer HS cores do have auto-stack switching and thus even SMP builds can use the MMU SCRATCH reg as originally intended. This patch fixes the restriction to ARC700 SMP builds only Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu.h | 4 ++++ arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 66a292335ee6..c3aa775878dc 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -130,7 +130,7 @@ * to be saved again on kernel mode stack, as part of pt_regs. *-------------------------------------------------------------*/ .macro PROLOG_FREEUP_REG reg, mem -#ifdef CONFIG_SMP +#ifndef ARC_USE_SCRATCH_REG sr \reg, [ARC_REG_SCRATCH_DATA0] #else st \reg, [\mem] @@ -138,7 +138,7 @@ .endm .macro PROLOG_RESTORE_REG reg, mem -#ifdef CONFIG_SMP +#ifndef ARC_USE_SCRATCH_REG lr \reg, [ARC_REG_SCRATCH_DATA0] #else ld \reg, [\mem] diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 98cadf1a09ac..0abacb82a72b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -40,6 +40,10 @@ #define ARC_REG_SCRATCH_DATA0 0x46c #endif +#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP) +#define ARC_USE_SCRATCH_REG +#endif + /* Bits in MMU PID register */ #define __TLB_ENABLE (1 << 31) #define __PROG_ENABLE (1 << 30) diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 035470816be5..3a5e6a5b9ed6 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -144,7 +144,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifndef CONFIG_SMP +#ifdef ARC_USE_SCRATCH_REG /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 7addd0301c51..ea14a8bfc691 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -351,7 +351,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Thus use this macro only when you are certain that "current" is current * e.g. when dealing with signal frame setup code etc */ -#ifndef CONFIG_SMP +#ifdef ARC_USE_SCRATCH_REG #define pgd_offset_fast(mm, addr) \ ({ \ pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ -- cgit From ad4c40e937f6d6a08a579c4a78206039618426b7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 17 Nov 2015 10:10:29 +0530 Subject: ARC: mm: tlb flush optim: Make TLBWriteNI fallback to TLBWrite if not available TLBWriteNI was introduced in MMUv2 (to not invalidate uTLBs in Fast Path TLB Refill Handler). To avoid #ifdef'ery make it fallback to TLBWrite availabel on all MMUs. This will also help with next change Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 0abacb82a72b..26b731d32a2b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -67,6 +67,8 @@ #if (CONFIG_ARC_MMU_VER >= 2) #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ +#else +#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */ #endif #if (CONFIG_ARC_MMU_VER >= 4) -- cgit From f091d5a426447cc427680bdd3adc7773aa2867df Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 8 Nov 2019 19:20:22 +0300 Subject: ARC: ARCv2: jump label: implement jump label patching Implement jump label patching for ARC. Jump labels provide an interface to generate dynamic branches using self-modifying code. This allows us to implement conditional branches where changing branch direction is expensive but branch selection is basically 'free' This implementation uses 32-bit NOP and BRANCH instructions which forced to be aligned by 4 to guarantee that they don't cross L1 cache line boundary and can be update atomically. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 2 ++ arch/arc/include/asm/jump_label.h | 72 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 arch/arc/include/asm/jump_label.h (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 918804c7c1a4..d8ece4292388 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -25,6 +25,8 @@ #ifndef __ASSEMBLY__ +#include + /* Uncached access macros */ #define arc_read_uncached_32(ptr) \ ({ \ diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h new file mode 100644 index 000000000000..9d9618079739 --- /dev/null +++ b/arch/arc/include/asm/jump_label.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARC_JUMP_LABEL_H +#define _ASM_ARC_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +#define JUMP_LABEL_NOP_SIZE 4 + +/* + * NOTE about '.balign 4': + * + * To make atomic update of patched instruction available we need to guarantee + * that this instruction doesn't cross L1 cache line boundary. + * + * As of today we simply align instruction which can be patched by 4 byte using + * ".balign 4" directive. In that case patched instruction is aligned with one + * 16-bit NOP_S if this is required. + * However 'align by 4' directive is much stricter than it actually required. + * It's enough that our 32-bit instruction don't cross L1 cache line boundary / + * L1 I$ fetch block boundary which can be achieved by using + * ".bundle_align_mode" assembler directive. That will save us from adding + * useless NOP_S padding in most of the cases. + * + * TODO: switch to ".bundle_align_mode" directive using whin it will be + * supported by ARC toolchain. + */ + +static __always_inline bool arch_static_branch(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "nop \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "b %l[l_yes] \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASSEMBLY__ */ +#endif -- cgit From d092a87073269677b7ff09e71a8d91912b7f969a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Oct 2019 08:09:38 +0200 Subject: arch: rely on asm-generic/io.h for default ioremap_* definitions Various architectures that use asm-generic/io.h still defined their own default versions of ioremap_nocache, ioremap_wt and ioremap_wc that point back to plain ioremap directly or indirectly. Remove these definitions and rely on asm-generic/io.h instead. For this to work the backup ioremap_* defintions needs to be changed to purely cpp macros instea of inlines to cover for architectures like openrisc that only define ioremap after including . Signed-off-by: Christoph Hellwig Reviewed-by: Arnd Bergmann Reviewed-by: Palmer Dabbelt --- arch/arc/include/asm/io.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 72f7929736f8..8f777d6441a5 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -34,10 +34,6 @@ static inline void ioport_unmap(void __iomem *addr) extern void iounmap(const void __iomem *addr); -#define ioremap_nocache(phy, sz) ioremap(phy, sz) -#define ioremap_wc(phy, sz) ioremap(phy, sz) -#define ioremap_wt(phy, sz) ioremap(phy, sz) - /* * io{read,write}{16,32}be() macros */ -- cgit From a1b39bae16a62ce4aae02d958224f19316d98b24 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 25 Oct 2019 08:10:37 +0200 Subject: asm-generic: Make msi.h a mandatory include/asm header msi.h is generic for all architectures except x86, which has its own version. Enabling MSI by adding msi.h to every architecture's Kbuild is just an additional step which doesn't need to be done. Make msi.h mandatory in the asm-generic/Kbuild so we don't have to do it for each architecture. Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/r/c991669e29a79b1a8e28c3b4b3a125801a693de8.1571983829.git.michal.simek@xilinx.com Tested-by: Paul Walmsley # build only, rv32/rv64 Signed-off-by: Michal Simek Signed-off-by: Bjorn Helgaas Reviewed-by: Masahiro Yamada Acked-by: Waiman Long Acked-by: Paul Walmsley # arch/riscv --- arch/arc/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 393d4f5e1450..1b505694691e 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mmiowb.h -generic-y += msi.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h -- cgit From 6aae3425aa9ca776e8201a93494a4a482353d2c3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 30 Nov 2019 17:51:06 -0800 Subject: ARC: mm: remove __ARCH_USE_5LEVEL_HACK Patch series "elide extraneous generated code for folded p4d/pud/pmd", v3. This series came out of seemingly benign excursion into understanding/removing __ARCH_USE_5LEVEL_HACK from ARC port showing some extraneous code being generated despite folded p4d/pud/pmd | bloat-o-meter2 vmlinux-[AB]* | add/remove: 0/0 grow/shrink: 3/0 up/down: 130/0 (130) | function old new delta | free_pgd_range 548 660 +112 | p4d_clear_bad 2 20 +18 The patches here address that | bloat-o-meter2 vmlinux-[BF]* | add/remove: 0/2 grow/shrink: 0/1 up/down: 0/-386 (-386) | function old new delta | pud_clear_bad 20 - -20 | p4d_clear_bad 20 - -20 | free_pgd_range 660 314 -346 The code savings are not a whole lot, but still worthwhile IMHO. This patch (of 5): With paging code made 5-level compliant, this is no longer needed. ARC has software page walker with 2 lookup levels (pgd -> pte) This was expected to be non functional change but ended with slight code bloat due to needless inclusions of p*d_free_tlb() macros which will be addressed in further patches. | bloat-o-meter2 vmlinux-[AB]* | add/remove: 0/0 grow/shrink: 2/0 up/down: 128/0 (128) | function old new delta | free_pgd_range 546 656 +110 | p4d_clear_bad 2 20 +18 | Total: Before=4137148, After=4137276, chg 0.000000% Link: http://lkml.kernel.org/r/20191016162400.14796-2-vgupta@synopsys.com Signed-off-by: Vineet Gupta Acked-by: Kirill A. Shutemov Cc: "Aneesh Kumar K . V" Cc: Arnd Bergmann Cc: Nick Piggin Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arc/include/asm') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 7addd0301c51..b917b596f7fb 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -33,7 +33,6 @@ #define _ASM_ARC_PGTABLE_H #include -#define __ARCH_USE_5LEVEL_HACK #include #include #include /* to propagate CONFIG_ARC_MMU_VER */ -- cgit