author     Linus Torvalds <torvalds@linux-foundation.org>  2023-09-01 08:09:48 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-09-01 08:09:48 -0700
commit     e0152e7481c6c63764d6ea8ee41af5cf9dfac5e9 (patch)
tree       d0cf6b7764b3dfe238fcaae2039c934580f40af3
parent     a031eba2956863457b2680453ca45515a1605a47 (diff)
parent     89775a27ff6d0396b44de0d6f44dcbc25221fdda (diff)
Merge tag 'riscv-for-linus-6.6-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt:

 - Support for the new "riscv,isa-extensions" and "riscv,isa-base"
   device tree interfaces for probing extensions

 - Support for userspace access to the performance counters

 - Support for more instructions in kprobes

 - Crash kernels can be allocated above 4GiB

 - Support for KCFI

 - Support for ELFs in !MMU configurations

 - ARCH_KMALLOC_MINALIGN has been reduced to 8

 - mmap() defaults to sv48-sized addresses, with longer addresses hidden
   behind a hint (similar to Arm and Intel)

 - Also various fixes and cleanups

* tag 'riscv-for-linus-6.6-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (51 commits)
  lib/Kconfig.debug: Restrict DEBUG_INFO_SPLIT for RISC-V
  riscv: support PREEMPT_DYNAMIC with static keys
  riscv: Move create_tmp_mapping() to init sections
  riscv: Mark KASAN tmp* page tables variables as static
  riscv: mm: use bitmap_zero() API
  riscv: enable DEBUG_FORCE_FUNCTION_ALIGN_64B
  riscv: remove redundant mv instructions
  RISC-V: mm: Document mmap changes
  RISC-V: mm: Update pgtable comment documentation
  RISC-V: mm: Add tests for RISC-V mm
  RISC-V: mm: Restrict address space for sv39,sv48,sv57
  riscv: enable DMA_BOUNCE_UNALIGNED_KMALLOC for !dma_coherent
  riscv: allow kmalloc() caches aligned to the smallest value
  riscv: support the elf-fdpic binfmt loader
  binfmt_elf_fdpic: support 64-bit systems
  riscv: Allow CONFIG_CFI_CLANG to be selected
  riscv/purgatory: Disable CFI
  riscv: Add CFI error handling
  riscv: Add ftrace_stub_graph
  riscv: Add types to indirectly called assembly functions
  ...
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt  22
-rw-r--r--  Documentation/admin-guide/sysctl/kernel.rst  27
-rw-r--r--  Documentation/riscv/vm-layout.rst  22
-rw-r--r--  arch/riscv/Kconfig  24
-rw-r--r--  arch/riscv/Makefile  3
-rw-r--r--  arch/riscv/include/asm/alternative-macros.h  2
-rw-r--r--  arch/riscv/include/asm/cache.h  14
-rw-r--r--  arch/riscv/include/asm/cacheflush.h  2
-rw-r--r--  arch/riscv/include/asm/cfi.h  22
-rw-r--r--  arch/riscv/include/asm/elf.h  13
-rw-r--r--  arch/riscv/include/asm/hwcap.h  17
-rw-r--r--  arch/riscv/include/asm/insn.h  10
-rw-r--r--  arch/riscv/include/asm/mmu.h  4
-rw-r--r--  arch/riscv/include/asm/pgtable.h  33
-rw-r--r--  arch/riscv/include/asm/processor.h  52
-rw-r--r--  arch/riscv/include/asm/syscall.h  5
-rw-r--r--  arch/riscv/include/asm/syscall_wrapper.h  87
-rw-r--r--  arch/riscv/include/uapi/asm/ptrace.h  5
-rw-r--r--  arch/riscv/include/uapi/asm/sigcontext.h  2
-rw-r--r--  arch/riscv/kernel/Makefile  2
-rw-r--r--  arch/riscv/kernel/cfi.c  77
-rw-r--r--  arch/riscv/kernel/compat_syscall_table.c  8
-rw-r--r--  arch/riscv/kernel/cpu.c  181
-rw-r--r--  arch/riscv/kernel/cpufeature.c  521
-rw-r--r--  arch/riscv/kernel/head.S  6
-rw-r--r--  arch/riscv/kernel/mcount.S  9
-rw-r--r--  arch/riscv/kernel/probes/decode-insn.c  11
-rw-r--r--  arch/riscv/kernel/probes/simulate-insn.c  105
-rw-r--r--  arch/riscv/kernel/probes/simulate-insn.h  5
-rw-r--r--  arch/riscv/kernel/setup.c  6
-rw-r--r--  arch/riscv/kernel/suspend_entry.S  5
-rw-r--r--  arch/riscv/kernel/sys_riscv.c  6
-rw-r--r--  arch/riscv/kernel/syscall_table.c  8
-rw-r--r--  arch/riscv/kernel/traps.c  4
-rw-r--r--  arch/riscv/mm/context.c  2
-rw-r--r--  arch/riscv/mm/dma-noncoherent.c  8
-rw-r--r--  arch/riscv/mm/init.c  93
-rw-r--r--  arch/riscv/mm/kasan_init.c  8
-rw-r--r--  arch/riscv/purgatory/Makefile  4
-rw-r--r--  drivers/perf/riscv_pmu.c  113
-rw-r--r--  drivers/perf/riscv_pmu_legacy.c  28
-rw-r--r--  drivers/perf/riscv_pmu_sbi.c  196
-rw-r--r--  fs/Kconfig.binfmt  2
-rw-r--r--  fs/binfmt_elf_fdpic.c  38
-rw-r--r--  include/asm-generic/preempt.h  14
-rw-r--r--  include/linux/elf-fdpic.h  14
-rw-r--r--  include/linux/perf/riscv_pmu.h  12
-rw-r--r--  include/linux/perf_event.h  3
-rw-r--r--  include/uapi/linux/elf-fdpic.h  15
-rw-r--r--  lib/Kconfig.debug  7
-rw-r--r--  tools/lib/perf/mmap.c  66
-rw-r--r--  tools/perf/tests/mmap-basic.c  6
-rw-r--r--  tools/testing/selftests/riscv/Makefile  2
-rw-r--r--  tools/testing/selftests/riscv/mm/.gitignore  2
-rw-r--r--  tools/testing/selftests/riscv/mm/Makefile  15
-rw-r--r--  tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c  35
-rw-r--r--  tools/testing/selftests/riscv/mm/testcases/mmap_default.c  35
-rw-r--r--  tools/testing/selftests/riscv/mm/testcases/mmap_test.h  64
-rwxr-xr-x  tools/testing/selftests/riscv/mm/testcases/run_mmap.sh  12
59 files changed, 1707 insertions, 407 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fcf79acea475..98efa34bf530 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -873,7 +873,7 @@
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
is selected automatically.
- [KNL, X86-64, ARM64] Select a region under 4G first, and
+ [KNL, X86-64, ARM64, RISCV] Select a region under 4G first, and
fall back to reserve region above 4G when '@offset'
hasn't been specified.
See Documentation/admin-guide/kdump/kdump.rst for further details.
@@ -886,14 +886,14 @@
Documentation/admin-guide/kdump/kdump.rst for an example.
crashkernel=size[KMG],high
- [KNL, X86-64, ARM64] range could be above 4G. Allow kernel
- to allocate physical memory region from top, so could
- be above 4G if system have more than 4G ram installed.
- Otherwise memory region will be allocated below 4G, if
- available.
+ [KNL, X86-64, ARM64, RISCV] range could be above 4G.
+ Allow kernel to allocate physical memory region from top,
+ so could be above 4G if system have more than 4G ram
+ installed. Otherwise memory region will be allocated
+ below 4G, if available.
It will be ignored if crashkernel=X is specified.
crashkernel=size[KMG],low
- [KNL, X86-64, ARM64] range under 4G. When crashkernel=X,high
+ [KNL, X86-64, ARM64, RISCV] range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
@@ -904,6 +904,7 @@
size is platform dependent.
--> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB)
--> arm64: 128MiB
+ --> riscv: 128MiB
This one lets the user specify own low range under 4G
for second kernel instead.
0: to disable low allocation.
@@ -5554,6 +5555,13 @@
[KNL] Disable ring 3 MONITOR/MWAIT feature on supported
CPUs.
+ riscv_isa_fallback [RISCV]
+ When CONFIG_RISCV_ISA_FALLBACK is not enabled, permit
+ falling back to detecting extension support by parsing
+ "riscv,isa" property on devicetree systems when the
+ replacement properties are not found. See the Kconfig
+ entry for RISCV_ISA_FALLBACK.
+
ro [KNL] Mount root device read-only on boot
rodata= [KNL]
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 3800fab1619b..8019103aac10 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -941,16 +941,35 @@ enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 8.
-perf_user_access (arm64 only)
-=================================
+perf_user_access (arm64 and riscv only)
+=======================================
+
+Controls user space access for reading perf event counters.
-Controls user space access for reading perf event counters. When set to 1,
-user space can read performance monitor counter registers directly.
+arm64
+=====
The default value is 0 (access disabled).
+When set to 1, user space can read performance monitor counter registers
+directly.
+
See Documentation/arch/arm64/perf.rst for more information.
+riscv
+=====
+
+When set to 0, user space access is disabled.
+
+The default value is 1, user space can read performance monitor counter
+registers through perf, any direct access without perf intervention will trigger
+an illegal instruction.
+
+When set to 2, which enables legacy mode (user space has direct access to cycle
+and insret CSRs only). Note that this legacy value is deprecated and will be
+removed once all user space applications are fixed.
+
+Note that the time CSR is always directly accessible to all modes.
pid_max
=======
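
A hedged userspace sketch of the modes described above (assumes RV64 and GCC/Clang inline asm; not part of the patch): rdtime works in every mode, while rdcycle only works in the deprecated legacy mode (2) and raises an illegal-instruction trap otherwise.

    #include <stdint.h>

    /* Always permitted: the time CSR is readable in all modes. */
    static inline uint64_t read_time_csr(void)
    {
            uint64_t v;

            asm volatile("rdtime %0" : "=r"(v));
            return v;
    }

    /* Traps (SIGILL) unless kernel.perf_user_access == 2 (legacy). */
    static inline uint64_t read_cycle_csr(void)
    {
            uint64_t v;

            asm volatile("rdcycle %0" : "=r"(v));
            return v;
    }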
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index 5462c84f4723..69ff6da1dbf8 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -133,3 +133,25 @@ RISC-V Linux Kernel SV57
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
__________________|____________|__________________|_________|____________________________________________________________
+
+
+Userspace VAs
+--------------------
+To maintain compatibility with software that relies on the VA space with a
+maximum of 48 bits the kernel will, by default, return virtual addresses to
+userspace from a 48-bit range (sv48). This default behavior is achieved by
+passing 0 into the hint address parameter of mmap. On CPUs with an address space
+smaller than sv48, the CPU maximum supported address space will be the default.
+
+Software can "opt-in" to receiving VAs from another VA space by providing
+a hint address to mmap. A hint address passed to mmap will cause the largest
+address space that fits entirely into the hint to be used, unless there is no
+space left in the address space. If there is no space available in the requested
+address space, an address in the next smallest available address space will be
+returned.
+
+For example, in order to obtain 48-bit VA space, a hint address greater than
+:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace
+ending at :code:`1 << 47` and the addresses beyond this are reserved for the
+kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater
+than or equal to :code:`1 << 56` must be provided.
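
A minimal userspace sketch of the hint behavior documented above (illustrative only): a NULL hint yields the default sv48-sized range, while a hint at or above 1 << 56 opts in to sv57 addresses on hardware that supports them.

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            void *def = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            /* A hint >= 1 << 56 requests the sv57 space; if unsupported,
             * the next smaller available space (sv48) is used. */
            void *hinted = mmap((void *)(1UL << 56), 4096,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            printf("default: %p\n", def);
            printf("hinted : %p\n", hinted);
            return 0;
    }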
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index f9fbfbf11ad2..5138dce1a0b4 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -35,6 +35,7 @@ config RISCV
select ARCH_HAS_SET_MEMORY if MMU
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_VDSO_DATA
@@ -42,12 +43,14 @@ config RISCV
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_CFI_CLANG
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USES_CFI_TRAPS if CFI_CLANG
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
@@ -62,6 +65,7 @@ config RISCV
select COMMON_CLK
select CPU_PM if CPU_IDLE || HIBERNATION
select EDAC_SUPPORT
+ select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
select GENERIC_ARCH_TOPOLOGY
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -130,6 +134,7 @@ config RISCV
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK if !XIP_KERNEL
select HAVE_RSEQ
@@ -267,6 +272,7 @@ config RISCV_DMA_NONCOHERENT
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select DMA_BOUNCE_UNALIGNED_KMALLOC if SWIOTLB
select DMA_DIRECT_REMAP
config AS_HAS_INSN
@@ -836,6 +842,24 @@ config XIP_PHYS_ADDR
be linked for and stored to. This address is dependent on your
own flash usage.
+config RISCV_ISA_FALLBACK
+ bool "Permit falling back to parsing riscv,isa for extension support by default"
+ default y
+ help
+ Parsing the "riscv,isa" devicetree property has been deprecated and
+ replaced by a list of explicitly defined strings. For compatibility
+ with existing platforms, the kernel will fall back to parsing the
+ "riscv,isa" property if the replacements are not found.
+
+ Selecting N here will result in a kernel that does not use the
+ fallback, unless the commandline "riscv_isa_fallback" parameter is
+ present.
+
+ Please see the dt-binding, located at
+ Documentation/devicetree/bindings/riscv/extensions.yaml for details
+ on the replacement properties, "riscv,isa-base" and
+ "riscv,isa-extensions".
+
endmenu # "Boot options"
config BUILTIN_DTB
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6ec6d52a4180..1329e060c548 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -87,9 +87,6 @@ endif
ifeq ($(CONFIG_CMODEL_MEDANY),y)
KBUILD_CFLAGS += -mcmodel=medany
endif
-ifeq ($(CONFIG_PERF_EVENTS),y)
- KBUILD_CFLAGS += -fno-omit-frame-pointer
-endif
# Avoid generating .eh_frame sections.
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index b8c55fb3ab2c..721ec275ce57 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -146,7 +146,7 @@
* vendor_id: The CPU vendor ID.
* patch_id: The patch ID (erratum ID or cpufeature ID).
* CONFIG_k: The Kconfig of this patch ID. When Kconfig is disabled, the old
- * content will alwyas be executed.
+ * content will always be executed.
*/
#define ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k) \
_ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
index d3036df23ccb..2174fe7bac9a 100644
--- a/arch/riscv/include/asm/cache.h
+++ b/arch/riscv/include/asm/cache.h
@@ -13,6 +13,7 @@
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#define ARCH_KMALLOC_MINALIGN (8)
#endif
/*
@@ -23,4 +24,17 @@
#define ARCH_SLAB_MINALIGN 16
#endif
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_RISCV_DMA_NONCOHERENT
+extern int dma_cache_alignment;
+#define dma_get_cache_alignment dma_get_cache_alignment
+static inline int dma_get_cache_alignment(void)
+{
+ return dma_cache_alignment;
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_RISCV_CACHE_H */
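
A hedged sketch of how a driver might consult the runtime value this hunk exposes (dma_get_cache_alignment() is the real interface; the caller is hypothetical):

    #include <linux/dma-mapping.h>
    #include <linux/printk.h>

    static void report_dma_alignment(void)
    {
            /* dma_cache_alignment is set at boot, so a coherent platform
             * can report less than the compile-time ARCH_DMA_MINALIGN. */
            pr_info("DMA cache alignment: %d bytes (ARCH_KMALLOC_MINALIGN: %d)\n",
                    dma_get_cache_alignment(), ARCH_KMALLOC_MINALIGN);
    }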
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index c4dca559bb97..3cb53c4df27c 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -58,8 +58,10 @@ void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);
+void __init riscv_set_dma_cache_alignment(void);
#else
static inline void riscv_noncoherent_supported(void) {}
+static inline void riscv_set_dma_cache_alignment(void) {}
#endif
/*
diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h
new file mode 100644
index 000000000000..56bf9d69d5e3
--- /dev/null
+++ b/arch/riscv/include/asm/cfi.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_CFI_H
+#define _ASM_RISCV_CFI_H
+
+/*
+ * Clang Control Flow Integrity (CFI) support.
+ *
+ * Copyright (C) 2023 Google LLC
+ */
+
+#include <linux/cfi.h>
+
+#ifdef CONFIG_CFI_CLANG
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#else
+static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+ return BUG_TRAP_TYPE_NONE;
+}
+#endif /* CONFIG_CFI_CLANG */
+
+#endif /* _ASM_RISCV_CFI_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index c24280774caf..b3b2dfbdf945 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -41,6 +41,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
#define compat_elf_check_arch compat_elf_check_arch
#define CORE_DUMP_USE_REGSET
+#define ELF_FDPIC_CORE_EFLAGS 0
#define ELF_EXEC_PAGESIZE (PAGE_SIZE)
/*
@@ -49,7 +50,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
* the loader. We need to make sure that it is out of the way of the program
* that it will "exec", and that there is sufficient room for the brk.
*/
-#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
+#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
#ifdef CONFIG_64BIT
#ifdef CONFIG_COMPAT
@@ -69,6 +70,13 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
#define ELF_HWCAP riscv_get_elf_hwcap()
extern unsigned long elf_hwcap;
+#define ELF_FDPIC_PLAT_INIT(_r, _exec_map_addr, _interp_map_addr, dynamic_addr) \
+ do { \
+ (_r)->a1 = _exec_map_addr; \
+ (_r)->a2 = _interp_map_addr; \
+ (_r)->a3 = dynamic_addr; \
+ } while (0)
+
/*
* This yields a string that ld.so will use to load implementation
* specific libraries for optimization. This is more specific in
@@ -78,7 +86,6 @@ extern unsigned long elf_hwcap;
#define COMPAT_ELF_PLATFORM (NULL)
-#ifdef CONFIG_MMU
#define ARCH_DLINFO \
do { \
/* \
@@ -115,6 +122,8 @@ do { \
else \
NEW_AUX_ENT(AT_IGNORE, 0); \
} while (0)
+
+#ifdef CONFIG_MMU
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index f041bfa7f6a0..b7b58258f6c7 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -14,12 +14,17 @@
#include <uapi/asm/hwcap.h>
#define RISCV_ISA_EXT_a ('a' - 'a')
+#define RISCV_ISA_EXT_b ('b' - 'a')
#define RISCV_ISA_EXT_c ('c' - 'a')
#define RISCV_ISA_EXT_d ('d' - 'a')
#define RISCV_ISA_EXT_f ('f' - 'a')
#define RISCV_ISA_EXT_h ('h' - 'a')
#define RISCV_ISA_EXT_i ('i' - 'a')
+#define RISCV_ISA_EXT_j ('j' - 'a')
+#define RISCV_ISA_EXT_k ('k' - 'a')
#define RISCV_ISA_EXT_m ('m' - 'a')
+#define RISCV_ISA_EXT_p ('p' - 'a')
+#define RISCV_ISA_EXT_q ('q' - 'a')
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
#define RISCV_ISA_EXT_v ('v' - 'a')
@@ -55,7 +60,6 @@
#define RISCV_ISA_EXT_ZIHPM 42
#define RISCV_ISA_EXT_MAX 64
-#define RISCV_ISA_EXT_NAME_LEN_MAX 32
#ifdef CONFIG_RISCV_M_MODE
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA
@@ -70,12 +74,15 @@
unsigned long riscv_get_elf_hwcap(void);
struct riscv_isa_ext_data {
- /* Name of the extension displayed to userspace via /proc/cpuinfo */
- char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
- /* The logical ISA extension ID */
- unsigned int isa_ext_id;
+ const unsigned int id;
+ const char *name;
+ const char *property;
};
+extern const struct riscv_isa_ext_data riscv_isa_ext[];
+extern const size_t riscv_isa_ext_count;
+extern bool riscv_isa_fallback;
+
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index fce00400c9bc..06e439eeef9a 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -63,6 +63,7 @@
#define RVG_RS1_OPOFF 15
#define RVG_RS2_OPOFF 20
#define RVG_RD_OPOFF 7
+#define RVG_RS1_MASK GENMASK(4, 0)
#define RVG_RD_MASK GENMASK(4, 0)
/* The bit field of immediate value in RVC J instruction */
@@ -130,6 +131,7 @@
#define RVC_C2_RS1_OPOFF 7
#define RVC_C2_RS2_OPOFF 2
#define RVC_C2_RD_OPOFF 7
+#define RVC_C2_RS1_MASK GENMASK(4, 0)
/* parts of opcode for RVG*/
#define RVG_OPCODE_FENCE 0x0f
@@ -289,6 +291,10 @@ static __always_inline bool riscv_insn_is_c_jalr(u32 code)
#define RV_X(X, s, mask) (((X) >> (s)) & (mask))
#define RVC_X(X, s, mask) RV_X(X, s, mask)
+#define RV_EXTRACT_RS1_REG(x) \
+ ({typeof(x) x_ = (x); \
+ (RV_X(x_, RVG_RS1_OPOFF, RVG_RS1_MASK)); })
+
#define RV_EXTRACT_RD_REG(x) \
({typeof(x) x_ = (x); \
(RV_X(x_, RVG_RD_OPOFF, RVG_RD_MASK)); })
@@ -316,6 +322,10 @@ static __always_inline bool riscv_insn_is_c_jalr(u32 code)
(RV_X(x_, RV_B_IMM_11_OPOFF, RV_B_IMM_11_MASK) << RV_B_IMM_11_OFF) | \
(RV_IMM_SIGN(x_) << RV_B_IMM_SIGN_OFF); })
+#define RVC_EXTRACT_C2_RS1_REG(x) \
+ ({typeof(x) x_ = (x); \
+ (RV_X(x_, RVC_C2_RS1_OPOFF, RVC_C2_RS1_MASK)); })
+
#define RVC_EXTRACT_JTYPE_IMM(x) \
({typeof(x) x_ = (x); \
(RVC_X(x_, RVC_J_IMM_3_1_OPOFF, RVC_J_IMM_3_1_MASK) << RVC_J_IMM_3_1_OFF) | \
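
A hedged illustration of the new rs1 extractor (the encoding is standard RV64I; the wrapper function is hypothetical): "jalr zero, 0(ra)", the canonical ret, encodes as 0x00008067 with rs1 in bits [19:15].

    #include <asm/insn.h>

    static int example_rs1(void)
    {
            u32 insn = 0x00008067;   /* jalr zero, 0(ra), i.e. "ret" */

            /* (insn >> 15) & 0x1f == 1, i.e. x1/ra */
            return RV_EXTRACT_RS1_REG(insn);
    }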
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 0099dc116168..355504b37f8e 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -20,6 +20,10 @@ typedef struct {
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
#endif
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+ unsigned long exec_fdpic_loadmap;
+ unsigned long interp_fdpic_loadmap;
+#endif
} mm_context_t;
void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2f1c0cde2ca4..b2ba3f79cfe9 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -62,11 +62,16 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#define VA_BITS_SV32 32
#ifdef CONFIG_64BIT
+#define VA_BITS_SV39 39
+#define VA_BITS_SV48 48
+#define VA_BITS_SV57 57
+
#define VA_BITS (pgtable_l5_enabled ? \
- 57 : (pgtable_l4_enabled ? 48 : 39))
+ VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
#else
-#define VA_BITS 32
+#define VA_BITS VA_BITS_SV32
#endif
#define VMEMMAP_SHIFT \
@@ -111,11 +116,27 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
+#include <asm/compat.h>
#define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
+
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
+#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
+#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
+
+#ifdef CONFIG_COMPAT
+#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
+#define MMAP_MIN_VA_BITS_64 (VA_BITS_SV39)
+#define MMAP_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_VA_BITS_64)
+#define MMAP_MIN_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_MIN_VA_BITS_64)
+#else
+#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
+#define MMAP_MIN_VA_BITS (VA_BITS_SV39)
+#endif /* CONFIG_COMPAT */
+
#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
@@ -843,14 +864,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
* Task size is:
- * - 0x9fc00000 (~2.5GB) for RV32.
- * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
- * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ * - 0x100000000000000 ( 64PB) for RV64 using SV57 mmu
*
* Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
* Instruction Set Manual Volume II: Privileged Architecture" states that
* "load and store effective addresses, which are 64bits, must have bits
* 63–48 all equal to bit 47, or else a page-fault exception will occur."
+ * Similarly for SV57, bits 63–57 must be equal to bit 56.
*/
#ifdef CONFIG_64BIT
#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index c950a8d9edef..3e23e1786d05 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,19 +13,59 @@
#include <asm/ptrace.h>
+#ifdef CONFIG_64BIT
+#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
+#define STACK_TOP_MAX TASK_SIZE_64
+
+#define arch_get_mmap_end(addr, len, flags) \
+({ \
+ unsigned long mmap_end; \
+ typeof(addr) _addr = (addr); \
+ if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
+ mmap_end = STACK_TOP_MAX; \
+ else if ((_addr) >= VA_USER_SV57) \
+ mmap_end = STACK_TOP_MAX; \
+ else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
+ mmap_end = VA_USER_SV48; \
+ else \
+ mmap_end = VA_USER_SV39; \
+ mmap_end; \
+})
+
+#define arch_get_mmap_base(addr, base) \
+({ \
+ unsigned long mmap_base; \
+ typeof(addr) _addr = (addr); \
+ typeof(base) _base = (base); \
+ unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base); \
+ if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
+ mmap_base = (_base); \
+ else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
+ mmap_base = VA_USER_SV57 - rnd_gap; \
+ else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
+ mmap_base = VA_USER_SV48 - rnd_gap; \
+ else \
+ mmap_base = VA_USER_SV39 - rnd_gap; \
+ mmap_base; \
+})
+
+#else
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP_MAX TASK_SIZE
+#endif
+#define STACK_ALIGN 16
+
+#define STACK_TOP DEFAULT_MAP_WINDOW
+
/*
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
-
-#define STACK_TOP TASK_SIZE
#ifdef CONFIG_64BIT
-#define STACK_TOP_MAX TASK_SIZE_64
+#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
#else
-#define STACK_TOP_MAX TASK_SIZE
+#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
#endif
-#define STACK_ALIGN 16
#ifndef __ASSEMBLY__
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 0148c6bd9675..121fff429dce 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -75,7 +75,7 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
}
-typedef long (*syscall_t)(ulong, ulong, ulong, ulong, ulong, ulong, ulong);
+typedef long (*syscall_t)(const struct pt_regs *);
static inline void syscall_handler(struct pt_regs *regs, ulong syscall)
{
syscall_t fn;
@@ -87,8 +87,7 @@ static inline void syscall_handler(struct pt_regs *regs, ulong syscall)
#endif
fn = sys_call_table[syscall];
- regs->a0 = fn(regs->orig_a0, regs->a1, regs->a2,
- regs->a3, regs->a4, regs->a5, regs->a6);
+ regs->a0 = fn(regs);
}
static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..1d7942c8a6cb
--- /dev/null
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - riscv specific wrappers to syscall definitions
+ *
+ * Based on arch/arm64/include/syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+#include <asm/ptrace.h>
+
+asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
+
+#define SC_RISCV_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+ ,,regs->orig_a0,,regs->a1,,regs->a2 \
+ ,,regs->a3,,regs->a4,,regs->a5,,regs->a6)
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
+ asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__riscv_compat_sys##name, ERRNO); \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_compat_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \
+ } \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
+ } \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define COMPAT_SYSCALL_DEFINE0(sname) \
+ asmlinkage long __riscv_compat_sys_##sname(const struct pt_regs *__unused); \
+ ALLOW_ERROR_INJECTION(__riscv_compat_sys_##sname, ERRNO); \
+ asmlinkage long __riscv_compat_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL_COMPAT(name) \
+ asmlinkage long __weak __riscv_compat_sys_##name(const struct pt_regs *regs); \
+ asmlinkage long __weak __riscv_compat_sys_##name(const struct pt_regs *regs) \
+ { \
+ return sys_ni_syscall(); \
+ }
+
+#define COMPAT_SYS_NI(name) \
+ SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers);
+
+#endif /* CONFIG_COMPAT */
+
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ asmlinkage long __riscv_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__riscv_sys##name, ERRNO); \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long __riscv_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__)); \
+ } \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
+ __MAP(x,__SC_TEST,__VA_ARGS__); \
+ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ return ret; \
+ } \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __riscv_sys_##sname(const struct pt_regs *__unused); \
+ ALLOW_ERROR_INJECTION(__riscv_sys_##sname, ERRNO); \
+ asmlinkage long __riscv_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ asmlinkage long __weak __riscv_sys_##name(const struct pt_regs *regs); \
+ asmlinkage long __weak __riscv_sys_##name(const struct pt_regs *regs) \
+ { \
+ return sys_ni_syscall(); \
+ }
+
+#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers);
+
+#endif /* __ASM_SYSCALL_WRAPPER_H */
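
A hedged sketch of what these macros produce (simplified, using close(2) as an example not taken from the patch): SYSCALL_DEFINE1(close, unsigned int, fd) roughly expands so that the only externally visible entry point takes pt_regs, and SC_RISCV_REGS_TO_ARGS pulls the argument back out of orig_a0.

    static long __se_sys_close(long fd);
    static inline long __do_sys_close(unsigned int fd);

    asmlinkage long __riscv_sys_close(const struct pt_regs *regs)
    {
            return __se_sys_close(regs->orig_a0);
    }

    static long __se_sys_close(long fd)
    {
            return __do_sys_close((unsigned int)fd);
    }

    /* The body written by the author follows as __do_sys_close(). */

This is what lets sys_call_table hold uniform long (*)(const struct pt_regs *) pointers, matching the syscall_t change in asm/syscall.h above.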
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h
index 283800130614..6d2d9afaabea 100644
--- a/arch/riscv/include/uapi/asm/ptrace.h
+++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -10,6 +10,11 @@
#include <linux/types.h>
+#define PTRACE_GETFDPIC 33
+
+#define PTRACE_GETFDPIC_EXEC 0
+#define PTRACE_GETFDPIC_INTERP 1
+
/*
* User-mode register state for core dumps, ptrace, sigcontext
*
diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h
index 8c8712aa9551..cd4f175dc837 100644
--- a/arch/riscv/include/uapi/asm/sigcontext.h
+++ b/arch/riscv/include/uapi/asm/sigcontext.h
@@ -25,7 +25,7 @@ struct __sc_riscv_v_state {
* Signal context structure
*
* This contains the context saved before a signal handler is invoked;
- * it is restored by sys_sigreturn / sys_rt_sigreturn.
+ * it is restored by sys_rt_sigreturn.
*/
struct sigcontext {
struct user_regs_struct sc_regs;
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 506cc4a9a45a..6ac56af42f4a 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -91,6 +91,8 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_CFI_CLANG) += cfi.o
+
obj-$(CONFIG_EFI) += efi.o
obj-$(CONFIG_COMPAT) += compat_syscall_table.o
obj-$(CONFIG_COMPAT) += compat_signal.o
diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c
new file mode 100644
index 000000000000..820158d7a291
--- /dev/null
+++ b/arch/riscv/kernel/cfi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clang Control Flow Integrity (CFI) support.
+ *
+ * Copyright (C) 2023 Google LLC
+ */
+#include <asm/cfi.h>
+#include <asm/insn.h>
+
+/*
+ * Returns the target address and the expected type when regs->epc points
+ * to a compiler-generated CFI trap.
+ */
+static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target,
+ u32 *type)
+{
+ unsigned long *regs_ptr = (unsigned long *)regs;
+ int rs1_num;
+ u32 insn;
+
+ *target = *type = 0;
+
+ /*
+ * The compiler generates the following instruction sequence
+ * for indirect call checks:
+ *
+ * lw t1, -4(<reg>)
+ * lui t2, <hi20>
+ * addiw t2, t2, <lo12>
+ * beq t1, t2, .Ltmp1
+ * ebreak ; <- regs->epc
+ * .Ltmp1:
+ * jalr <reg>
+ *
+ * We can read the expected type and the target address from the
+ * registers passed to the beq/jalr instructions.
+ */
+ if (get_kernel_nofault(insn, (void *)regs->epc - 4))
+ return false;
+ if (!riscv_insn_is_beq(insn))
+ return false;
+
+ *type = (u32)regs_ptr[RV_EXTRACT_RS1_REG(insn)];
+
+ if (get_kernel_nofault(insn, (void *)regs->epc) ||
+ get_kernel_nofault(insn, (void *)regs->epc + GET_INSN_LENGTH(insn)))
+ return false;
+
+ if (riscv_insn_is_jalr(insn))
+ rs1_num = RV_EXTRACT_RS1_REG(insn);
+ else if (riscv_insn_is_c_jalr(insn))
+ rs1_num = RVC_EXTRACT_C2_RS1_REG(insn);
+ else
+ return false;
+
+ *target = regs_ptr[rs1_num];
+
+ return true;
+}
+
+/*
+ * Checks if the ebreak trap is because of a CFI failure, and handles the trap
+ * if needed. Returns a bug_trap_type value similarly to report_bug.
+ */
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+ unsigned long target;
+ u32 type;
+
+ if (!is_cfi_trap(regs->epc))
+ return BUG_TRAP_TYPE_NONE;
+
+ if (!decode_cfi_insn(regs, &target, &type))
+ return report_cfi_failure_noaddr(regs, regs->epc);
+
+ return report_cfi_failure(regs, regs->epc, &target, type);
+}
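
For context, a hedged sketch of the call shape kCFI instruments (hypothetical code, not from the patch): any indirect call through a typed function pointer gets the lw/lui/addiw/beq/ebreak sequence shown in decode_cfi_insn() above, and a hash mismatch lands in handle_cfi_failure().

    typedef int (*handler_fn)(int);

    static int add_one(int x)
    {
            return x + 1;
    }

    static int call_checked(handler_fn fn)
    {
            /* The compiler emits the type-hash check just before the
             * indirect jump here; add_one's hash matches, so no trap. */
            return fn(41);
    }

    static int demo(void)
    {
            return call_checked(add_one);   /* 42 */
    }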
diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c
index 651f2b009c28..ad7f2d712f5f 100644
--- a/arch/riscv/kernel/compat_syscall_table.c
+++ b/arch/riscv/kernel/compat_syscall_table.c
@@ -9,11 +9,15 @@
#include <asm/syscall.h>
#undef __SYSCALL
-#define __SYSCALL(nr, call) [nr] = (call),
+#define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *);
+#include <asm/unistd.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = __riscv_##call,
asmlinkage long compat_sys_rt_sigreturn(void);
void * const compat_sys_call_table[__NR_syscalls] = {
- [0 ... __NR_syscalls - 1] = sys_ni_syscall,
+ [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall,
#include <asm/unistd.h>
};
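
The double inclusion of <asm/unistd.h> with a redefined __SYSCALL is the usual X-macro trick: pass one declares every wrapper, pass two emits the table entries. A self-contained sketch of the same pattern with a hypothetical two-entry list:

    struct pt_regs;

    #define SYSCALL_LIST(X)     \
            X(0, sys_read)      \
            X(1, sys_write)

    /* Pass one: declarations. */
    #define DECLARE(nr, call) long __riscv_##call(const struct pt_regs *);
    SYSCALL_LIST(DECLARE)
    #undef DECLARE

    /* Pass two: designated-initializer table entries. */
    #define ENTRY(nr, call) [nr] = __riscv_##call,
    void *const demo_table[] = {
            SYSCALL_LIST(ENTRY)
    };
    #undef ENTRY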
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 35b854cf078e..c17dacb1141c 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -46,7 +46,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
return 0;
}
-int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hart)
+int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hart)
{
const char *isa;
@@ -66,16 +66,53 @@ int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *har
return -ENODEV;
}
+ if (of_property_read_string(node, "riscv,isa-base", &isa))
+ goto old_interface;
+
+ if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32i", 5)) {
+ pr_warn("CPU with hartid=%lu does not support rv32i", *hart);
+ return -ENODEV;
+ }
+
+ if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64i", 5)) {
+ pr_warn("CPU with hartid=%lu does not support rv64i", *hart);
+ return -ENODEV;
+ }
+
+ if (!of_property_present(node, "riscv,isa-extensions"))
+ return -ENODEV;
+
+ if (of_property_match_string(node, "riscv,isa-extensions", "i") < 0 ||
+ of_property_match_string(node, "riscv,isa-extensions", "m") < 0 ||
+ of_property_match_string(node, "riscv,isa-extensions", "a") < 0) {
+ pr_warn("CPU with hartid=%lu does not support ima", *hart);
+ return -ENODEV;
+ }
+
+ return 0;
+
+old_interface:
+ if (!riscv_isa_fallback) {
+ pr_warn("CPU with hartid=%lu is invalid: this kernel does not parse \"riscv,isa\"",
+ *hart);
+ return -ENODEV;
+ }
+
if (of_property_read_string(node, "riscv,isa", &isa)) {
- pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
+ pr_warn("CPU with hartid=%lu has no \"riscv,isa-base\" or \"riscv,isa\" property\n",
+ *hart);
return -ENODEV;
}
- if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32ima", 7))
+ if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32ima", 7)) {
+ pr_warn("CPU with hartid=%lu does not support rv32ima", *hart);
return -ENODEV;
+ }
- if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64ima", 7))
+ if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64ima", 7)) {
+ pr_warn("CPU with hartid=%lu does not support rv64ima", *hart);
return -ENODEV;
+ }
return 0;
}
@@ -165,132 +202,46 @@ arch_initcall(riscv_cpuinfo_init);
#ifdef CONFIG_PROC_FS
-#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
- { \
- .uprop = #UPROP, \
- .isa_ext_id = EXTID, \
- }
-
-/*
- * The canonical order of ISA extension names in the ISA string is defined in
- * chapter 27 of the unprivileged specification.
- *
- * Ordinarily, for in-kernel data structures, this order is unimportant but
- * isa_ext_arr defines the order of the ISA string in /proc/cpuinfo.
- *
- * The specification uses vague wording, such as should, when it comes to
- * ordering, so for our purposes the following rules apply:
- *
- * 1. All multi-letter extensions must be separated from other extensions by an
- * underscore.
- *
- * 2. Additional standard extensions (starting with 'Z') must be sorted after
- * single-letter extensions and before any higher-privileged extensions.
-
- * 3. The first letter following the 'Z' conventionally indicates the most
- * closely related alphabetical extension category, IMAFDQLCBKJTPVH.
- * If multiple 'Z' extensions are named, they must be ordered first by
- * category, then alphabetically within a category.
- *
- * 3. Standard supervisor-level extensions (starting with 'S') must be listed
- * after standard unprivileged extensions. If multiple supervisor-level
- * extensions are listed, they must be ordered alphabetically.
- *
- * 4. Standard machine-level extensions (starting with 'Zxm') must be listed
- * after any lower-privileged, standard extensions. If multiple
- * machine-level extensions are listed, they must be ordered
- * alphabetically.
- *
- * 5. Non-standard extensions (starting with 'X') must be listed after all
- * standard extensions. If multiple non-standard extensions are listed, they
- * must be ordered alphabetically.
- *
- * An example string following the order is:
- * rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux
- *
- * New entries to this struct should follow the ordering rules described above.
- */
-static struct riscv_isa_ext_data isa_ext_arr[] = {
- __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
- __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
- __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
- __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
- __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
- __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
- __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
- __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
- __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
- __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS),
- __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
- __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
- __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
- __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
- __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
- __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
- __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
- __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
-};
-
-static void print_isa_ext(struct seq_file *f)
+static void print_isa(struct seq_file *f)
{
- struct riscv_isa_ext_data *edata;
- int i = 0, arr_sz;
-
- arr_sz = ARRAY_SIZE(isa_ext_arr) - 1;
+ seq_puts(f, "isa\t\t: ");
- /* No extension support available */
- if (arr_sz <= 0)
- return;
+ if (IS_ENABLED(CONFIG_32BIT))
+ seq_write(f, "rv32", 4);
+ else
+ seq_write(f, "rv64", 4);
- for (i = 0; i <= arr_sz; i++) {
- edata = &isa_ext_arr[i];
- if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id))
+ for (int i = 0; i < riscv_isa_ext_count; i++) {
+ if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id))
continue;
- seq_printf(f, "_%s", edata->uprop);
- }
-}
-/*
- * These are the only valid base (single letter) ISA extensions as per the spec.
- * It also specifies the canonical order in which it appears in the spec.
- * Some of the extension may just be a place holder for now (B, K, P, J).
- * This should be updated once corresponding extensions are ratified.
- */
-static const char base_riscv_exts[13] = "imafdqcbkjpvh";
+ /* Only multi-letter extensions are split by underscores */
+ if (strnlen(riscv_isa_ext[i].name, 2) != 1)
+ seq_puts(f, "_");
-static void print_isa(struct seq_file *f, const char *isa)
-{
- int i;
-
- seq_puts(f, "isa\t\t: ");
- /* Print the rv[64/32] part */
- seq_write(f, isa, 4);
- for (i = 0; i < sizeof(base_riscv_exts); i++) {
- if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
- /* Print only enabled the base ISA extensions */
- seq_write(f, &base_riscv_exts[i], 1);
+ seq_printf(f, "%s", riscv_isa_ext[i].name);
}
- print_isa_ext(f);
+
seq_puts(f, "\n");
}
static void print_mmu(struct seq_file *f)
{
- char sv_type[16];
+ const char *sv_type;
#ifdef CONFIG_MMU
#if defined(CONFIG_32BIT)
- strncpy(sv_type, "sv32", 5);
+ sv_type = "sv32";
#elif defined(CONFIG_64BIT)
if (pgtable_l5_enabled)
- strncpy(sv_type, "sv57", 5);
+ sv_type = "sv57";
else if (pgtable_l4_enabled)
- strncpy(sv_type, "sv48", 5);
+ sv_type = "sv48";
else
- strncpy(sv_type, "sv39", 5);
+ sv_type = "sv39";
#endif
#else
- strncpy(sv_type, "none", 5);
+ sv_type = "none";
#endif /* CONFIG_MMU */
seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
@@ -321,27 +272,21 @@ static int c_show(struct seq_file *m, void *v)
unsigned long cpu_id = (unsigned long)v - 1;
struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
struct device_node *node;
- const char *compat, *isa;
+ const char *compat;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
+ print_isa(m);
+ print_mmu(m);
if (acpi_disabled) {
node = of_get_cpu_node(cpu_id, NULL);
- if (!of_property_read_string(node, "riscv,isa", &isa))
- print_isa(m, isa);
- print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat) &&
strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
of_node_put(node);
- } else {
- if (!acpi_get_riscv_isa(NULL, cpu_id, &isa))
- print_isa(m, isa);
-
- print_mmu(m);
}
seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid);
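
With the table-driven printer above, the /proc/cpuinfo isa line is built from riscv_isa_ext[] rather than the raw devicetree string. A hypothetical hart might report:

    isa		: rv64imafdc_zicbom_zicntr_zicsr_zifencei_zihintpause_sstc

Single-letter extensions run together after the base; multi-letter ones are underscore-separated, following the canonical-order rules documented in cpufeature.c.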
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3cc99e928744..ef7b4fd9e876 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -98,29 +98,252 @@ static bool riscv_isa_extension_check(int id)
return true;
}
-void __init riscv_fill_hwcap(void)
+#define __RISCV_ISA_EXT_DATA(_name, _id) { \
+ .name = #_name, \
+ .property = #_name, \
+ .id = _id, \
+}
+
+/*
+ * The canonical order of ISA extension names in the ISA string is defined in
+ * chapter 27 of the unprivileged specification.
+ *
+ * Ordinarily, for in-kernel data structures, this order is unimportant but
+ * isa_ext_arr defines the order of the ISA string in /proc/cpuinfo.
+ *
+ * The specification uses vague wording, such as should, when it comes to
+ * ordering, so for our purposes the following rules apply:
+ *
+ * 1. All multi-letter extensions must be separated from other extensions by an
+ * underscore.
+ *
+ * 2. Additional standard extensions (starting with 'Z') must be sorted after
+ * single-letter extensions and before any higher-privileged extensions.
+ *
+ * 3. The first letter following the 'Z' conventionally indicates the most
+ * closely related alphabetical extension category, IMAFDQLCBKJTPVH.
+ * If multiple 'Z' extensions are named, they must be ordered first by
+ * category, then alphabetically within a category.
+ *
+ * 3. Standard supervisor-level extensions (starting with 'S') must be listed
+ * after standard unprivileged extensions. If multiple supervisor-level
+ * extensions are listed, they must be ordered alphabetically.
+ *
+ * 4. Standard machine-level extensions (starting with 'Zxm') must be listed
+ * after any lower-privileged, standard extensions. If multiple
+ * machine-level extensions are listed, they must be ordered
+ * alphabetically.
+ *
+ * 5. Non-standard extensions (starting with 'X') must be listed after all
+ * standard extensions. If multiple non-standard extensions are listed, they
+ * must be ordered alphabetically.
+ *
+ * An example string following the order is:
+ * rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux
+ *
+ * New entries to this struct should follow the ordering rules described above.
+ */
+const struct riscv_isa_ext_data riscv_isa_ext[] = {
+ __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i),
+ __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m),
+ __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a),
+ __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f),
+ __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d),
+ __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q),
+ __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
+ __RISCV_ISA_EXT_DATA(b, RISCV_ISA_EXT_b),
+ __RISCV_ISA_EXT_DATA(k, RISCV_ISA_EXT_k),
+ __RISCV_ISA_EXT_DATA(j, RISCV_ISA_EXT_j),
+ __RISCV_ISA_EXT_DATA(p, RISCV_ISA_EXT_p),
+ __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
+ __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
+ __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
+ __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
+ __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
+ __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
+ __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
+ __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
+ __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
+ __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
+ __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS),
+ __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
+ __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
+ __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
+ __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
+ __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
+ __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+};
+
+const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
+
+static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo,
+ unsigned long *isa2hwcap, const char *isa)
+{
+ /*
+ * For all possible cpus, we have already validated in
+ * the boot process that they at least contain "rv" and
+ * whichever of "32"/"64" this kernel supports, and so this
+ * section can be skipped.
+ */
+ isa += 4;
+
+ while (*isa) {
+ const char *ext = isa++;
+ const char *ext_end = isa;
+ bool ext_long = false, ext_err = false;
+
+ switch (*ext) {
+ case 's':
+ /*
+ * Workaround for invalid single-letter 's' & 'u'(QEMU).
+ * No need to set the bit in riscv_isa as 's' & 'u' are
+ * not valid ISA extensions. It works until multi-letter
+ * extension starting with "Su" appears.
+ */
+ if (ext[-1] != '_' && ext[1] == 'u') {
+ ++isa;
+ ext_err = true;
+ break;
+ }
+ fallthrough;
+ case 'S':
+ case 'x':
+ case 'X':
+ case 'z':
+ case 'Z':
+ /*
+ * Before attempting to parse the extension itself, we find its end.
+ * As multi-letter extensions must be split from other multi-letter
+ * extensions with an "_", the end of a multi-letter extension will
+ * either be the null character or the "_" at the start of the next
+ * multi-letter extension.
+ *
+ * Next, as the extensions version is currently ignored, we
+ * eliminate that portion. This is done by parsing backwards from
+ * the end of the extension, removing any numbers. This may be a
+ * major or minor number however, so the process is repeated if a
+ * minor number was found.
+ *
+ * ext_end is intended to represent the first character *after* the
+ * name portion of an extension, but will be decremented to the last
+ * character itself while eliminating the extensions version number.
+ * A simple re-increment solves this problem.
+ */
+ ext_long = true;
+ for (; *isa && *isa != '_'; ++isa)
+ if (unlikely(!isalnum(*isa)))
+ ext_err = true;
+
+ ext_end = isa;
+ if (unlikely(ext_err))
+ break;
+
+ if (!isdigit(ext_end[-1]))
+ break;
+
+ while (isdigit(*--ext_end))
+ ;
+
+ if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
+ ++ext_end;
+ break;
+ }
+
+ while (isdigit(*--ext_end))
+ ;
+
+ ++ext_end;
+ break;
+ default:
+ /*
+ * Things are a little easier for single-letter extensions, as they
+ * are parsed forwards.
+ *
+ * After checking that our starting position is valid, we need to
+ * ensure that, when isa was incremented at the start of the loop,
+ * that it arrived at the start of the next extension.
+ *
+ * If we are already on a non-digit, there is nothing to do. Either
+ * we have a multi-letter extension's _, or the start of an
+ * extension.
+ *
+ * Otherwise we have found the current extension's major version
+ * number. Parse past it, and a subsequent p/minor version number
+ * if present. The `p` extension must not appear immediately after
+ * a number, so there is no fear of missing it.
+ *
+ */
+ if (unlikely(!isalpha(*ext))) {
+ ext_err = true;
+ break;
+ }
+
+ if (!isdigit(*isa))
+ break;
+
+ while (isdigit(*++isa))
+ ;
+
+ if (tolower(*isa) != 'p')
+ break;
+
+ if (!isdigit(*++isa)) {
+ --isa;
+ break;
+ }
+
+ while (isdigit(*++isa))
+ ;
+
+ break;
+ }
+
+ /*
+ * The parser expects that at the start of an iteration isa points to the
+ * first character of the next extension. As we stop parsing an extension
+ * on meeting a non-alphanumeric character, an extra increment is needed
+ * where the succeeding extension is a multi-letter prefixed with an "_".
+ */
+ if (*isa == '_')
+ ++isa;
+
+#define SET_ISA_EXT_MAP(name, bit) \
+ do { \
+ if ((ext_end - ext == strlen(name)) && \
+ !strncasecmp(ext, name, strlen(name)) && \
+ riscv_isa_extension_check(bit)) \
+ set_bit(bit, isainfo->isa); \
+ } while (false) \
+
+ if (unlikely(ext_err))
+ continue;
+ if (!ext_long) {
+ int nr = tolower(*ext) - 'a';
+
+ if (riscv_isa_extension_check(nr)) {
+ *this_hwcap |= isa2hwcap[nr];
+ set_bit(nr, isainfo->isa);
+ }
+ } else {
+ for (int i = 0; i < riscv_isa_ext_count; i++)
+ SET_ISA_EXT_MAP(riscv_isa_ext[i].name,
+ riscv_isa_ext[i].id);
+ }
+#undef SET_ISA_EXT_MAP
+ }
+}
+
+static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
{
struct device_node *node;
const char *isa;
- char print_str[NUM_ALPHA_EXTS + 1];
- int i, j, rc;
- unsigned long isa2hwcap[26] = {0};
+ int rc;
struct acpi_table_header *rhct;
acpi_status status;
unsigned int cpu;
- isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
- isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
- isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
- isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
- isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
- isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
- isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V;
-
- elf_hwcap = 0;
-
- bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
-
if (!acpi_disabled) {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
if (ACPI_FAILURE(status))
@@ -152,169 +375,7 @@ void __init riscv_fill_hwcap(void)
}
}
- /*
- * For all possible cpus, we have already validated in
- * the boot process that they at least contain "rv" and
- * whichever of "32"/"64" this kernel supports, and so this
- * section can be skipped.
- */
- isa += 4;
-
- while (*isa) {
- const char *ext = isa++;
- const char *ext_end = isa;
- bool ext_long = false, ext_err = false;
-
- switch (*ext) {
- case 's':
- /*
- * Workaround for invalid single-letter 's' & 'u'(QEMU).
- * No need to set the bit in riscv_isa as 's' & 'u' are
- * not valid ISA extensions. It works until multi-letter
- * extension starting with "Su" appears.
- */
- if (ext[-1] != '_' && ext[1] == 'u') {
- ++isa;
- ext_err = true;
- break;
- }
- fallthrough;
- case 'S':
- case 'x':
- case 'X':
- case 'z':
- case 'Z':
- /*
- * Before attempting to parse the extension itself, we find its end.
- * As multi-letter extensions must be split from other multi-letter
- * extensions with an "_", the end of a multi-letter extension will
- * either be the null character or the "_" at the start of the next
- * multi-letter extension.
- *
- * Next, as the extensions version is currently ignored, we
- * eliminate that portion. This is done by parsing backwards from
- * the end of the extension, removing any numbers. This may be a
- * major or minor number however, so the process is repeated if a
- * minor number was found.
- *
- * ext_end is intended to represent the first character *after* the
- * name portion of an extension, but will be decremented to the last
- * character itself while eliminating the extensions version number.
- * A simple re-increment solves this problem.
- */
- ext_long = true;
- for (; *isa && *isa != '_'; ++isa)
- if (unlikely(!isalnum(*isa)))
- ext_err = true;
-
- ext_end = isa;
- if (unlikely(ext_err))
- break;
-
- if (!isdigit(ext_end[-1]))
- break;
-
- while (isdigit(*--ext_end))
- ;
-
- if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
- ++ext_end;
- break;
- }
-
- while (isdigit(*--ext_end))
- ;
-
- ++ext_end;
- break;
- default:
- /*
- * Things are a little easier for single-letter extensions, as they
- * are parsed forwards.
- *
- * After checking that our starting position is valid, we need to
- * ensure that, when isa was incremented at the start of the loop,
- * that it arrived at the start of the next extension.
- *
- * If we are already on a non-digit, there is nothing to do. Either
- * we have a multi-letter extension's _, or the start of an
- * extension.
- *
- * Otherwise we have found the current extension's major version
- * number. Parse past it, and a subsequent p/minor version number
- * if present. The `p` extension must not appear immediately after
- * a number, so there is no fear of missing it.
- *
- */
- if (unlikely(!isalpha(*ext))) {
- ext_err = true;
- break;
- }
-
- if (!isdigit(*isa))
- break;
-
- while (isdigit(*++isa))
- ;
-
- if (tolower(*isa) != 'p')
- break;
-
- if (!isdigit(*++isa)) {
- --isa;
- break;
- }
-
- while (isdigit(*++isa))
- ;
-
- break;
- }
-
- /*
- * The parser expects that at the start of an iteration isa points to the
- * first character of the next extension. As we stop parsing an extension
- * on meeting a non-alphanumeric character, an extra increment is needed
- * where the succeeding extension is a multi-letter prefixed with an "_".
- */
- if (*isa == '_')
- ++isa;
-
-#define SET_ISA_EXT_MAP(name, bit) \
- do { \
- if ((ext_end - ext == sizeof(name) - 1) && \
- !strncasecmp(ext, name, sizeof(name) - 1) && \
- riscv_isa_extension_check(bit)) \
- set_bit(bit, isainfo->isa); \
- } while (false) \
-
- if (unlikely(ext_err))
- continue;
- if (!ext_long) {
- int nr = tolower(*ext) - 'a';
-
- if (riscv_isa_extension_check(nr)) {
- this_hwcap |= isa2hwcap[nr];
- set_bit(nr, isainfo->isa);
- }
- } else {
- /* sorted alphabetically */
- SET_ISA_EXT_MAP("smaia", RISCV_ISA_EXT_SMAIA);
- SET_ISA_EXT_MAP("ssaia", RISCV_ISA_EXT_SSAIA);
- SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
- SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
- SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
- SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
- SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
- SET_ISA_EXT_MAP("zba", RISCV_ISA_EXT_ZBA);
- SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
- SET_ISA_EXT_MAP("zbs", RISCV_ISA_EXT_ZBS);
- SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
- SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ);
- SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
- }
-#undef SET_ISA_EXT_MAP
- }
+ riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa);
/*
* These ones were as they were part of the base ISA when the
@@ -346,9 +407,107 @@ void __init riscv_fill_hwcap(void)
if (!acpi_disabled && rhct)
acpi_put_table((struct acpi_table_header *)rhct);
+}
+
+static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ unsigned long this_hwcap = 0;
+ struct device_node *cpu_node;
+ struct riscv_isainfo *isainfo = &hart_isa[cpu];
+
+ cpu_node = of_cpu_device_node_get(cpu);
+ if (!cpu_node) {
+ pr_warn("Unable to find cpu node\n");
+ continue;
+ }
+
+ if (!of_property_present(cpu_node, "riscv,isa-extensions")) {
+ of_node_put(cpu_node);
+ continue;
+ }
+
+ for (int i = 0; i < riscv_isa_ext_count; i++) {
+ if (of_property_match_string(cpu_node, "riscv,isa-extensions",
+ riscv_isa_ext[i].property) < 0)
+ continue;
+
+ if (!riscv_isa_extension_check(riscv_isa_ext[i].id))
+ continue;
+
+ /* Only single letter extensions get set in hwcap */
+ if (strnlen(riscv_isa_ext[i].name, 2) == 1)
+ this_hwcap |= isa2hwcap[riscv_isa_ext[i].id];
+
+ set_bit(riscv_isa_ext[i].id, isainfo->isa);
+ }
+
+ of_node_put(cpu_node);
+
+ /*
+	 * All "okay" harts should have the same ISA. Set HWCAP based on the
+	 * common capabilities of every "okay" hart, in case they don't.
+ */
+ if (elf_hwcap)
+ elf_hwcap &= this_hwcap;
+ else
+ elf_hwcap = this_hwcap;
+
+ if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
+ bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
+ else
+ bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
+ }
+
+ if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
+ return -ENOENT;
+
+ return 0;
+}
+
+#ifdef CONFIG_RISCV_ISA_FALLBACK
+bool __initdata riscv_isa_fallback = true;
+#else
+bool __initdata riscv_isa_fallback;
+static int __init riscv_isa_fallback_setup(char *__unused)
+{
+ riscv_isa_fallback = true;
+ return 1;
+}
+early_param("riscv_isa_fallback", riscv_isa_fallback_setup);
+#endif
+
+void __init riscv_fill_hwcap(void)
+{
+ char print_str[NUM_ALPHA_EXTS + 1];
+ unsigned long isa2hwcap[26] = {0};
+ int i, j;
+
+ isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
+ isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
+ isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
+ isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
+ isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
+ isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
+ isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V;
+
+ if (!acpi_disabled) {
+ riscv_fill_hwcap_from_isa_string(isa2hwcap);
+ } else {
+ int ret = riscv_fill_hwcap_from_ext_list(isa2hwcap);
+
+ if (ret && riscv_isa_fallback) {
+ pr_info("Falling back to deprecated \"riscv,isa\"\n");
+ riscv_fill_hwcap_from_isa_string(isa2hwcap);
+ }
+ }
- /* We don't support systems with F but without D, so mask those out
- * here. */
+ /*
+ * We don't support systems with F but without D, so mask those out
+ * here.
+ */
if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
pr_info("This kernel does not support systems with F but not D\n");
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
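
Before this refactor, the single-letter arm of the parser simply ORed
isa2hwcap[tolower(*ext) - 'a'] into this_hwcap. A standalone sketch of just
that step is below; note that HWCAP_BIT() is a toy encoding for illustration,
not the kernel's COMPAT_HWCAP_ISA_* values:

#include <ctype.h>
#include <stdio.h>

#define HWCAP_BIT(c) (1UL << ((c) - 'a'))	/* toy encoding, illustrative only */

static unsigned long isa_to_hwcap(const char *isa)
{
	unsigned long hwcap = 0;

	isa += 4;				/* skip the "rv64"/"rv32" prefix */
	for (; *isa && *isa != '_'; isa++)	/* stop at multi-letter extensions */
		if (isalpha(*isa))
			hwcap |= HWCAP_BIT(tolower(*isa));
	return hwcap;
}

int main(void)
{
	printf("%#lx\n", isa_to_hwcap("rv64imafdc"));	/* bits for i,m,a,f,d,c */
	return 0;
}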
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 11c3b94c4534..3710ea5d160f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -289,10 +289,6 @@ clear_bss:
blt a3, a4, clear_bss
clear_bss_done:
#endif
- /* Save hart ID and DTB physical address */
- mv s0, a0
- mv s1, a1
-
la a2, boot_cpu_hartid
XIP_FIXUP_OFFSET a2
REG_S a0, (a2)
@@ -306,7 +302,7 @@ clear_bss_done:
la a0, __dtb_start
XIP_FIXUP_OFFSET a0
#else
- mv a0, s1
+ mv a0, a1
#endif /* CONFIG_BUILTIN_DTB */
call setup_vm
#ifdef CONFIG_MMU
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 8a6e5a9e842a..8818a8fa9ff3 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
#include <asm/csr.h>
#include <asm/unistd.h>
@@ -47,15 +48,19 @@
addi sp, sp, 4*SZREG
.endm
-ENTRY(ftrace_stub)
+SYM_TYPED_FUNC_START(ftrace_stub)
#ifdef CONFIG_DYNAMIC_FTRACE
.global MCOUNT_NAME
.set MCOUNT_NAME, ftrace_stub
#endif
ret
-ENDPROC(ftrace_stub)
+SYM_FUNC_END(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_TYPED_FUNC_START(ftrace_stub_graph)
+ ret
+SYM_FUNC_END(ftrace_stub_graph)
+
ENTRY(return_to_handler)
/*
	 * On implementing the frame pointer test, the ideal way is to compare the
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 64f6183b4717..65d9590bfb9f 100644
--- a/arch/riscv/kernel/probes/decode-insn.c
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -29,13 +29,14 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
* TODO: the REJECTED ones below need to be implemented
*/
#ifdef CONFIG_RISCV_ISA_C
- RISCV_INSN_REJECTED(c_j, insn);
- RISCV_INSN_REJECTED(c_jr, insn);
RISCV_INSN_REJECTED(c_jal, insn);
- RISCV_INSN_REJECTED(c_jalr, insn);
- RISCV_INSN_REJECTED(c_beqz, insn);
- RISCV_INSN_REJECTED(c_bnez, insn);
RISCV_INSN_REJECTED(c_ebreak, insn);
+
+ RISCV_INSN_SET_SIMULATE(c_j, insn);
+ RISCV_INSN_SET_SIMULATE(c_jr, insn);
+ RISCV_INSN_SET_SIMULATE(c_jalr, insn);
+ RISCV_INSN_SET_SIMULATE(c_beqz, insn);
+ RISCV_INSN_SET_SIMULATE(c_bnez, insn);
#endif
RISCV_INSN_SET_SIMULATE(jal, insn);
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index 7441ac8a6843..d3099d67816d 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -188,3 +188,108 @@ bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *r
return true;
}
+
+bool __kprobes simulate_c_j(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * 15 13 12 2 1 0
+ * | funct3 | offset[11|4|9:8|10|6|7|3:1|5] | opcode |
+ * 3 11 2
+ */
+
+ s32 offset;
+
+ offset = ((opcode >> 3) & 0x7) << 1;
+ offset |= ((opcode >> 11) & 0x1) << 4;
+ offset |= ((opcode >> 2) & 0x1) << 5;
+ offset |= ((opcode >> 7) & 0x1) << 6;
+ offset |= ((opcode >> 6) & 0x1) << 7;
+ offset |= ((opcode >> 9) & 0x3) << 8;
+ offset |= ((opcode >> 8) & 0x1) << 10;
+ offset |= ((opcode >> 12) & 0x1) << 11;
+
+ instruction_pointer_set(regs, addr + sign_extend32(offset, 11));
+
+ return true;
+}
+
+static bool __kprobes simulate_c_jr_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs,
+ bool is_jalr)
+{
+ /*
+ * 15 12 11 7 6 2 1 0
+ * | funct4 | rs1 | rs2 | op |
+ * 4 5 5 2
+ */
+
+ unsigned long jump_addr;
+
+ u32 rs1 = (opcode >> 7) & 0x1f;
+
+ if (rs1 == 0) /* C.JR is only valid when rs1 != x0 */
+ return false;
+
+ if (!rv_insn_reg_get_val(regs, rs1, &jump_addr))
+ return false;
+
+ if (is_jalr && !rv_insn_reg_set_val(regs, 1, addr + 2))
+ return false;
+
+ instruction_pointer_set(regs, jump_addr);
+
+ return true;
+}
+
+bool __kprobes simulate_c_jr(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ return simulate_c_jr_jalr(opcode, addr, regs, false);
+}
+
+bool __kprobes simulate_c_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ return simulate_c_jr_jalr(opcode, addr, regs, true);
+}
+
+static bool __kprobes simulate_c_bnez_beqz(u32 opcode, unsigned long addr, struct pt_regs *regs,
+ bool is_bnez)
+{
+ /*
+ * 15 13 12 10 9 7 6 2 1 0
+ * | funct3 | offset[8|4:3] | rs1' | offset[7:6|2:1|5] | op |
+ * 3 3 3 5 2
+ */
+
+ s32 offset;
+ u32 rs1;
+ unsigned long rs1_val;
+
+ rs1 = 0x8 | ((opcode >> 7) & 0x7);
+
+ if (!rv_insn_reg_get_val(regs, rs1, &rs1_val))
+ return false;
+
+ if ((rs1_val != 0 && is_bnez) || (rs1_val == 0 && !is_bnez)) {
+ offset = ((opcode >> 3) & 0x3) << 1;
+ offset |= ((opcode >> 10) & 0x3) << 3;
+ offset |= ((opcode >> 2) & 0x1) << 5;
+ offset |= ((opcode >> 5) & 0x3) << 6;
+ offset |= ((opcode >> 12) & 0x1) << 8;
+ offset = sign_extend32(offset, 8);
+ } else {
+ offset = 2;
+ }
+
+ instruction_pointer_set(regs, addr + offset);
+
+ return true;
+}
+
+bool __kprobes simulate_c_bnez(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ return simulate_c_bnez_beqz(opcode, addr, regs, true);
+}
+
+bool __kprobes simulate_c_beqz(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ return simulate_c_bnez_beqz(opcode, addr, regs, false);
+}
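
As a cross-check of the scattered c.j immediate, the following standalone
sketch reuses the exact bit extraction from simulate_c_j() above, with
sign_extend32() reimplemented locally since userspace has no <linux/bitops.h>;
0xa011 is a hand-assembled `c.j .+4` (funct3=101, op=01, only offset[2] set):

#include <assert.h>
#include <stdint.h>

/* local helper; the kernel gets this from <linux/bitops.h> */
static inline int32_t sign_extend32(uint32_t value, int index)
{
	uint8_t shift = 31 - index;
	return (int32_t)(value << shift) >> shift;
}

static int32_t c_j_offset(uint32_t opcode)
{
	int32_t offset;

	offset  = ((opcode >> 3) & 0x7) << 1;
	offset |= ((opcode >> 11) & 0x1) << 4;
	offset |= ((opcode >> 2) & 0x1) << 5;
	offset |= ((opcode >> 7) & 0x1) << 6;
	offset |= ((opcode >> 6) & 0x1) << 7;
	offset |= ((opcode >> 9) & 0x3) << 8;
	offset |= ((opcode >> 8) & 0x1) << 10;
	offset |= ((opcode >> 12) & 0x1) << 11;

	return sign_extend32(offset, 11);
}

int main(void)
{
	/* 0xa011: only instruction bit 4 is set => offset[2] => c.j .+4 */
	assert(c_j_offset(0xa011) == 4);
	return 0;
}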
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
index 61e35db31001..44ebbc444db9 100644
--- a/arch/riscv/kernel/probes/simulate-insn.h
+++ b/arch/riscv/kernel/probes/simulate-insn.h
@@ -24,5 +24,10 @@ bool simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs);
bool simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs);
bool simulate_jal(u32 opcode, unsigned long addr, struct pt_regs *regs);
bool simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs);
+bool simulate_c_j(u32 opcode, unsigned long addr, struct pt_regs *regs);
+bool simulate_c_jr(u32 opcode, unsigned long addr, struct pt_regs *regs);
+bool simulate_c_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs);
+bool simulate_c_bnez(u32 opcode, unsigned long addr, struct pt_regs *regs);
+bool simulate_c_beqz(u32 opcode, unsigned long addr, struct pt_regs *regs);
#endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 971fe776e2f8..32c2e1eb71bd 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -178,6 +178,11 @@ static void __init init_resources(void)
if (ret < 0)
goto error;
}
+ if (crashk_low_res.start != crashk_low_res.end) {
+ ret = add_resource(&iomem_resource, &crashk_low_res);
+ if (ret < 0)
+ goto error;
+ }
#endif
#ifdef CONFIG_CRASH_DUMP
@@ -311,6 +316,7 @@ void __init setup_arch(char **cmdline_p)
if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
riscv_isa_extension_available(NULL, ZICBOM))
riscv_noncoherent_supported();
+ riscv_set_dma_cache_alignment();
}
static int __init topology_init(void)
diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S
index 12b52afe09a4..f7960c7c5f9e 100644
--- a/arch/riscv/kernel/suspend_entry.S
+++ b/arch/riscv/kernel/suspend_entry.S
@@ -5,6 +5,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
@@ -58,7 +59,7 @@ ENTRY(__cpu_suspend_enter)
ret
END(__cpu_suspend_enter)
-ENTRY(__cpu_resume_enter)
+SYM_TYPED_FUNC_START(__cpu_resume_enter)
/* Load the global pointer */
.option push
.option norelax
@@ -94,4 +95,4 @@ ENTRY(__cpu_resume_enter)
/* Return to C code */
ret
-END(__cpu_resume_enter)
+SYM_FUNC_END(__cpu_resume_enter)
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 26ef5526bfb4..473159b5f303 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -335,3 +335,9 @@ SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
return do_riscv_hwprobe(pairs, pair_count, cpu_count,
cpus, flags);
}
+
+/* Not defined using SYSCALL_DEFINE0 to avoid error injection */
+asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused)
+{
+ return -ENOSYS;
+}
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index 44b1420a2270..dda913764903 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -10,9 +10,13 @@
#include <asm/syscall.h>
#undef __SYSCALL
-#define __SYSCALL(nr, call) [nr] = (call),
+#define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *);
+#include <asm/unistd.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = __riscv_##call,
void * const sys_call_table[__NR_syscalls] = {
- [0 ... __NR_syscalls - 1] = sys_ni_syscall,
+ [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall,
#include <asm/unistd.h>
};
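
The change above is a classic two-pass X-macro: <asm/unistd.h> is included
once to declare the __riscv_-prefixed wrappers and once more to emit the
table initializers. A toy, self-contained model of the pattern, with a
fictitious two-entry syscall list standing in for the real header ([0 ... N]
range initializers are a GNU C extension, as in the kernel):

#include <stdio.h>

/* toy stand-in for <asm/unistd.h>: the syscall list expands via __SYSCALL */
#define TOY_SYSCALLS \
	__SYSCALL(0, sys_read) \
	__SYSCALL(1, sys_write)
#define __NR_toy 2

/* pass 1: declare the __riscv_-prefixed wrappers */
#define __SYSCALL(nr, call) long __riscv_##call(void);
TOY_SYSCALLS
#undef __SYSCALL

long __riscv_sys_read(void)  { return 0; }
long __riscv_sys_write(void) { return 0; }
static long __riscv_sys_ni(void) { return -38; /* -ENOSYS */ }

/* pass 2: emit the designated initializers for the table */
#define __SYSCALL(nr, call) [nr] = __riscv_##call,
static long (*const toy_call_table[__NR_toy])(void) = {
	[0 ... __NR_toy - 1] = __riscv_sys_ni,
	TOY_SYSCALLS
};
#undef __SYSCALL

int main(void)
{
	printf("slot 0 returns %ld\n", toy_call_table[0]());
	return 0;
}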
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f798c853bede..19807c4d3805 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -21,6 +21,7 @@
#include <asm/asm-prototypes.h>
#include <asm/bug.h>
+#include <asm/cfi.h>
#include <asm/csr.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
@@ -271,7 +272,8 @@ void handle_break(struct pt_regs *regs)
== NOTIFY_STOP)
return;
#endif
- else if (report_bug(regs->epc, regs) == BUG_TRAP_TYPE_WARN)
+ else if (report_bug(regs->epc, regs) == BUG_TRAP_TYPE_WARN ||
+ handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN)
regs->epc += get_break_insn_length(regs->epc);
else
die(regs, "Kernel BUG");
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 12e22e7330e7..217fd4de6134 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -67,7 +67,7 @@ static void __flush_context(void)
lockdep_assert_held(&context_lock);
/* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(context_asid_map, 0, num_asids);
+ bitmap_zero(context_asid_map, num_asids);
/* Mark already active ASIDs as used */
for_each_possible_cpu(i) {
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index d51a75864e53..7270b4d8c05b 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -11,6 +11,8 @@
#include <asm/cacheflush.h>
static bool noncoherent_supported __ro_after_init;
+int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN;
+EXPORT_SYMBOL_GPL(dma_cache_alignment);
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
@@ -78,3 +80,9 @@ void riscv_noncoherent_supported(void)
"Non-coherent DMA support enabled without a block size\n");
noncoherent_supported = true;
}
+
+void __init riscv_set_dma_cache_alignment(void)
+{
+ if (!noncoherent_supported)
+ dma_cache_alignment = 1;
+}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c07ff3e2c90a..943c18d6ef4d 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1299,6 +1299,28 @@ static inline void setup_vm_final(void)
}
#endif /* CONFIG_MMU */
+/* Reserve 128M of low memory by default for the swiotlb buffer */
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+ unsigned long long low_base;
+
+ low_base = memblock_phys_alloc_range(low_size, PMD_SIZE, 0, dma32_phys_limit);
+ if (!low_base) {
+ pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+ return -ENOMEM;
+ }
+
+ pr_info("crashkernel low memory reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ low_base, low_base + low_size, low_size >> 20);
+
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+
+ return 0;
+}
+
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
@@ -1310,8 +1332,12 @@ static void __init reserve_crashkernel(void)
{
unsigned long long crash_base = 0;
unsigned long long crash_size = 0;
+ unsigned long long crash_low_size = 0;
unsigned long search_start = memblock_start_of_DRAM();
- unsigned long search_end = memblock_end_of_DRAM();
+ unsigned long search_end = (unsigned long)dma32_phys_limit;
+ char *cmdline = boot_command_line;
+ bool fixed_base = false;
+ bool high = false;
int ret = 0;
@@ -1327,14 +1353,36 @@ static void __init reserve_crashkernel(void)
return;
}
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
&crash_size, &crash_base);
- if (ret || !crash_size)
+ if (ret == -ENOENT) {
+ /* Fallback to crashkernel=X,[high,low] */
+ ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
+ if (ret || !crash_size)
+ return;
+
+ /*
+ * crashkernel=Y,low is valid only when crashkernel=X,high
+ * is passed.
+ */
+ ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
+ if (ret == -ENOENT)
+ crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+ else if (ret)
+ return;
+
+ search_start = (unsigned long)dma32_phys_limit;
+ search_end = memblock_end_of_DRAM();
+ high = true;
+ } else if (ret || !crash_size) {
+ /* Invalid argument value specified */
return;
+ }
crash_size = PAGE_ALIGN(crash_size);
if (crash_base) {
+ fixed_base = true;
search_start = crash_base;
search_end = crash_base + crash_size;
}
@@ -1347,12 +1395,37 @@ static void __init reserve_crashkernel(void)
* swiotlb can work on the crash kernel.
*/
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
- search_start,
- min(search_end, (unsigned long)(SZ_4G - 1)));
+ search_start, search_end);
if (crash_base == 0) {
- /* Try again without restricting region to 32bit addressible memory */
+ /*
+	 * For crashkernel=size[KMG]@offset[KMG], print out a failure
+	 * message if the specified region cannot be reserved.
+ */
+ if (fixed_base) {
+ pr_warn("crashkernel: allocating failed with given size@offset\n");
+ return;
+ }
+
+ if (high) {
+ /*
+ * For crashkernel=size[KMG],high, if the first attempt was
+ * for high memory, fall back to low memory.
+ */
+ search_start = memblock_start_of_DRAM();
+ search_end = (unsigned long)dma32_phys_limit;
+ } else {
+ /*
+ * For crashkernel=size[KMG], if the first attempt was for
+ * low memory, fall back to high memory, the minimum required
+ * low memory will be reserved later.
+ */
+ search_start = (unsigned long)dma32_phys_limit;
+ search_end = memblock_end_of_DRAM();
+ crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+ }
+
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
- search_start, search_end);
+ search_start, search_end);
if (crash_base == 0) {
pr_warn("crashkernel: couldn't allocate %lldKB\n",
crash_size >> 10);
@@ -1360,6 +1433,12 @@ static void __init reserve_crashkernel(void)
}
}
+ if ((crash_base >= dma32_phys_limit) && crash_low_size &&
+ reserve_crashkernel_low(crash_low_size)) {
+ memblock_phys_free(crash_base, crash_size);
+ return;
+ }
+
pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
crash_base, crash_base + crash_size, crash_size >> 20);
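
Taken together, the rework supports all three documented forms of the option:
a plain "crashkernel=256M" is tried below the 32-bit DMA limit first and, on
failure, falls back to high memory plus a default 128M low reservation; a
fixed "crashkernel=256M@offset" fails outright if the requested window cannot
be reserved; and "crashkernel=1G,high" may be paired with an explicit
"crashkernel=128M,low" override (the sizes here are arbitrary examples).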
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index a01bc15dce24..5e39dcf23fdb 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -22,9 +22,9 @@
* region is not and then we have to go down to the PUD level.
*/
-pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
-p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
-pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+static p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
@@ -438,7 +438,7 @@ static void __init kasan_shallow_populate(void *start, void *end)
kasan_shallow_populate_pgd(vaddr, vend);
}
-static void create_tmp_mapping(void)
+static void __init create_tmp_mapping(void)
{
void *ptr;
p4d_t *base_p4d;
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index dc20e166983e..9e6476719abb 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -77,6 +77,10 @@ ifdef CONFIG_STACKPROTECTOR_STRONG
PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
endif
+ifdef CONFIG_CFI_CLANG
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI)
+endif
+
CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 56897d4d4fd3..1f9a35f724f5 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -14,9 +14,81 @@
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
+#include <linux/sched_clock.h>
#include <asm/sbi.h>
+static bool riscv_perf_user_access(struct perf_event *event)
+{
+ return ((event->attr.type == PERF_TYPE_HARDWARE) ||
+ (event->attr.type == PERF_TYPE_HW_CACHE) ||
+ (event->attr.type == PERF_TYPE_RAW)) &&
+ !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+}
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+#ifdef CONFIG_RISCV_PMU
+ /*
+	 * The counters are 64-bit, but the priv spec doesn't mandate that all
+	 * the bits be implemented: that's why the counter width can vary based
+	 * on the CPU vendor.
+ */
+ if (userpg->cap_user_rdpmc)
+ userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
+#endif
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
+
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
@@ -171,6 +243,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
return overflow;
}
@@ -264,6 +338,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
hwc->idx = -1;
hwc->event_base = mapped_event;
+ if (rvpmu->event_init)
+ rvpmu->event_init(event);
+
if (!is_sampling_event(event)) {
/*
* For non-sampling runs, limit the sample_period to half
@@ -280,6 +357,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
return 0;
}
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ return 0;
+
+ if (rvpmu->csr_index)
+ return rvpmu->csr_index(event) + 1;
+
+ return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_mapped) {
+ rvpmu->event_mapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_unmapped) {
+ rvpmu->event_unmapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
@@ -304,6 +414,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
+ .event_mapped = riscv_pmu_event_mapped,
+ .event_unmapped = riscv_pmu_event_unmapped,
+ .event_idx = riscv_pmu_event_idx,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index ca9e20bfc7ac..79fdd667922e 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -13,7 +13,7 @@
#include <linux/platform_device.h>
#define RISCV_PMU_LEGACY_CYCLE 0
-#define RISCV_PMU_LEGACY_INSTRET 1
+#define RISCV_PMU_LEGACY_INSTRET 2
static bool pmu_init_done;
@@ -71,6 +71,29 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
local64_set(&hwc->prev_count, initial_val);
}
+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return;
+
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
/*
 * This is just a simple implementation to make legacy implementations
 * compatible with the new RISC-V PMU driver framework.
@@ -91,6 +114,9 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
pmu->ctr_get_width = NULL;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
+ pmu->event_mapped = pmu_legacy_event_mapped;
+ pmu->event_unmapped = pmu_legacy_event_unmapped;
+ pmu->csr_index = pmu_legacy_csr_index;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
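
Note RISCV_PMU_LEGACY_INSTRET moving from 1 to 2: the index now doubles as
the CSR offset reported by pmu_legacy_csr_index()/event_idx, and instret
(CSR 0xc02) sits two above cycle (CSR 0xc00), with time (0xc01) in between.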
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 4163ff517471..9a51053b1f99 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -24,6 +24,14 @@
#include <asm/sbi.h>
#include <asm/hwcap.h>
+#define SYSCTL_NO_USER_ACCESS 0
+#define SYSCTL_USER_ACCESS 1
+#define SYSCTL_LEGACY 2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY)
+
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");
@@ -43,6 +51,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
NULL,
};
+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
/*
 * RISC-V doesn't have heterogeneous harts yet. This needs to be part of
 * per_cpu in case of harts with different pmu counters
@@ -301,6 +312,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);
+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+ return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
unsigned long cflags = 0;
@@ -329,18 +345,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct sbiret ret;
int idx;
- uint64_t cbase = 0;
+ uint64_t cbase = 0, cmask = rvpmu->cmask;
unsigned long cflags = 0;
cflags = pmu_sbi_get_filter_flags(event);
+
+ /*
+	 * In legacy mode, we have to force the fixed counters for those events,
+	 * but not in user access mode, where we want to use the other counters,
+	 * which support sampling/filtering.
+ */
+ if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1;
+ } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+ }
+ }
+
/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config,
+ cmask, cflags, hwc->event_base, hwc->config,
hwc->config >> 32);
#else
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0);
+ cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
if (ret.error) {
pr_debug("Not able to find a counter for event %lx config %llx\n",
@@ -474,6 +506,22 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
return val;
}
+static void pmu_sbi_set_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+ struct perf_event *event = (struct perf_event *)arg;
+
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+}
+
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
struct sbiret ret;
@@ -490,6 +538,10 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
pr_err("Starting counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_set_scounteren((void *)event);
}
static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -497,6 +549,10 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+ (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ pmu_sbi_reset_scounteren((void *)event);
+
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET)
@@ -704,10 +760,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/*
- * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
- * as is necessary to maintain uABI compatibility.
+ * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+ * legacy option but that will be removed in the future.
*/
- csr_write(CSR_SCOUNTEREN, 0x7);
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
/* Stop all the counters so that they can be enabled from perf */
pmu_sbi_stop_all(pmu);
@@ -838,6 +897,121 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
+static void pmu_sbi_event_init(struct perf_event *event)
+{
+	 * The permissions are set at event_init time so that we do not depend
+	 * on a sysctl value that can change later.
+ * on the sysctl value that can change.
+ */
+ if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+ else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+ event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+ else
+ event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+	 * The user mmapped the event to access it directly: this is where,
+	 * based on sysctl_perf_user_access, we determine whether to grant
+	 * userspace direct access to this event. That means that within the
+	 * same task, some events may be directly accessible and some others
+	 * may not, if the user changes the value of sysctl_perf_user_access
+	 * in the meantime.
+ */
+
+ event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+ /*
+ * We must enable userspace access *before* advertising in the user page
+	 * that it is possible to do so, to avoid any race.
+	 * We must also notify all cpus here because threads that currently run
+ * on other cpus will try to directly access the counter too without
+ * calling pmu_sbi_ctr_start.
+ */
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+ return;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+ return;
+ }
+ }
+
+ /*
+	 * Here we can directly remove user access since the user does not have
+	 * access to the user page anymore, so we avoid the racy window where
+	 * the user could have read cap_user_rdpmc as true right before we
+	 * disable it.
+ */
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+ if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+ on_each_cpu_mask(mm_cpumask(mm),
+ pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+ if (sysctl_perf_user_access == SYSCTL_LEGACY)
+ csr_write(CSR_SCOUNTEREN, 0x7);
+ else
+ csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int prev = sysctl_perf_user_access;
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ /*
+ * Test against the previous value since we clear SCOUNTEREN when
+ * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+ * not do that if that was already the case.
+ */
+ if (ret || !write || prev == sysctl_perf_user_access)
+ return ret;
+
+ on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+ return 0;
+}
+
+static struct ctl_table sbi_pmu_sysctl_table[] = {
+ {
+ .procname = "perf_user_access",
+ .data = &sysctl_perf_user_access,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = riscv_pmu_proc_user_access_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
@@ -881,6 +1055,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->ctr_get_width = pmu_sbi_ctr_get_width;
pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
pmu->ctr_read = pmu_sbi_ctr_read;
+ pmu->event_init = pmu_sbi_event_init;
+ pmu->event_mapped = pmu_sbi_event_mapped;
+ pmu->event_unmapped = pmu_sbi_event_unmapped;
+ pmu->csr_index = pmu_sbi_csr_index;
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
@@ -894,6 +1072,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ register_sysctl("kernel", sbi_pmu_sysctl_table);
+
return 0;
out_unregister:
@@ -907,7 +1087,7 @@ out_free:
static struct platform_driver pmu_sbi_driver = {
.probe = pmu_sbi_device_probe,
.driver = {
- .name = RISCV_PMU_PDEV_NAME,
+ .name = RISCV_PMU_SBI_PDEV_NAME,
},
};
@@ -934,7 +1114,7 @@ static int __init pmu_sbi_devinit(void)
if (ret)
return ret;
- pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
+ pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
platform_driver_unregister(&pmu_sbi_driver);
return PTR_ERR(pdev);
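
Since the table is registered under "kernel", the knob surfaces as
/proc/sys/kernel/perf_user_access with values 0 (no user access), 1 (user
access, the default) and 2 (legacy). A minimal userspace probe, purely
illustrative:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_user_access", "r");
	int mode;

	if (!f) {
		perror("perf_user_access");
		return 1;
	}
	if (fscanf(f, "%d", &mode) == 1)
		/* 0 = no user access, 1 = user access, 2 = legacy */
		printf("perf_user_access = %d\n", mode);
	fclose(f);
	return 0;
}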
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 93539aac0e5b..f5693164ca9a 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -58,7 +58,7 @@ config ARCH_USE_GNU_PROPERTY
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y if !BINFMT_ELF
- depends on ARM || ((M68K || SUPERH || XTENSA) && !MMU)
+ depends on ARM || ((M68K || RISCV || SUPERH || XTENSA) && !MMU)
select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 1c6c5832af86..43b2a2851ba3 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -138,7 +138,7 @@ static int is_constdisp(struct elfhdr *hdr)
static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
struct file *file)
{
- struct elf32_phdr *phdr;
+ struct elf_phdr *phdr;
unsigned long size;
int retval, loop;
loff_t pos = params->hdr.e_phoff;
@@ -560,8 +560,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
sp &= ~7UL;
/* stack the load map(s) */
- len = sizeof(struct elf32_fdpic_loadmap);
- len += sizeof(struct elf32_fdpic_loadseg) * exec_params->loadmap->nsegs;
+ len = sizeof(struct elf_fdpic_loadmap);
+ len += sizeof(struct elf_fdpic_loadseg) * exec_params->loadmap->nsegs;
sp = (sp - len) & ~7UL;
exec_params->map_addr = sp;
@@ -571,8 +571,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
current->mm->context.exec_fdpic_loadmap = (unsigned long) sp;
if (interp_params->loadmap) {
- len = sizeof(struct elf32_fdpic_loadmap);
- len += sizeof(struct elf32_fdpic_loadseg) *
+ len = sizeof(struct elf_fdpic_loadmap);
+ len += sizeof(struct elf_fdpic_loadseg) *
interp_params->loadmap->nsegs;
sp = (sp - len) & ~7UL;
interp_params->map_addr = sp;
@@ -740,13 +740,13 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
struct mm_struct *mm,
const char *what)
{
- struct elf32_fdpic_loadmap *loadmap;
+ struct elf_fdpic_loadmap *loadmap;
#ifdef CONFIG_MMU
- struct elf32_fdpic_loadseg *mseg;
+ struct elf_fdpic_loadseg *mseg;
unsigned long load_addr;
#endif
- struct elf32_fdpic_loadseg *seg;
- struct elf32_phdr *phdr;
+ struct elf_fdpic_loadseg *seg;
+ struct elf_phdr *phdr;
unsigned nloads, tmp;
unsigned long stop;
int loop, ret;
@@ -766,7 +766,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
params->loadmap = loadmap;
- loadmap->version = ELF32_FDPIC_LOADMAP_VERSION;
+ loadmap->version = ELF_FDPIC_LOADMAP_VERSION;
loadmap->nsegs = nloads;
/* map the requested LOADs into the memory space */
@@ -839,8 +839,8 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
if (phdr->p_vaddr >= seg->p_vaddr &&
phdr->p_vaddr + phdr->p_memsz <=
seg->p_vaddr + seg->p_memsz) {
- Elf32_Dyn __user *dyn;
- Elf32_Sword d_tag;
+ Elf_Dyn __user *dyn;
+ Elf_Sword d_tag;
params->dynamic_addr =
(phdr->p_vaddr - seg->p_vaddr) +
@@ -850,11 +850,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
* one item, and that the last item is a NULL
* entry */
if (phdr->p_memsz == 0 ||
- phdr->p_memsz % sizeof(Elf32_Dyn) != 0)
+ phdr->p_memsz % sizeof(Elf_Dyn) != 0)
goto dynamic_error;
- tmp = phdr->p_memsz / sizeof(Elf32_Dyn);
- dyn = (Elf32_Dyn __user *)params->dynamic_addr;
+ tmp = phdr->p_memsz / sizeof(Elf_Dyn);
+ dyn = (Elf_Dyn __user *)params->dynamic_addr;
if (get_user(d_tag, &dyn[tmp - 1].d_tag) ||
d_tag != 0)
goto dynamic_error;
@@ -923,8 +923,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
struct file *file,
struct mm_struct *mm)
{
- struct elf32_fdpic_loadseg *seg;
- struct elf32_phdr *phdr;
+ struct elf_fdpic_loadseg *seg;
+ struct elf_phdr *phdr;
unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0;
int loop, ret;
@@ -1007,8 +1007,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct file *file,
struct mm_struct *mm)
{
- struct elf32_fdpic_loadseg *seg;
- struct elf32_phdr *phdr;
+ struct elf_fdpic_loadseg *seg;
+ struct elf_phdr *phdr;
unsigned long load_addr, delta_vaddr;
int loop, dvset;
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index b4d43a4af5f7..51f8f3881523 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -80,9 +80,21 @@ static __always_inline bool should_resched(int preempt_offset)
#ifdef CONFIG_PREEMPTION
extern asmlinkage void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
extern asmlinkage void preempt_schedule_notrace(void);
+
+#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+
+void dynamic_preempt_schedule(void);
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
+
+#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_KEY */
+
+#define __preempt_schedule() preempt_schedule()
#define __preempt_schedule_notrace() preempt_schedule_notrace()
+
+#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_KEY */
#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
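
On the scheduler side, the CONFIG_HAVE_PREEMPT_DYNAMIC_KEY flavour of
dynamic_preempt_schedule() boils down to a static-key-guarded call; the
self-contained model below stands a plain bool in for the patchable jump
label the kernel actually uses:

#include <stdbool.h>
#include <stdio.h>

/* stand-in for DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule) */
static bool sk_dynamic_preempt_schedule = true;

static void preempt_schedule(void)
{
	puts("preempt_schedule()");
}

/* models the kernel's dynamic_preempt_schedule() */
static void dynamic_preempt_schedule(void)
{
	if (!sk_dynamic_preempt_schedule)	/* static_branch_unlikely() */
		return;
	preempt_schedule();
}

int main(void)
{
	dynamic_preempt_schedule();		/* preempt=full: calls through */
	sk_dynamic_preempt_schedule = false;	/* preempt=none flips the key */
	dynamic_preempt_schedule();		/* now a patched-out no-op */
	return 0;
}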
diff --git a/include/linux/elf-fdpic.h b/include/linux/elf-fdpic.h
index 3bea95a1af53..e533f4513194 100644
--- a/include/linux/elf-fdpic.h
+++ b/include/linux/elf-fdpic.h
@@ -10,13 +10,25 @@
#include <uapi/linux/elf-fdpic.h>
+#if ELF_CLASS == ELFCLASS32
+#define Elf_Sword Elf32_Sword
+#define elf_fdpic_loadseg elf32_fdpic_loadseg
+#define elf_fdpic_loadmap elf32_fdpic_loadmap
+#define ELF_FDPIC_LOADMAP_VERSION ELF32_FDPIC_LOADMAP_VERSION
+#else
+#define Elf_Sword Elf64_Sxword
+#define elf_fdpic_loadmap elf64_fdpic_loadmap
+#define elf_fdpic_loadseg elf64_fdpic_loadseg
+#define ELF_FDPIC_LOADMAP_VERSION ELF64_FDPIC_LOADMAP_VERSION
+#endif
+
/*
* binfmt binary parameters structure
*/
struct elf_fdpic_params {
struct elfhdr hdr; /* ref copy of ELF header */
struct elf_phdr *phdrs; /* ref copy of PT_PHDR table */
- struct elf32_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */
+ struct elf_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */
unsigned long elfhdr_addr; /* mapped ELF header user address */
unsigned long ph_addr; /* mapped PT_PHDR user address */
unsigned long map_addr; /* mapped loadmap user address */
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 43fc892aa7d9..43282e22ebe1 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -6,8 +6,8 @@
*
*/
-#ifndef _ASM_RISCV_PERF_EVENT_H
-#define _ASM_RISCV_PERF_EVENT_H
+#ifndef _RISCV_PMU_H
+#define _RISCV_PMU_H
#include <linux/perf_event.h>
#include <linux/ptrace.h>
@@ -21,7 +21,7 @@
#define RISCV_MAX_COUNTERS 64
#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
-#define RISCV_PMU_PDEV_NAME "riscv-pmu"
+#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi"
#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy"
#define RISCV_PMU_STOP_FLAG_RESET 1
@@ -55,6 +55,10 @@ struct riscv_pmu {
void (*ctr_start)(struct perf_event *event, u64 init_val);
void (*ctr_stop)(struct perf_event *event, unsigned long flag);
int (*event_map)(struct perf_event *event, u64 *config);
+ void (*event_init)(struct perf_event *event);
+ void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
+ void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
+ uint8_t (*csr_index)(struct perf_event *event);
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
@@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
#endif /* CONFIG_RISCV_PMU */
-#endif /* _ASM_RISCV_PERF_EVENT_H */
+#endif /* _RISCV_PMU_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 05253af70ce9..e85cd1c0eaf3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -444,7 +444,8 @@ struct pmu {
/*
 * Will return the value for perf_event_mmap_page::index for this event;
- * if no implementation is provided it will default to: event->hw.idx + 1.
+ * if no implementation is provided it will default to 0 (see
+ * perf_event_idx_default).
*/
int (*event_idx) (struct perf_event *event); /*optional */
diff --git a/include/uapi/linux/elf-fdpic.h b/include/uapi/linux/elf-fdpic.h
index 4fcc6cfebe18..ec23f0871129 100644
--- a/include/uapi/linux/elf-fdpic.h
+++ b/include/uapi/linux/elf-fdpic.h
@@ -32,4 +32,19 @@ struct elf32_fdpic_loadmap {
#define ELF32_FDPIC_LOADMAP_VERSION 0x0000
+/* segment mappings for ELF FDPIC libraries/executables/interpreters */
+struct elf64_fdpic_loadseg {
+ Elf64_Addr addr; /* core address to which mapped */
+ Elf64_Addr p_vaddr; /* VMA recorded in file */
+ Elf64_Word p_memsz; /* allocation size recorded in file */
+};
+
+struct elf64_fdpic_loadmap {
+ Elf64_Half version; /* version of these structures, just in case... */
+ Elf64_Half nsegs; /* number of segments */
+ struct elf64_fdpic_loadseg segs[];
+};
+
+#define ELF64_FDPIC_LOADMAP_VERSION 0x0000
+
#endif /* _UAPI_LINUX_ELF_FDPIC_H */
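
These elf64_fdpic_loadseg/loadmap definitions are what the ELF_CLASS-keyed
aliases in <linux/elf-fdpic.h> above resolve to on 64-bit targets, which in
turn lets the BINFMT_ELF_FDPIC loader serve rv64 !MMU systems.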
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a8a1b0ac8b22..319cfbeb0738 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -355,6 +355,11 @@ endchoice # "Compressed Debug information"
config DEBUG_INFO_SPLIT
bool "Produce split debuginfo in .dwo files"
depends on $(cc-option,-gsplit-dwarf)
+ # RISC-V linker relaxation + -gsplit-dwarf has issues with LLVM and GCC
+ # prior to 12.x:
+ # https://github.com/llvm/llvm-project/issues/56642
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99090
+ depends on !RISCV || GCC_VERSION >= 120000
help
Generate debug info into separate .dwo files. This significantly
reduces the build directory size for builds with DEBUG_INFO,
@@ -502,7 +507,7 @@ config SECTION_MISMATCH_WARN_ONLY
config DEBUG_FORCE_FUNCTION_ALIGN_64B
bool "Force all function address 64B aligned"
- depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || S390)
+ depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || RISCV || S390)
select FUNCTION_ALIGNMENT_64B
help
There are cases that a commit from one domain changes the function
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 0d1634cedf44..2184814b37dd 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -392,6 +392,72 @@ static u64 read_perf_counter(unsigned int counter)
static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+/* __riscv_xlen contains the width of the native base integer, here 64-bit */
+#elif defined(__riscv) && __riscv_xlen == 64
+
+/* TODO: implement rv32 support */
+
+#define CSR_CYCLE 0xc00
+#define CSR_TIME 0xc01
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__ ("csrr %0, %1" \
+ : "=r" (__v) \
+ : "i" (csr) : ); \
+ __v; \
+})
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ default:
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static u64 read_perf_counter(unsigned int counter)
+{
+ return csr_read_num(CSR_CYCLE + counter);
+}
+
+static u64 read_timestamp(void)
+{
+ return csr_read_num(CSR_TIME);
+}
+
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
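
On the consuming side, a reader of the mmapped event page combines the raw
counter with the time_* fields that arch_perf_update_userpage() filled in. A
sketch of the conversion documented for perf_event_mmap_page, with 128-bit
math (a GCC extension) standing in for mul_u64_u32_shr() and made-up
calibration values:

#include <stdint.h>
#include <stdio.h>

/* subset of the perf_event_mmap_page time fields used here */
struct userpg_time {
	uint16_t time_shift;
	uint32_t time_mult;
	uint64_t time_zero;
	uint64_t time_cycles;
	uint64_t time_mask;
};

static uint64_t cyc_to_ns(const struct userpg_time *t, uint64_t cyc)
{
	/* cap_user_time_short: re-base a possibly short/wrapped counter */
	cyc = t->time_cycles + ((cyc - t->time_cycles) & t->time_mask);
	return t->time_zero +
	       (uint64_t)(((unsigned __int128)cyc * t->time_mult) >> t->time_shift);
}

int main(void)
{
	/* made-up calibration: mult=1, shift=0, so cycles map 1:1 to ns */
	struct userpg_time t = {
		.time_shift = 0, .time_mult = 1, .time_zero = 0,
		.time_cycles = 0, .time_mask = ~0ULL,
	};

	printf("%llu ns\n", (unsigned long long)cyc_to_ns(&t, 123456));
	return 0;
}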
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index e68ca6229756..886a13a77a16 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -284,7 +284,8 @@ static struct test_case tests__basic_mmap[] = {
"permissions"),
TEST_CASE_REASON("User space counter reading of instructions",
mmap_user_read_instr,
-#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64)
"permissions"
#else
"unsupported"
@@ -292,7 +293,8 @@ static struct test_case tests__basic_mmap[] = {
),
TEST_CASE_REASON("User space counter reading of cycles",
mmap_user_read_cycles,
-#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64)
"permissions"
#else
"unsupported"
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index f4b3d5c9af5b..4a9ff515a3a0 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv))
-RISCV_SUBTARGETS ?= hwprobe vector
+RISCV_SUBTARGETS ?= hwprobe vector mm
else
RISCV_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/riscv/mm/.gitignore b/tools/testing/selftests/riscv/mm/.gitignore
new file mode 100644
index 000000000000..5c2c57cb950c
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/.gitignore
@@ -0,0 +1,2 @@
+mmap_bottomup
+mmap_default
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
new file mode 100644
index 000000000000..11e0f0568923
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+
+TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup
+
+TEST_PROGS := testcases/run_mmap.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h
+ $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c
new file mode 100644
index 000000000000..b29379f7e478
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/mman.h>
+#include <testcases/mmap_test.h>
+
+#include "../../kselftest_harness.h"
+
+TEST(infinite_rlimit)
+{
+// Only works on 64 bit
+#if __riscv_xlen == 64
+ struct addresses mmap_addresses;
+
+ EXPECT_EQ(BOTTOM_UP, memory_layout());
+
+ do_mmaps(&mmap_addresses);
+
+ EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
+
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
+ EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
+#endif
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c
new file mode 100644
index 000000000000..d1accb91b726
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/mman.h>
+#include <testcases/mmap_test.h>
+
+#include "../../kselftest_harness.h"
+
+TEST(default_rlimit)
+{
+// Only works on 64 bit
+#if __riscv_xlen == 64
+ struct addresses mmap_addresses;
+
+ EXPECT_EQ(TOP_DOWN, memory_layout());
+
+ do_mmaps(&mmap_addresses);
+
+ EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
+
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
+ EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
+#endif
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h
new file mode 100644
index 000000000000..9b8434f62f57
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _TESTCASES_MMAP_TEST_H
+#define _TESTCASES_MMAP_TEST_H
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <stddef.h>
+
+#define TOP_DOWN 0
+#define BOTTOM_UP 1
+
+struct addresses {
+ int *no_hint;
+ int *on_37_addr;
+ int *on_38_addr;
+ int *on_46_addr;
+ int *on_47_addr;
+ int *on_55_addr;
+ int *on_56_addr;
+};
+
+static inline void do_mmaps(struct addresses *mmap_addresses)
+{
+ /*
+	 * Place all of the hint addresses on the boundaries of the mmap
+	 * address spaces for sv39, sv48 and sv57.
+	 * User addresses end at 1<<38, 1<<47 and 1<<56 respectively.
+ */
+ void *on_37_bits = (void *)(1UL << 37);
+ void *on_38_bits = (void *)(1UL << 38);
+ void *on_46_bits = (void *)(1UL << 46);
+ void *on_47_bits = (void *)(1UL << 47);
+ void *on_55_bits = (void *)(1UL << 55);
+ void *on_56_bits = (void *)(1UL << 56);
+
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ mmap_addresses->no_hint =
+ mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_37_addr =
+ mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_38_addr =
+ mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_46_addr =
+ mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_47_addr =
+ mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_55_addr =
+ mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_56_addr =
+ mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
+}
+
+static inline int memory_layout(void)
+{
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
+ void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
+
+ return value2 > value1;
+}
+#endif /* _TESTCASES_MMAP_TEST_H */
diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh
new file mode 100755
index 000000000000..ca5ad7c48bad
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+original_stack_limit=$(ulimit -s)
+
+./mmap_default
+
+# Force mmap_bottomup to be run with bottom-up memory due to
+# the unlimited stack
+ulimit -s unlimited
+./mmap_bottomup
+ulimit -s $original_stack_limit