summaryrefslogtreecommitdiff
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kbuild1
-rw-r--r--arch/riscv/Kconfig23
-rw-r--r--arch/riscv/Makefile5
-rw-r--r--arch/riscv/boot/dts/renesas/r9a07g043f.dtsi4
-rw-r--r--arch/riscv/configs/defconfig2
-rw-r--r--arch/riscv/crypto/Kconfig93
-rw-r--r--arch/riscv/crypto/Makefile23
-rw-r--r--arch/riscv/crypto/aes-macros.S156
-rw-r--r--arch/riscv/crypto/aes-riscv64-glue.c550
-rw-r--r--arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S312
-rw-r--r--arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S146
-rw-r--r--arch/riscv/crypto/aes-riscv64-zvkned.S180
-rw-r--r--arch/riscv/crypto/chacha-riscv64-glue.c101
-rw-r--r--arch/riscv/crypto/chacha-riscv64-zvkb.S294
-rw-r--r--arch/riscv/crypto/ghash-riscv64-glue.c168
-rw-r--r--arch/riscv/crypto/ghash-riscv64-zvkg.S72
-rw-r--r--arch/riscv/crypto/sha256-riscv64-glue.c137
-rw-r--r--arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S225
-rw-r--r--arch/riscv/crypto/sha512-riscv64-glue.c133
-rw-r--r--arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S203
-rw-r--r--arch/riscv/crypto/sm3-riscv64-glue.c112
-rw-r--r--arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S123
-rw-r--r--arch/riscv/crypto/sm4-riscv64-glue.c107
-rw-r--r--arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S117
-rw-r--r--arch/riscv/errata/andes/errata.c10
-rw-r--r--arch/riscv/include/asm/asm.h10
-rw-r--r--arch/riscv/include/asm/bitops.h138
-rw-r--r--arch/riscv/include/asm/errata_list.h13
-rw-r--r--arch/riscv/include/asm/hwcap.h1
-rw-r--r--arch/riscv/include/asm/membarrier.h50
-rw-r--r--arch/riscv/include/asm/pgalloc.h53
-rw-r--r--arch/riscv/include/asm/pgtable.h12
-rw-r--r--arch/riscv/include/asm/sync_core.h29
-rw-r--r--arch/riscv/include/asm/tlb.h18
-rw-r--r--arch/riscv/include/asm/vector.h11
-rw-r--r--arch/riscv/include/asm/vendorid_list.h2
-rw-r--r--arch/riscv/include/asm/vmalloc.h61
-rw-r--r--arch/riscv/kernel/alternative.c2
-rw-r--r--arch/riscv/kernel/cpufeature.c1
-rw-r--r--arch/riscv/kernel/entry.S3
-rw-r--r--arch/riscv/kernel/pi/Makefile3
-rw-r--r--arch/riscv/kernel/smpboot.c1
-rw-r--r--arch/riscv/kernel/traps.c17
-rw-r--r--arch/riscv/lib/uaccess_vector.S1
-rw-r--r--arch/riscv/mm/context.c2
-rw-r--r--arch/riscv/mm/init.c6
46 files changed, 3521 insertions, 210 deletions
diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild
index d25ad1c19f88..2c585f7a0b6e 100644
--- a/arch/riscv/Kbuild
+++ b/arch/riscv/Kbuild
@@ -2,6 +2,7 @@
obj-y += kernel/ mm/ net/
obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
+obj-$(CONFIG_CRYPTO) += crypto/
obj-y += errata/
obj-$(CONFIG_KVM) += kvm/
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 51481bf9364e..8ebafe337eac 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,14 +27,18 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_MEMBARRIER_CALLBACKS
+ select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
+ select ARCH_HAS_PREPARE_SYNC_CORE_CMD
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP if MMU
select ARCH_HAS_SET_MEMORY if MMU
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -47,6 +51,9 @@ config RISCV
select ARCH_SUPPORTS_CFI_CLANG
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
+ # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
+ select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
+ select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
@@ -106,6 +113,7 @@ config RISCV
select HAVE_ARCH_KGDB_QXFER_PKT
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
@@ -124,6 +132,7 @@ config RISCV
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
select HAVE_EBPF_JIT if MMU
+ select HAVE_FAST_GUP if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_GCC_PLUGINS
@@ -154,6 +163,7 @@ config RISCV
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
+ select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
@@ -315,7 +325,6 @@ config AS_HAS_OPTION_ARCH
# https://reviews.llvm.org/D123515
def_bool y
depends on $(as-instr, .option arch$(comma) +m)
- depends on !$(as-instr, .option arch$(comma) -i)
source "arch/riscv/Kconfig.socs"
source "arch/riscv/Kconfig.errata"
@@ -578,6 +587,13 @@ config TOOLCHAIN_HAS_ZBB
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
depends on AS_HAS_OPTION_ARCH
+# This symbol indicates that the toolchain supports all v1.0 vector crypto
+# extensions, including Zvk*, Zvbb, and Zvbc. LLVM added all of these at once.
+# binutils added all except Zvkb, then added Zvkb. So we just check for Zvkb.
+config TOOLCHAIN_HAS_VECTOR_CRYPTO
+ def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb)
+ depends on AS_HAS_OPTION_ARCH
+
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
depends on TOOLCHAIN_HAS_ZBB
@@ -1035,11 +1051,8 @@ menu "Power management options"
source "kernel/power/Kconfig"
-# Hibernation is only possible on systems where the SBI implementation has
-# marked its reserved memory as not accessible from, or does not run
-# from the same memory as, Linux
config ARCH_HIBERNATION_POSSIBLE
- def_bool NONPORTABLE
+ def_bool y
config ARCH_HIBERNATION_HEADER
def_bool HIBERNATION
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0b7d109258e7..252d63942f34 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -50,6 +50,11 @@ ifndef CONFIG_AS_IS_LLVM
KBUILD_CFLAGS += -Wa,-mno-relax
KBUILD_AFLAGS += -Wa,-mno-relax
endif
+# LLVM has an issue with target-features and LTO: https://github.com/llvm/llvm-project/issues/59350
+# Ensure it is aware of linker relaxation with LTO, otherwise relocations may
+# be incorrect: https://github.com/llvm/llvm-project/issues/65090
+else ifeq ($(CONFIG_LTO_CLANG),y)
+ KBUILD_LDFLAGS += -mllvm -mattr=+c -mllvm -mattr=+relax
endif
ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
diff --git a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
index a92cfcfc021b..d7a66043f13b 100644
--- a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
+++ b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
@@ -27,7 +27,7 @@
riscv,isa-base = "rv64i";
riscv,isa-extensions = "i", "m", "a", "f", "d", "c",
"zicntr", "zicsr", "zifencei",
- "zihpm";
+ "zihpm", "xandespmu";
mmu-type = "riscv,sv39";
i-cache-size = <0x8000>;
i-cache-line-size = <0x40>;
@@ -39,7 +39,7 @@
cpu0_intc: interrupt-controller {
#interrupt-cells = <1>;
- compatible = "riscv,cpu-intc";
+ compatible = "andestech,cpu-intc", "riscv,cpu-intc";
interrupt-controller;
};
};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index eaf34e871e30..89a009a580fe 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -215,6 +215,7 @@ CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_CADENCE=y
+CONFIG_MMC_SDHCI_OF_DWCMSHC=y
CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
CONFIG_MMC_DW_STARFIVE=y
@@ -224,6 +225,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SUN6I=y
CONFIG_DMADEVICES=y
CONFIG_DMA_SUN6I=m
+CONFIG_DW_AXI_DMAC=y
CONFIG_RZ_DMAC=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
new file mode 100644
index 000000000000..2ad44e1d464a
--- /dev/null
+++ b/arch/riscv/crypto/Kconfig
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (riscv)"
+
+config CRYPTO_AES_RISCV64
+ tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_ALGAPI
+ select CRYPTO_LIB_AES
+ select CRYPTO_SKCIPHER
+ help
+ Block cipher: AES cipher algorithms
+ Length-preserving ciphers: AES with ECB, CBC, CTR, XTS
+
+ Architecture: riscv64 using:
+ - Zvkned vector crypto extension
+ - Zvbb vector extension (XTS)
+ - Zvkb vector crypto extension (CTR)
+ - Zvkg vector crypto extension (XTS)
+
+config CRYPTO_CHACHA_RISCV64
+ tristate "Ciphers: ChaCha"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ help
+ Length-preserving ciphers: ChaCha20 stream cipher algorithm
+
+ Architecture: riscv64 using:
+ - Zvkb vector crypto extension
+
+config CRYPTO_GHASH_RISCV64
+ tristate "Hash functions: GHASH"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_GCM
+ help
+ GCM GHASH function (NIST SP 800-38D)
+
+ Architecture: riscv64 using:
+ - Zvkg vector crypto extension
+
+config CRYPTO_SHA256_RISCV64
+ tristate "Hash functions: SHA-224 and SHA-256"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_SHA256
+ help
+ SHA-224 and SHA-256 secure hash algorithm (FIPS 180)
+
+ Architecture: riscv64 using:
+ - Zvknha or Zvknhb vector crypto extensions
+ - Zvkb vector crypto extension
+
+config CRYPTO_SHA512_RISCV64
+ tristate "Hash functions: SHA-384 and SHA-512"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_SHA512
+ help
+ SHA-384 and SHA-512 secure hash algorithm (FIPS 180)
+
+ Architecture: riscv64 using:
+ - Zvknhb vector crypto extension
+ - Zvkb vector crypto extension
+
+config CRYPTO_SM3_RISCV64
+ tristate "Hash functions: SM3 (ShangMi 3)"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_HASH
+ select CRYPTO_SM3
+ help
+ SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
+
+ Architecture: riscv64 using:
+ - Zvksh vector crypto extension
+ - Zvkb vector crypto extension
+
+config CRYPTO_SM4_RISCV64
+ tristate "Ciphers: SM4 (ShangMi 4)"
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ select CRYPTO_ALGAPI
+ select CRYPTO_SM4
+ help
+ SM4 block cipher algorithm (OSCCA GB/T 32907-2016,
+ ISO/IEC 18033-3:2010/Amd 1:2021)
+
+ SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+ Organization of State Commercial Administration of China (OSCCA)
+ as an authorized cryptographic algorithm for use within China.
+
+ Architecture: riscv64 using:
+ - Zvksed vector crypto extension
+ - Zvkb vector crypto extension
+
+endmenu
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
new file mode 100644
index 000000000000..247c7bc7288c
--- /dev/null
+++ b/arch/riscv/crypto/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o
+aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
+ aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
+
+obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
+ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+
+obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
+sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SHA512_RISCV64) += sha512-riscv64.o
+sha512-riscv64-y := sha512-riscv64-glue.o sha512-riscv64-zvknhb-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SM3_RISCV64) += sm3-riscv64.o
+sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
+sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o
diff --git a/arch/riscv/crypto/aes-macros.S b/arch/riscv/crypto/aes-macros.S
new file mode 100644
index 000000000000..d1a258d04bc7
--- /dev/null
+++ b/arch/riscv/crypto/aes-macros.S
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file contains macros that are shared by the other aes-*.S files. The
+// generated code of these macros depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+
+// Loads the AES round keys from \keyp into vector registers and jumps to code
+// specific to the length of the key. Specifically:
+// - If AES-128, loads round keys into v1-v11 and jumps to \label128.
+// - If AES-192, loads round keys into v1-v13 and jumps to \label192.
+// - If AES-256, loads round keys into v1-v15 and continues onwards.
+//
+// Also sets vl=4 and vtype=e32,m1,ta,ma. Clobbers t0 and t1.
+.macro aes_begin keyp, label128, label192
+ lwu t0, 480(\keyp) // t0 = key length in bytes
+ li t1, 24 // t1 = key length for AES-192
+ vsetivli zero, 4, e32, m1, ta, ma
+ vle32.v v1, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v2, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v3, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v4, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v5, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v6, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v7, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v8, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v9, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v10, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v11, (\keyp)
+ blt t0, t1, \label128 // If AES-128, goto label128.
+ addi \keyp, \keyp, 16
+ vle32.v v12, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v13, (\keyp)
+ beq t0, t1, \label192 // If AES-192, goto label192.
+ // Else, it's AES-256.
+ addi \keyp, \keyp, 16
+ vle32.v v14, (\keyp)
+ addi \keyp, \keyp, 16
+ vle32.v v15, (\keyp)
+.endm
+
+// Encrypts \data using zvkned instructions, using the round keys loaded into
+// v1-v11 (for AES-128), v1-v13 (for AES-192), or v1-v15 (for AES-256). \keylen
+// is the AES key length in bits. vl and vtype must already be set
+// appropriately. Note that if vl > 4, multiple blocks are encrypted.
+.macro aes_encrypt data, keylen
+ vaesz.vs \data, v1
+ vaesem.vs \data, v2
+ vaesem.vs \data, v3
+ vaesem.vs \data, v4
+ vaesem.vs \data, v5
+ vaesem.vs \data, v6
+ vaesem.vs \data, v7
+ vaesem.vs \data, v8
+ vaesem.vs \data, v9
+ vaesem.vs \data, v10
+.if \keylen == 128
+ vaesef.vs \data, v11
+.elseif \keylen == 192
+ vaesem.vs \data, v11
+ vaesem.vs \data, v12
+ vaesef.vs \data, v13
+.else
+ vaesem.vs \data, v11
+ vaesem.vs \data, v12
+ vaesem.vs \data, v13
+ vaesem.vs \data, v14
+ vaesef.vs \data, v15
+.endif
+.endm
+
+// Same as aes_encrypt, but decrypts instead of encrypts.
+.macro aes_decrypt data, keylen
+.if \keylen == 128
+ vaesz.vs \data, v11
+.elseif \keylen == 192
+ vaesz.vs \data, v13
+ vaesdm.vs \data, v12
+ vaesdm.vs \data, v11
+.else
+ vaesz.vs \data, v15
+ vaesdm.vs \data, v14
+ vaesdm.vs \data, v13
+ vaesdm.vs \data, v12
+ vaesdm.vs \data, v11
+.endif
+ vaesdm.vs \data, v10
+ vaesdm.vs \data, v9
+ vaesdm.vs \data, v8
+ vaesdm.vs \data, v7
+ vaesdm.vs \data, v6
+ vaesdm.vs \data, v5
+ vaesdm.vs \data, v4
+ vaesdm.vs \data, v3
+ vaesdm.vs \data, v2
+ vaesdf.vs \data, v1
+.endm
+
+// Expands to aes_encrypt or aes_decrypt according to \enc, which is 1 or 0.
+.macro aes_crypt data, enc, keylen
+.if \enc
+ aes_encrypt \data, \keylen
+.else
+ aes_decrypt \data, \keylen
+.endif
+.endm
diff --git a/arch/riscv/crypto/aes-riscv64-glue.c b/arch/riscv/crypto/aes-riscv64-glue.c
new file mode 100644
index 000000000000..37bc6ef0be40
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-glue.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES using the RISC-V vector crypto extensions. Includes the bare block
+ * cipher and the ECB, CBC, CTR, and XTS modes.
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/aes.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/xts.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 in[AES_BLOCK_SIZE],
+ u8 out[AES_BLOCK_SIZE]);
+asmlinkage void aes_decrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 in[AES_BLOCK_SIZE],
+ u8 out[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len);
+asmlinkage void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len);
+
+asmlinkage void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len,
+ u8 iv[AES_BLOCK_SIZE]);
+asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len,
+ u8 iv[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len,
+ u8 iv[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_xts_encrypt_zvkned_zvbb_zvkg(
+ const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len,
+ u8 tweak[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_xts_decrypt_zvkned_zvbb_zvkg(
+ const struct crypto_aes_ctx *key,
+ const u8 *in, u8 *out, size_t len,
+ u8 tweak[AES_BLOCK_SIZE]);
+
+static int riscv64_aes_setkey(struct crypto_aes_ctx *ctx,
+ const u8 *key, unsigned int keylen)
+{
+ /*
+ * For now we just use the generic key expansion, for these reasons:
+ *
+ * - zvkned's key expansion instructions don't support AES-192.
+ * So, non-zvkned fallback code would be needed anyway.
+ *
+ * - Users of AES in Linux usually don't change keys frequently.
+ * So, key expansion isn't performance-critical.
+ *
+ * - For single-block AES exposed as a "cipher" algorithm, it's
+ * necessary to use struct crypto_aes_ctx and initialize its 'key_dec'
+ * field with the round keys for the Equivalent Inverse Cipher. This
+ * is because with "cipher", decryption can be requested from a
+ * context where the vector unit isn't usable, necessitating a
+ * fallback to aes_decrypt(). But, zvkned can only generate and use
+ * the normal round keys. Of course, it's preferable to not have
+ * special code just for "cipher", as e.g. XTS also uses a
+ * single-block AES encryption. It's simplest to just use
+ * struct crypto_aes_ctx and aes_expandkey() everywhere.
+ */
+ return aes_expandkey(ctx, key, keylen);
+}
+
+static int riscv64_aes_setkey_cipher(struct crypto_tfm *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return riscv64_aes_setkey(ctx, key, keylen);
+}
+
+static int riscv64_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return riscv64_aes_setkey(ctx, key, keylen);
+}
+
+/* Bare AES, without a mode of operation */
+
+static void riscv64_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ aes_encrypt_zvkned(ctx, src, dst);
+ kernel_vector_end();
+ } else {
+ aes_encrypt(ctx, dst, src);
+ }
+}
+
+static void riscv64_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ aes_decrypt_zvkned(ctx, src, dst);
+ kernel_vector_end();
+ } else {
+ aes_decrypt(ctx, dst, src);
+ }
+}
+
+/* AES-ECB */
+
+static inline int riscv64_aes_ecb_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
+ kernel_vector_begin();
+ if (enc)
+ aes_ecb_encrypt_zvkned(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes & ~(AES_BLOCK_SIZE - 1));
+ else
+ aes_ecb_decrypt_zvkned(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes & ~(AES_BLOCK_SIZE - 1));
+ kernel_vector_end();
+ err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1));
+ }
+
+ return err;
+}
+
+static int riscv64_aes_ecb_encrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_ecb_crypt(req, true);
+}
+
+static int riscv64_aes_ecb_decrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_ecb_crypt(req, false);
+}
+
+/* AES-CBC */
+
+static inline int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
+ kernel_vector_begin();
+ if (enc)
+ aes_cbc_encrypt_zvkned(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes & ~(AES_BLOCK_SIZE - 1),
+ walk.iv);
+ else
+ aes_cbc_decrypt_zvkned(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes & ~(AES_BLOCK_SIZE - 1),
+ walk.iv);
+ kernel_vector_end();
+ err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1));
+ }
+
+ return err;
+}
+
+static int riscv64_aes_cbc_encrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_cbc_crypt(req, true);
+}
+
+static int riscv64_aes_cbc_decrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_cbc_crypt(req, false);
+}
+
+/* AES-CTR */
+
+static int riscv64_aes_ctr_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ unsigned int nbytes, p1_nbytes;
+ struct skcipher_walk walk;
+ u32 ctr32, nblocks;
+ int err;
+
+ /* Get the low 32-bit word of the 128-bit big endian counter. */
+ ctr32 = get_unaligned_be32(req->iv + 12);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
+ if (nbytes < walk.total) {
+ /* Not the end yet, so keep the length block-aligned. */
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+ nblocks = nbytes / AES_BLOCK_SIZE;
+ } else {
+ /* It's the end, so include any final partial block. */
+ nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE);
+ }
+ ctr32 += nblocks;
+
+ kernel_vector_begin();
+ if (ctr32 >= nblocks) {
+ /* The low 32-bit word of the counter won't overflow. */
+ aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes,
+ req->iv);
+ } else {
+ /*
+ * The low 32-bit word of the counter will overflow.
+ * The assembly doesn't handle this case, so split the
+ * operation into two at the point where the overflow
+ * will occur. After the first part, add the carry bit.
+ */
+ p1_nbytes = min_t(unsigned int, nbytes,
+ (nblocks - ctr32) * AES_BLOCK_SIZE);
+ aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ p1_nbytes, req->iv);
+ crypto_inc(req->iv, 12);
+
+ if (ctr32) {
+ aes_ctr32_crypt_zvkned_zvkb(
+ ctx,
+ walk.src.virt.addr + p1_nbytes,
+ walk.dst.virt.addr + p1_nbytes,
+ nbytes - p1_nbytes, req->iv);
+ }
+ }
+ kernel_vector_end();
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+/* AES-XTS */
+
+struct riscv64_aes_xts_ctx {
+ struct crypto_aes_ctx ctx1;
+ struct crypto_aes_ctx ctx2;
+};
+
+static int riscv64_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return xts_verify_key(tfm, key, keylen) ?:
+ riscv64_aes_setkey(&ctx->ctx1, key, keylen / 2) ?:
+ riscv64_aes_setkey(&ctx->ctx2, key + keylen / 2, keylen / 2);
+}
+
+static int riscv64_aes_xts_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
+ struct skcipher_walk walk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ /* Encrypt the IV with the tweak key to get the first tweak. */
+ kernel_vector_begin();
+ aes_encrypt_zvkned(&ctx->ctx2, req->iv, req->iv);
+ kernel_vector_end();
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ /*
+ * If the message length isn't divisible by the AES block size and the
+ * full message isn't available in one step of the scatterlist walk,
+ * then separate off the last full block and the partial block. This
+ * ensures that they are processed in the same call to the assembly
+ * function, which is required for ciphertext stealing.
+ */
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail - AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ while (walk.nbytes) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+ kernel_vector_begin();
+ if (enc)
+ aes_xts_encrypt_zvkned_zvbb_zvkg(
+ &ctx->ctx1, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, req->iv);
+ else
+ aes_xts_decrypt_zvkned_zvbb_zvkg(
+ &ctx->ctx1, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, req->iv);
+ kernel_vector_end();
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ if (err || likely(!tail))
+ return err;
+
+ /* Do ciphertext stealing with the last full block and partial block. */
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_vector_begin();
+ if (enc)
+ aes_xts_encrypt_zvkned_zvbb_zvkg(
+ &ctx->ctx1, walk.src.virt.addr,
+ walk.dst.virt.addr, walk.nbytes, req->iv);
+ else
+ aes_xts_decrypt_zvkned_zvbb_zvkg(
+ &ctx->ctx1, walk.src.virt.addr,
+ walk.dst.virt.addr, walk.nbytes, req->iv);
+ kernel_vector_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int riscv64_aes_xts_encrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_xts_crypt(req, true);
+}
+
+static int riscv64_aes_xts_decrypt(struct skcipher_request *req)
+{
+ return riscv64_aes_xts_crypt(req, false);
+}
+
+/* Algorithm definitions */
+
+static struct crypto_alg riscv64_zvkned_aes_cipher_alg = {
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_priority = 300,
+ .cra_name = "aes",
+ .cra_driver_name = "aes-riscv64-zvkned",
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = riscv64_aes_setkey_cipher,
+ .cia_encrypt = riscv64_aes_encrypt,
+ .cia_decrypt = riscv64_aes_decrypt,
+ },
+ .cra_module = THIS_MODULE,
+};
+
+static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = {
+ {
+ .setkey = riscv64_aes_setkey_skcipher,
+ .encrypt = riscv64_aes_ecb_encrypt,
+ .decrypt = riscv64_aes_ecb_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE, /* matches LMUL=8 */
+ .base = {
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_priority = 300,
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-riscv64-zvkned",
+ .cra_module = THIS_MODULE,
+ },
+ }, {
+ .setkey = riscv64_aes_setkey_skcipher,
+ .encrypt = riscv64_aes_cbc_encrypt,
+ .decrypt = riscv64_aes_cbc_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_priority = 300,
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-riscv64-zvkned",
+ .cra_module = THIS_MODULE,
+ },
+ }
+};
+
+static struct skcipher_alg riscv64_zvkned_zvkb_aes_skcipher_alg = {
+ .setkey = riscv64_aes_setkey_skcipher,
+ .encrypt = riscv64_aes_ctr_crypt,
+ .decrypt = riscv64_aes_ctr_crypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+ .base = {
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_priority = 300,
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-riscv64-zvkned-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static struct skcipher_alg riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg = {
+ .setkey = riscv64_aes_xts_setkey,
+ .encrypt = riscv64_aes_xts_encrypt,
+ .decrypt = riscv64_aes_xts_decrypt,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+ .base = {
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct riscv64_aes_xts_ctx),
+ .cra_priority = 300,
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-riscv64-zvkned-zvbb-zvkg",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static inline bool riscv64_aes_xts_supported(void)
+{
+ return riscv_isa_extension_available(NULL, ZVBB) &&
+ riscv_isa_extension_available(NULL, ZVKG) &&
+ riscv_vector_vlen() < 2048 /* Implementation limitation */;
+}
+
+static int __init riscv64_aes_mod_init(void)
+{
+ int err = -ENODEV;
+
+ if (riscv_isa_extension_available(NULL, ZVKNED) &&
+ riscv_vector_vlen() >= 128) {
+ err = crypto_register_alg(&riscv64_zvkned_aes_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(
+ riscv64_zvkned_aes_skcipher_algs,
+ ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+ if (err)
+ goto unregister_zvkned_cipher_alg;
+
+ if (riscv_isa_extension_available(NULL, ZVKB)) {
+ err = crypto_register_skcipher(
+ &riscv64_zvkned_zvkb_aes_skcipher_alg);
+ if (err)
+ goto unregister_zvkned_skcipher_algs;
+ }
+
+ if (riscv64_aes_xts_supported()) {
+ err = crypto_register_skcipher(
+ &riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
+ if (err)
+ goto unregister_zvkned_zvkb_skcipher_alg;
+ }
+ }
+
+ return err;
+
+unregister_zvkned_zvkb_skcipher_alg:
+ if (riscv_isa_extension_available(NULL, ZVKB))
+ crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
+unregister_zvkned_skcipher_algs:
+ crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
+ ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+unregister_zvkned_cipher_alg:
+ crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
+ return err;
+}
+
+static void __exit riscv64_aes_mod_exit(void)
+{
+ if (riscv64_aes_xts_supported())
+ crypto_unregister_skcipher(&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
+ if (riscv_isa_extension_available(NULL, ZVKB))
+ crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
+ crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
+ ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+ crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
+}
+
+module_init(riscv64_aes_mod_init);
+module_exit(riscv64_aes_mod_exit);
+
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS (RISC-V accelerated)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S
new file mode 100644
index 000000000000..146fc9cfb268
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+// - RISC-V Vector Bit-manipulation extension ('Zvbb')
+// - RISC-V Vector GCM/GMAC extension ('Zvkg')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned, +zvbb, +zvkg
+
+#include "aes-macros.S"
+
+#define KEYP a0
+#define INP a1
+#define OUTP a2
+#define LEN a3
+#define TWEAKP a4
+
+#define LEN32 a5
+#define TAIL_LEN a6
+#define VL a7
+#define VLMAX t4
+
+// v1-v15 contain the AES round keys, but they are used for temporaries before
+// the AES round keys have been loaded.
+#define TWEAKS v16 // LMUL=4 (most of the time)
+#define TWEAKS_BREV v20 // LMUL=4 (most of the time)
+#define MULTS_BREV v24 // LMUL=4 (most of the time)
+#define TMP0 v28
+#define TMP1 v29
+#define TMP2 v30
+#define TMP3 v31
+
+// xts_init initializes the following values:
+//
+// TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
+// TWEAKS_BREV: same as TWEAKS, but bit-reversed
+// MULTS_BREV: N 128-bit values x^N, bit-reversed. Only if N > 1.
+//
+// N is the maximum number of blocks that will be processed per loop iteration,
+// computed using vsetvli.
+//
+// The field convention used by XTS is the same as that of GHASH, but with the
+// bits reversed within each byte. The zvkg extension provides the vgmul
+// instruction which does multiplication in this field. Therefore, for tweak
+// computation we use vgmul to do multiplications in parallel, instead of
+// serially multiplying by x using shifting+xoring. Note that for this to work,
+// the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8).
+.macro xts_init
+
+ // Load the first tweak T.
+ vsetivli zero, 4, e32, m1, ta, ma
+ vle32.v TWEAKS, (TWEAKP)
+
+ // If there's only one block (or no blocks at all), then skip the tweak
+ // sequence computation because (at most) T itself is needed.
+ li t0, 16
+ ble LEN, t0, .Linit_single_block\@
+
+ // Save a copy of T bit-reversed in v12.
+ vbrev8.v v12, TWEAKS
+
+ //
+ // Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
+ // that N <= 128. Though, this code actually requires N < 64 (or
+ // equivalently VLEN < 2048) due to the use of 64-bit intermediate
+ // values here and in the x^N computation later.
+ //
+ vsetvli VL, LEN32, e32, m4, ta, ma
+ srli t0, VL, 2 // t0 = N (num blocks)
+ // Generate two sequences, each with N 32-bit values:
+ // v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
+ vsetvli zero, t0, e32, m1, ta, ma
+ vmv.v.i v0, 1
+ vid.v v1
+ // Use vzext to zero-extend the sequences to 64 bits. Reinterpret them
+ // as two sequences, each with 2*N 32-bit values:
+ // v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
+ vsetvli zero, t0, e64, m2, ta, ma
+ vzext.vf2 v2, v0
+ vzext.vf2 v4, v1
+ slli t1, t0, 1 // t1 = 2*N
+ vsetvli zero, t1, e32, m2, ta, ma
+ // Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
+ // widening to 64 bits per element. When reinterpreted as N 128-bit
+ // values, this is the needed sequence of 128-bit values 1 << i (x^i).
+ vwsll.vv v8, v2, v4
+
+ // Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
+ // multiply by x^i. This gives the sequence T*(x^i), bit-reversed.
+ vsetvli zero, LEN32, e32, m4, ta, ma
+ vmv.v.i TWEAKS_BREV, 0
+ vaesz.vs TWEAKS_BREV, v12
+ vbrev8.v v8, v8
+ vgmul.vv TWEAKS_BREV, v8
+
+ // Save a copy of the sequence T*(x^i) with the bit reversal undone.
+ vbrev8.v TWEAKS, TWEAKS_BREV
+
+ // Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
+ li t1, 1
+ sll t1, t1, t0 // t1 = 1 << N
+ vsetivli zero, 2, e64, m1, ta, ma
+ vmv.v.i v0, 0
+ vsetivli zero, 1, e64, m1, tu, ma
+ vmv.v.x v0, t1
+ vbrev8.v v0, v0
+ vsetvli zero, LEN32, e32, m4, ta, ma
+ vmv.v.i MULTS_BREV, 0
+ vaesz.vs MULTS_BREV, v0
+
+ j .Linit_done\@
+
+.Linit_single_block\@:
+ vbrev8.v TWEAKS_BREV, TWEAKS
+.Linit_done\@:
+.endm
+
+// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed. This is
+// the multiplier required to advance the tweak by one.
+.macro load_x
+ li t0, 0x40
+ vsetivli zero, 4, e32, m1, ta, ma
+ vmv.v.i MULTS_BREV, 0
+ vsetivli zero, 1, e8, m1, tu, ma
+ vmv.v.x MULTS_BREV, t0
+.endm
+
+.macro __aes_xts_crypt enc, keylen
+ // With 16 < len <= 31, there's no main loop, just ciphertext stealing.
+ beqz LEN32, .Lcts_without_main_loop\@
+
+ vsetvli VLMAX, zero, e32, m4, ta, ma
+1:
+ vsetvli VL, LEN32, e32, m4, ta, ma
+2:
+ // Encrypt or decrypt VL/4 blocks.
+ vle32.v TMP0, (INP)
+ vxor.vv TMP0, TMP0, TWEAKS
+ aes_crypt TMP0, \enc, \keylen
+ vxor.vv TMP0, TMP0, TWEAKS
+ vse32.v TMP0, (OUTP)
+
+ // Update the pointers and the remaining length.
+ slli t0, VL, 2
+ add INP, INP, t0
+ add OUTP, OUTP, t0
+ sub LEN32, LEN32, VL
+
+ // Check whether more blocks remain.
+ beqz LEN32, .Lmain_loop_done\@
+
+ // Compute the next sequence of tweaks by multiplying the previous
+ // sequence by x^N. Store the result in both bit-reversed order and
+ // regular order (i.e. with the bit reversal undone).
+ vgmul.vv TWEAKS_BREV, MULTS_BREV
+ vbrev8.v TWEAKS, TWEAKS_BREV
+
+ // Since we compute the tweak multipliers x^N in advance, we require
+ // that each iteration process the same length except possibly the last.
+ // This conflicts slightly with the behavior allowed by RISC-V Vector
+ // Extension, where CPUs can select a lower length for both of the last
+ // two iterations. E.g., vl might take the sequence of values
+ // [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
+ // can use x^4 again instead of computing x^3. Therefore, we explicitly
+ // keep the vl at VLMAX if there is at least VLMAX remaining.
+ bge LEN32, VLMAX, 2b
+ j 1b
+
+.Lmain_loop_done\@:
+ load_x
+
+ // Compute the next tweak.
+ addi t0, VL, -4
+ vsetivli zero, 4, e32, m4, ta, ma
+ vslidedown.vx TWEAKS_BREV, TWEAKS_BREV, t0 // Extract last tweak
+ vsetivli zero, 4, e32, m1, ta, ma
+ vgmul.vv TWEAKS_BREV, MULTS_BREV // Advance to next tweak
+
+ bnez TAIL_LEN, .Lcts\@
+
+ // Update *TWEAKP to contain the next tweak.
+ vbrev8.v TWEAKS, TWEAKS_BREV
+ vse32.v TWEAKS, (TWEAKP)
+ ret
+
+.Lcts_without_main_loop\@:
+ load_x
+.Lcts\@:
+ // TWEAKS_BREV now contains the next tweak. Compute the one after that.
+ vsetivli zero, 4, e32, m1, ta, ma
+ vmv.v.v TMP0, TWEAKS_BREV
+ vgmul.vv TMP0, MULTS_BREV
+ // Undo the bit reversal of the next two tweaks and store them in TMP1
+ // and TMP2, such that TMP1 is the first needed and TMP2 the second.
+.if \enc
+ vbrev8.v TMP1, TWEAKS_BREV
+ vbrev8.v TMP2, TMP0
+.else
+ vbrev8.v TMP1, TMP0
+ vbrev8.v TMP2, TWEAKS_BREV
+.endif
+
+ // Encrypt/decrypt the last full block.
+ vle32.v TMP0, (INP)
+ vxor.vv TMP0, TMP0, TMP1
+ aes_crypt TMP0, \enc, \keylen
+ vxor.vv TMP0, TMP0, TMP1
+
+ // Swap the first TAIL_LEN bytes of the above result with the tail.
+ // Note that to support in-place encryption/decryption, the load from
+ // the input tail must happen before the store to the output tail.
+ addi t0, INP, 16
+ addi t1, OUTP, 16
+ vmv.v.v TMP3, TMP0
+ vsetvli zero, TAIL_LEN, e8, m1, tu, ma
+ vle8.v TMP0, (t0)
+ vse8.v TMP3, (t1)
+
+ // Encrypt/decrypt again and store the last full block.
+ vsetivli zero, 4, e32, m1, ta, ma
+ vxor.vv TMP0, TMP0, TMP2
+ aes_crypt TMP0, \enc, \keylen
+ vxor.vv TMP0, TMP0, TMP2
+ vse32.v TMP0, (OUTP)
+
+ ret
+.endm
+
+.macro aes_xts_crypt enc
+
+ // Check whether the length is a multiple of the AES block size.
+ andi TAIL_LEN, LEN, 15
+ beqz TAIL_LEN, 1f
+
+ // The length isn't a multiple of the AES block size, so ciphertext
+ // stealing will be required. Ciphertext stealing involves special
+ // handling of the partial block and the last full block, so subtract
+ // the length of both from the length to be processed in the main loop.
+ sub LEN, LEN, TAIL_LEN
+ addi LEN, LEN, -16
+1:
+ srli LEN32, LEN, 2
+ // LEN and LEN32 now contain the total length of the blocks that will be
+ // processed in the main loop, in bytes and 32-bit words respectively.
+
+ xts_init
+ aes_begin KEYP, 128f, 192f
+ __aes_xts_crypt \enc, 256
+128:
+ __aes_xts_crypt \enc, 128
+192:
+ __aes_xts_crypt \enc, 192
+.endm
+
+// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
+// const u8 *in, u8 *out, size_t len,
+// u8 tweak[16]);
+//
+// |key| is the data key. |tweak| contains the next tweak; the encryption of
+// the original IV with the tweak key was already done. This function supports
+// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
+// |len| must be a multiple of 16 except on the last call. If |len| is a
+// multiple of 16, then this function updates |tweak| to contain the next tweak.
+SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
+ aes_xts_crypt 1
+SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
+ aes_xts_crypt 0
+SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S
new file mode 100644
index 000000000000..9962d4500587
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned, +zvkb
+
+#include "aes-macros.S"
+
+#define KEYP a0
+#define INP a1
+#define OUTP a2
+#define LEN a3
+#define IVP a4
+
+#define LEN32 a5
+#define VL_E32 a6
+#define VL_BLOCKS a7
+
+.macro aes_ctr32_crypt keylen
+ // LEN32 = number of blocks, rounded up, in 32-bit words.
+ addi t0, LEN, 15
+ srli t0, t0, 4
+ slli LEN32, t0, 2
+
+ // Create a mask that selects the last 32-bit word of each 128-bit
+ // block. This is the word that contains the (big-endian) counter.
+ li t0, 0x88
+ vsetvli t1, zero, e8, m1, ta, ma
+ vmv.v.x v0, t0
+
+ // Load the IV into v31. The last 32-bit word contains the counter.
+ vsetivli zero, 4, e32, m1, ta, ma
+ vle32.v v31, (IVP)
+
+ // Convert the big-endian counter into little-endian.
+ vsetivli zero, 4, e32, m1, ta, mu
+ vrev8.v v31, v31, v0.t
+
+ // Splat the IV to v16 (with LMUL=4). The number of copies is the
+ // maximum number of blocks that will be processed per iteration.
+ vsetvli zero, LEN32, e32, m4, ta, ma
+ vmv.v.i v16, 0
+ vaesz.vs v16, v31
+
+ // v20 = [x, x, x, 0, x, x, x, 1, ...]
+ viota.m v20, v0, v0.t
+ // v16 = [IV0, IV1, IV2, counter+0, IV0, IV1, IV2, counter+1, ...]
+ vsetvli VL_E32, LEN32, e32, m4, ta, mu
+ vadd.vv v16, v16, v20, v0.t
+
+ j 2f
+1:
+ // Set the number of blocks to process in this iteration. vl=VL_E32 is
+ // the length in 32-bit words, i.e. 4 times the number of blocks.
+ vsetvli VL_E32, LEN32, e32, m4, ta, mu
+
+ // Increment the counters by the number of blocks processed in the
+ // previous iteration.
+ vadd.vx v16, v16, VL_BLOCKS, v0.t
+2:
+ // Prepare the AES inputs into v24.
+ vmv.v.v v24, v16
+ vrev8.v v24, v24, v0.t // Convert counters back to big-endian.
+
+ // Encrypt the AES inputs to create the next portion of the keystream.
+ aes_encrypt v24, \keylen
+
+ // XOR the data with the keystream.
+ vsetvli t0, LEN, e8, m4, ta, ma
+ vle8.v v20, (INP)
+ vxor.vv v20, v20, v24
+ vse8.v v20, (OUTP)
+
+ // Advance the pointers and update the remaining length.
+ add INP, INP, t0
+ add OUTP, OUTP, t0
+ sub LEN, LEN, t0
+ sub LEN32, LEN32, VL_E32
+ srli VL_BLOCKS, VL_E32, 2
+
+ // Repeat if more data remains.
+ bnez LEN, 1b
+
+ // Update *IVP to contain the next counter.
+ vsetivli zero, 4, e32, m1, ta, mu
+ vadd.vx v16, v16, VL_BLOCKS, v0.t
+ vrev8.v v16, v16, v0.t // Convert counters back to big-endian.
+ vse32.v v16, (IVP)
+
+ ret
+.endm
+
+// void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
+// const u8 *in, u8 *out, size_t len,
+// u8 iv[16]);
+SYM_FUNC_START(aes_ctr32_crypt_zvkned_zvkb)
+ aes_begin KEYP, 128f, 192f
+ aes_ctr32_crypt 256
+128:
+ aes_ctr32_crypt 128
+192:
+ aes_ctr32_crypt 192
+SYM_FUNC_END(aes_ctr32_crypt_zvkned_zvkb)
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned.S b/arch/riscv/crypto/aes-riscv64-zvkned.S
new file mode 100644
index 000000000000..78d4e1186c07
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned.S
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned
+
+#include "aes-macros.S"
+
+#define KEYP a0
+#define INP a1
+#define OUTP a2
+#define LEN a3
+#define IVP a4
+
+.macro __aes_crypt_zvkned enc, keylen
+ vle32.v v16, (INP)
+ aes_crypt v16, \enc, \keylen
+ vse32.v v16, (OUTP)
+ ret
+.endm
+
+.macro aes_crypt_zvkned enc
+ aes_begin KEYP, 128f, 192f
+ __aes_crypt_zvkned \enc, 256
+128:
+ __aes_crypt_zvkned \enc, 128
+192:
+ __aes_crypt_zvkned \enc, 192
+.endm
+
+// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
+// const u8 in[16], u8 out[16]);
+SYM_FUNC_START(aes_encrypt_zvkned)
+ aes_crypt_zvkned 1
+SYM_FUNC_END(aes_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_decrypt_zvkned)
+ aes_crypt_zvkned 0
+SYM_FUNC_END(aes_decrypt_zvkned)
+
+.macro __aes_ecb_crypt enc, keylen
+ srli t0, LEN, 2
+ // t0 is the remaining length in 32-bit words. It's a multiple of 4.
+1:
+ vsetvli t1, t0, e32, m8, ta, ma
+ sub t0, t0, t1 // Subtract number of words processed
+ slli t1, t1, 2 // Words to bytes
+ vle32.v v16, (INP)
+ aes_crypt v16, \enc, \keylen
+ vse32.v v16, (OUTP)
+ add INP, INP, t1
+ add OUTP, OUTP, t1
+ bnez t0, 1b
+
+ ret
+.endm
+
+.macro aes_ecb_crypt enc
+ aes_begin KEYP, 128f, 192f
+ __aes_ecb_crypt \enc, 256
+128:
+ __aes_ecb_crypt \enc, 128
+192:
+ __aes_ecb_crypt \enc, 192
+.endm
+
+// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
+// const u8 *in, u8 *out, size_t len);
+//
+// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
+SYM_FUNC_START(aes_ecb_encrypt_zvkned)
+ aes_ecb_crypt 1
+SYM_FUNC_END(aes_ecb_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_ecb_decrypt_zvkned)
+ aes_ecb_crypt 0
+SYM_FUNC_END(aes_ecb_decrypt_zvkned)
+
+.macro aes_cbc_encrypt keylen
+ vle32.v v16, (IVP) // Load IV
+1:
+ vle32.v v17, (INP) // Load plaintext block
+ vxor.vv v16, v16, v17 // XOR with IV or prev ciphertext block
+ aes_encrypt v16, \keylen // Encrypt
+ vse32.v v16, (OUTP) // Store ciphertext block
+ addi INP, INP, 16
+ addi OUTP, OUTP, 16
+ addi LEN, LEN, -16
+ bnez LEN, 1b
+
+ vse32.v v16, (IVP) // Store next IV
+ ret
+.endm
+
+.macro aes_cbc_decrypt keylen
+ vle32.v v16, (IVP) // Load IV
+1:
+ vle32.v v17, (INP) // Load ciphertext block
+ vmv.v.v v18, v17 // Save ciphertext block
+ aes_decrypt v17, \keylen // Decrypt
+ vxor.vv v17, v17, v16 // XOR with IV or prev ciphertext block
+ vse32.v v17, (OUTP) // Store plaintext block
+ vmv.v.v v16, v18 // Next "IV" is prev ciphertext block
+ addi INP, INP, 16
+ addi OUTP, OUTP, 16
+ addi LEN, LEN, -16
+ bnez LEN, 1b
+
+ vse32.v v16, (IVP) // Store next IV
+ ret
+.endm
+
+// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
+// const u8 *in, u8 *out, size_t len, u8 iv[16]);
+//
+// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
+SYM_FUNC_START(aes_cbc_encrypt_zvkned)
+ aes_begin KEYP, 128f, 192f
+ aes_cbc_encrypt 256
+128:
+ aes_cbc_encrypt 128
+192:
+ aes_cbc_encrypt 192
+SYM_FUNC_END(aes_cbc_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_cbc_decrypt_zvkned)
+ aes_begin KEYP, 128f, 192f
+ aes_cbc_decrypt 256
+128:
+ aes_cbc_decrypt 128
+192:
+ aes_cbc_decrypt 192
+SYM_FUNC_END(aes_cbc_decrypt_zvkned)
diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
new file mode 100644
index 000000000000..10b46f36375a
--- /dev/null
+++ b/arch/riscv/crypto/chacha-riscv64-glue.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha20 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out,
+ size_t len, const u32 iv[4]);
+
+static int riscv64_chacha20_crypt(struct skcipher_request *req)
+{
+ u32 iv[CHACHA_IV_SIZE / sizeof(u32)];
+ u8 block_buffer[CHACHA_BLOCK_SIZE];
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ unsigned int tail_bytes;
+ int err;
+
+ iv[0] = get_unaligned_le32(req->iv);
+ iv[1] = get_unaligned_le32(req->iv + 4);
+ iv[2] = get_unaligned_le32(req->iv + 8);
+ iv[3] = get_unaligned_le32(req->iv + 12);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ while (walk.nbytes) {
+ nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1);
+ tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1);
+ kernel_vector_begin();
+ if (nbytes) {
+ chacha20_zvkb(ctx->key, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, iv);
+ iv[0] += nbytes / CHACHA_BLOCK_SIZE;
+ }
+ if (walk.nbytes == walk.total && tail_bytes > 0) {
+ memcpy(block_buffer, walk.src.virt.addr + nbytes,
+ tail_bytes);
+ chacha20_zvkb(ctx->key, block_buffer, block_buffer,
+ CHACHA_BLOCK_SIZE, iv);
+ memcpy(walk.dst.virt.addr + nbytes, block_buffer,
+ tail_bytes);
+ tail_bytes = 0;
+ }
+ kernel_vector_end();
+
+ err = skcipher_walk_done(&walk, tail_bytes);
+ }
+
+ return err;
+}
+
+static struct skcipher_alg riscv64_chacha_alg = {
+ .setkey = chacha20_setkey,
+ .encrypt = riscv64_chacha20_crypt,
+ .decrypt = riscv64_chacha20_crypt,
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .base = {
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct chacha_ctx),
+ .cra_priority = 300,
+ .cra_name = "chacha20",
+ .cra_driver_name = "chacha20-riscv64-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init riscv64_chacha_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_skcipher(&riscv64_chacha_alg);
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+ crypto_unregister_skcipher(&riscv64_chacha_alg);
+}
+
+module_init(riscv64_chacha_mod_init);
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/crypto/chacha-riscv64-zvkb.S
new file mode 100644
index 000000000000..bf057737ac69
--- /dev/null
+++ b/arch/riscv/crypto/chacha-riscv64-zvkb.S
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkb
+
+#define KEYP a0
+#define INP a1
+#define OUTP a2
+#define LEN a3
+#define IVP a4
+
+#define CONSTS0 a5
+#define CONSTS1 a6
+#define CONSTS2 a7
+#define CONSTS3 t0
+#define TMP t1
+#define VL t2
+#define STRIDE t3
+#define NROUNDS t4
+#define KEY0 s0
+#define KEY1 s1
+#define KEY2 s2
+#define KEY3 s3
+#define KEY4 s4
+#define KEY5 s5
+#define KEY6 s6
+#define KEY7 s7
+#define COUNTER s8
+#define NONCE0 s9
+#define NONCE1 s10
+#define NONCE2 s11
+
+.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
+ a2, b2, c2, d2, a3, b3, c3, d3
+ // a += b; d ^= a; d = rol(d, 16);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 16
+ vror.vi \d1, \d1, 32 - 16
+ vror.vi \d2, \d2, 32 - 16
+ vror.vi \d3, \d3, 32 - 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 12
+ vror.vi \b1, \b1, 32 - 12
+ vror.vi \b2, \b2, 32 - 12
+ vror.vi \b3, \b3, 32 - 12
+
+ // a += b; d ^= a; d = rol(d, 8);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 8
+ vror.vi \d1, \d1, 32 - 8
+ vror.vi \d2, \d2, 32 - 8
+ vror.vi \d3, \d3, 32 - 8
+
+ // c += d; b ^= c; b = rol(b, 7);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 7
+ vror.vi \b1, \b1, 32 - 7
+ vror.vi \b2, \b2, 32 - 7
+ vror.vi \b3, \b3, 32 - 7
+.endm
+
+// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
+// const u32 iv[4]);
+//
+// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
+// The counter is treated as 32-bit, following the RFC7539 convention.
+SYM_FUNC_START(chacha20_zvkb)
+ srli LEN, LEN, 6 // Bytes to blocks
+
+ addi sp, sp, -96
+ sd s0, 0(sp)
+ sd s1, 8(sp)
+ sd s2, 16(sp)
+ sd s3, 24(sp)
+ sd s4, 32(sp)
+ sd s5, 40(sp)
+ sd s6, 48(sp)
+ sd s7, 56(sp)
+ sd s8, 64(sp)
+ sd s9, 72(sp)
+ sd s10, 80(sp)
+ sd s11, 88(sp)
+
+ li STRIDE, 64
+
+ // Set up the initial state matrix in scalar registers.
+ li CONSTS0, 0x61707865 // "expa" little endian
+ li CONSTS1, 0x3320646e // "nd 3" little endian
+ li CONSTS2, 0x79622d32 // "2-by" little endian
+ li CONSTS3, 0x6b206574 // "te k" little endian
+ lw KEY0, 0(KEYP)
+ lw KEY1, 4(KEYP)
+ lw KEY2, 8(KEYP)
+ lw KEY3, 12(KEYP)
+ lw KEY4, 16(KEYP)
+ lw KEY5, 20(KEYP)
+ lw KEY6, 24(KEYP)
+ lw KEY7, 28(KEYP)
+ lw COUNTER, 0(IVP)
+ lw NONCE0, 4(IVP)
+ lw NONCE1, 8(IVP)
+ lw NONCE2, 12(IVP)
+
+.Lblock_loop:
+ // Set vl to the number of blocks to process in this iteration.
+ vsetvli VL, LEN, e32, m1, ta, ma
+
+ // Set up the initial state matrix for the next VL blocks in v0-v15.
+ // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
+ // Note that only the counter word, at index 12, differs across blocks.
+ vmv.v.x v0, CONSTS0
+ vmv.v.x v1, CONSTS1
+ vmv.v.x v2, CONSTS2
+ vmv.v.x v3, CONSTS3
+ vmv.v.x v4, KEY0
+ vmv.v.x v5, KEY1
+ vmv.v.x v6, KEY2
+ vmv.v.x v7, KEY3
+ vmv.v.x v8, KEY4
+ vmv.v.x v9, KEY5
+ vmv.v.x v10, KEY6
+ vmv.v.x v11, KEY7
+ vid.v v12
+ vadd.vx v12, v12, COUNTER
+ vmv.v.x v13, NONCE0
+ vmv.v.x v14, NONCE1
+ vmv.v.x v15, NONCE2
+
+ // Load the first half of the input data for each block into v16-v23.
+ // v{16+i} holds the i'th 32-bit word for all blocks.
+ vlsseg8e32.v v16, (INP), STRIDE
+
+ li NROUNDS, 20
+.Lnext_doubleround:
+ addi NROUNDS, NROUNDS, -2
+ // column round
+ chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
+ v2, v6, v10, v14, v3, v7, v11, v15
+ // diagonal round
+ chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
+ v2, v7, v8, v13, v3, v4, v9, v14
+ bnez NROUNDS, .Lnext_doubleround
+
+ // Load the second half of the input data for each block into v24-v31.
+ // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
+ addi TMP, INP, 32
+ vlsseg8e32.v v24, (TMP), STRIDE
+
+ // Finalize the first half of the keystream for each block.
+ vadd.vx v0, v0, CONSTS0
+ vadd.vx v1, v1, CONSTS1
+ vadd.vx v2, v2, CONSTS2
+ vadd.vx v3, v3, CONSTS3
+ vadd.vx v4, v4, KEY0
+ vadd.vx v5, v5, KEY1
+ vadd.vx v6, v6, KEY2
+ vadd.vx v7, v7, KEY3
+
+ // Encrypt/decrypt the first half of the data for each block.
+ vxor.vv v16, v16, v0
+ vxor.vv v17, v17, v1
+ vxor.vv v18, v18, v2
+ vxor.vv v19, v19, v3
+ vxor.vv v20, v20, v4
+ vxor.vv v21, v21, v5
+ vxor.vv v22, v22, v6
+ vxor.vv v23, v23, v7
+
+ // Store the first half of the output data for each block.
+ vssseg8e32.v v16, (OUTP), STRIDE
+
+ // Finalize the second half of the keystream for each block.
+ vadd.vx v8, v8, KEY4
+ vadd.vx v9, v9, KEY5
+ vadd.vx v10, v10, KEY6
+ vadd.vx v11, v11, KEY7
+ vid.v v0
+ vadd.vx v12, v12, COUNTER
+ vadd.vx v13, v13, NONCE0
+ vadd.vx v14, v14, NONCE1
+ vadd.vx v15, v15, NONCE2
+ vadd.vv v12, v12, v0
+
+ // Encrypt/decrypt the second half of the data for each block.
+ vxor.vv v24, v24, v8
+ vxor.vv v25, v25, v9
+ vxor.vv v26, v26, v10
+ vxor.vv v27, v27, v11
+ vxor.vv v29, v29, v13
+ vxor.vv v28, v28, v12
+ vxor.vv v30, v30, v14
+ vxor.vv v31, v31, v15
+
+ // Store the second half of the output data for each block.
+ addi TMP, OUTP, 32
+ vssseg8e32.v v24, (TMP), STRIDE
+
+ // Update the counter, the remaining number of blocks, and the input and
+ // output pointers according to the number of blocks processed (VL).
+ add COUNTER, COUNTER, VL
+ sub LEN, LEN, VL
+ slli TMP, VL, 6
+ add OUTP, OUTP, TMP
+ add INP, INP, TMP
+ bnez LEN, .Lblock_loop
+
+ ld s0, 0(sp)
+ ld s1, 8(sp)
+ ld s2, 16(sp)
+ ld s3, 24(sp)
+ ld s4, 32(sp)
+ ld s5, 40(sp)
+ ld s6, 48(sp)
+ ld s7, 56(sp)
+ ld s8, 64(sp)
+ ld s9, 72(sp)
+ ld s10, 80(sp)
+ ld s11, 88(sp)
+ addi sp, sp, 96
+ ret
+SYM_FUNC_END(chacha20_zvkb)
diff --git a/arch/riscv/crypto/ghash-riscv64-glue.c b/arch/riscv/crypto/ghash-riscv64-glue.c
new file mode 100644
index 000000000000..312e7891fd0a
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-glue.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GHASH using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
+ size_t len);
+
+struct riscv64_ghash_tfm_ctx {
+ be128 key;
+};
+
+struct riscv64_ghash_desc_ctx {
+ be128 accumulator;
+ u8 buffer[GHASH_BLOCK_SIZE];
+ u32 bytes;
+};
+
+static int riscv64_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen != GHASH_BLOCK_SIZE)
+ return -EINVAL;
+
+ memcpy(&tctx->key, key, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int riscv64_ghash_init(struct shash_desc *desc)
+{
+ struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ *dctx = (struct riscv64_ghash_desc_ctx){};
+
+ return 0;
+}
+
+static inline void
+riscv64_ghash_blocks(const struct riscv64_ghash_tfm_ctx *tctx,
+ struct riscv64_ghash_desc_ctx *dctx,
+ const u8 *src, size_t srclen)
+{
+ /* The srclen is nonzero and a multiple of 16. */
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ ghash_zvkg(&dctx->accumulator, &tctx->key, src, srclen);
+ kernel_vector_end();
+ } else {
+ do {
+ crypto_xor((u8 *)&dctx->accumulator, src,
+ GHASH_BLOCK_SIZE);
+ gf128mul_lle(&dctx->accumulator, &tctx->key);
+ src += GHASH_BLOCK_SIZE;
+ srclen -= GHASH_BLOCK_SIZE;
+ } while (srclen);
+ }
+}
+
+static int riscv64_ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int srclen)
+{
+ const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ unsigned int len;
+
+ if (dctx->bytes) {
+ if (dctx->bytes + srclen < GHASH_BLOCK_SIZE) {
+ memcpy(dctx->buffer + dctx->bytes, src, srclen);
+ dctx->bytes += srclen;
+ return 0;
+ }
+ memcpy(dctx->buffer + dctx->bytes, src,
+ GHASH_BLOCK_SIZE - dctx->bytes);
+ riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
+ GHASH_BLOCK_SIZE);
+ src += GHASH_BLOCK_SIZE - dctx->bytes;
+ srclen -= GHASH_BLOCK_SIZE - dctx->bytes;
+ dctx->bytes = 0;
+ }
+
+ len = round_down(srclen, GHASH_BLOCK_SIZE);
+ if (len) {
+ riscv64_ghash_blocks(tctx, dctx, src, len);
+ src += len;
+ srclen -= len;
+ }
+
+ if (srclen) {
+ memcpy(dctx->buffer, src, srclen);
+ dctx->bytes = srclen;
+ }
+
+ return 0;
+}
+
+static int riscv64_ghash_final(struct shash_desc *desc, u8 *out)
+{
+ const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ int i;
+
+ if (dctx->bytes) {
+ for (i = dctx->bytes; i < GHASH_BLOCK_SIZE; i++)
+ dctx->buffer[i] = 0;
+
+ riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
+ GHASH_BLOCK_SIZE);
+ }
+
+ memcpy(out, &dctx->accumulator, GHASH_DIGEST_SIZE);
+ return 0;
+}
+
+static struct shash_alg riscv64_ghash_alg = {
+ .init = riscv64_ghash_init,
+ .update = riscv64_ghash_update,
+ .final = riscv64_ghash_final,
+ .setkey = riscv64_ghash_setkey,
+ .descsize = sizeof(struct riscv64_ghash_desc_ctx),
+ .digestsize = GHASH_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct riscv64_ghash_tfm_ctx),
+ .cra_priority = 300,
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-riscv64-zvkg",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init riscv64_ghash_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKG) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_shash(&riscv64_ghash_alg);
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_ghash_mod_exit(void)
+{
+ crypto_unregister_shash(&riscv64_ghash_alg);
+}
+
+module_init(riscv64_ghash_mod_init);
+module_exit(riscv64_ghash_mod_exit);
+
+MODULE_DESCRIPTION("GHASH (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/riscv/crypto/ghash-riscv64-zvkg.S b/arch/riscv/crypto/ghash-riscv64-zvkg.S
new file mode 100644
index 000000000000..f2b43fb4d434
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-zvkg.S
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector GCM/GMAC extension ('Zvkg')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkg
+
+#define ACCUMULATOR a0
+#define KEY a1
+#define DATA a2
+#define LEN a3
+
+// void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
+// size_t len);
+//
+// |len| must be nonzero and a multiple of 16 (GHASH_BLOCK_SIZE).
+SYM_FUNC_START(ghash_zvkg)
+ vsetivli zero, 4, e32, m1, ta, ma
+ vle32.v v1, (ACCUMULATOR)
+ vle32.v v2, (KEY)
+.Lnext_block:
+ vle32.v v3, (DATA)
+ vghsh.vv v1, v2, v3
+ addi DATA, DATA, 16
+ addi LEN, LEN, -16
+ bnez LEN, .Lnext_block
+
+ vse32.v v1, (ACCUMULATOR)
+ ret
+SYM_FUNC_END(ghash_zvkg)
diff --git a/arch/riscv/crypto/sha256-riscv64-glue.c b/arch/riscv/crypto/sha256-riscv64-glue.c
new file mode 100644
index 000000000000..71e051e40a64
--- /dev/null
+++ b/arch/riscv/crypto/sha256-riscv64-glue.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 and SHA-224 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2022 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sha256_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sha256_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
+ struct sha256_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ /*
+ * Ensure struct sha256_state begins directly with the SHA-256
+ * 256-bit internal state, as this is what the asm function expects.
+ */
+ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ sha256_base_do_update(desc, data, len,
+ sha256_transform_zvknha_or_zvknhb_zvkb);
+ kernel_vector_end();
+ } else {
+ crypto_sha256_update(desc, data, len);
+ }
+ return 0;
+}
+
+static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ if (len)
+ sha256_base_do_update(
+ desc, data, len,
+ sha256_transform_zvknha_or_zvknhb_zvkb);
+ sha256_base_do_finalize(
+ desc, sha256_transform_zvknha_or_zvknhb_zvkb);
+ kernel_vector_end();
+
+ return sha256_base_finish(desc, out);
+ }
+
+ return crypto_sha256_finup(desc, data, len, out);
+}
+
+static int riscv64_sha256_final(struct shash_desc *desc, u8 *out)
+{
+ return riscv64_sha256_finup(desc, NULL, 0, out);
+}
+
+static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return sha256_base_init(desc) ?:
+ riscv64_sha256_finup(desc, data, len, out);
+}
+
+static struct shash_alg riscv64_sha256_algs[] = {
+ {
+ .init = sha256_base_init,
+ .update = riscv64_sha256_update,
+ .final = riscv64_sha256_final,
+ .finup = riscv64_sha256_finup,
+ .digest = riscv64_sha256_digest,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_priority = 300,
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+ }, {
+ .init = sha224_base_init,
+ .update = riscv64_sha256_update,
+ .final = riscv64_sha256_final,
+ .finup = riscv64_sha256_finup,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_priority = 300,
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+ },
+};
+
+static int __init riscv64_sha256_mod_init(void)
+{
+ /* Both zvknha and zvknhb provide the SHA-256 instructions. */
+ if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
+ riscv_isa_extension_available(NULL, ZVKNHB)) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_shashes(riscv64_sha256_algs,
+ ARRAY_SIZE(riscv64_sha256_algs));
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_sha256_mod_exit(void)
+{
+ crypto_unregister_shashes(riscv64_sha256_algs,
+ ARRAY_SIZE(riscv64_sha256_algs));
+}
+
+module_init(riscv64_sha256_mod_init);
+module_exit(riscv64_sha256_mod_exit);
+
+MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha224");
diff --git a/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
new file mode 100644
index 000000000000..8ebcc17de4dc
--- /dev/null
+++ b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvknha, +zvkb
+
+#define STATEP a0
+#define DATA a1
+#define NUM_BLOCKS a2
+
+#define STATEP_C a3
+
+#define MASK v0
+#define INDICES v1
+#define W0 v2
+#define W1 v3
+#define W2 v4
+#define W3 v5
+#define VTMP v6
+#define FEBA v7
+#define HGDC v8
+#define K0 v10
+#define K1 v11
+#define K2 v12
+#define K3 v13
+#define K4 v14
+#define K5 v15
+#define K6 v16
+#define K7 v17
+#define K8 v18
+#define K9 v19
+#define K10 v20
+#define K11 v21
+#define K12 v22
+#define K13 v23
+#define K14 v24
+#define K15 v25
+#define PREV_FEBA v26
+#define PREV_HGDC v27
+
+// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words.
+//
+// If not all the message schedule words have been computed yet, then this also
+// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
+// message schedule words; this macro computes the group after w3 and writes it
+// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
+// w0), so the caller must cycle through the registers accordingly.
+.macro sha256_4rounds last, k, w0, w1, w2, w3
+ vadd.vv VTMP, \k, \w0
+ vsha2cl.vv HGDC, FEBA, VTMP
+ vsha2ch.vv FEBA, HGDC, VTMP
+.if !\last
+ vmerge.vvm VTMP, \w2, \w1, MASK
+ vsha2ms.vv \w0, VTMP, \w3
+.endif
+.endm
+
+.macro sha256_16rounds last, k0, k1, k2, k3
+ sha256_4rounds \last, \k0, W0, W1, W2, W3
+ sha256_4rounds \last, \k1, W1, W2, W3, W0
+ sha256_4rounds \last, \k2, W2, W3, W0, W1
+ sha256_4rounds \last, \k3, W3, W0, W1, W2
+.endm
+
+// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
+// int num_blocks);
+SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+ // Load the round constants into K0-K15.
+ vsetivli zero, 4, e32, m1, ta, ma
+ la t0, K256
+ vle32.v K0, (t0)
+ addi t0, t0, 16
+ vle32.v K1, (t0)
+ addi t0, t0, 16
+ vle32.v K2, (t0)
+ addi t0, t0, 16
+ vle32.v K3, (t0)
+ addi t0, t0, 16
+ vle32.v K4, (t0)
+ addi t0, t0, 16
+ vle32.v K5, (t0)
+ addi t0, t0, 16
+ vle32.v K6, (t0)
+ addi t0, t0, 16
+ vle32.v K7, (t0)
+ addi t0, t0, 16
+ vle32.v K8, (t0)
+ addi t0, t0, 16
+ vle32.v K9, (t0)
+ addi t0, t0, 16
+ vle32.v K10, (t0)
+ addi t0, t0, 16
+ vle32.v K11, (t0)
+ addi t0, t0, 16
+ vle32.v K12, (t0)
+ addi t0, t0, 16
+ vle32.v K13, (t0)
+ addi t0, t0, 16
+ vle32.v K14, (t0)
+ addi t0, t0, 16
+ vle32.v K15, (t0)
+
+ // Setup mask for the vmerge to replace the first word (idx==0) in
+ // message scheduling. There are 4 words, so an 8-bit mask suffices.
+ vsetivli zero, 1, e8, m1, ta, ma
+ vmv.v.i MASK, 0x01
+
+ // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
+ // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype
+ // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0},
+ // loaded using the 32-bit little endian value 0x00041014.
+ li t0, 0x00041014
+ vsetivli zero, 1, e32, m1, ta, ma
+ vmv.v.x INDICES, t0
+ addi STATEP_C, STATEP, 8
+ vsetivli zero, 4, e32, m1, ta, ma
+ vluxei8.v FEBA, (STATEP), INDICES
+ vluxei8.v HGDC, (STATEP_C), INDICES
+
+.Lnext_block:
+ addi NUM_BLOCKS, NUM_BLOCKS, -1
+
+ // Save the previous state, as it's needed later.
+ vmv.v.v PREV_FEBA, FEBA
+ vmv.v.v PREV_HGDC, HGDC
+
+ // Load the next 512-bit message block and endian-swap each 32-bit word.
+ vle32.v W0, (DATA)
+ vrev8.v W0, W0
+ addi DATA, DATA, 16
+ vle32.v W1, (DATA)
+ vrev8.v W1, W1
+ addi DATA, DATA, 16
+ vle32.v W2, (DATA)
+ vrev8.v W2, W2
+ addi DATA, DATA, 16
+ vle32.v W3, (DATA)
+ vrev8.v W3, W3
+ addi DATA, DATA, 16
+
+ // Do the 64 rounds of SHA-256.
+ sha256_16rounds 0, K0, K1, K2, K3
+ sha256_16rounds 0, K4, K5, K6, K7
+ sha256_16rounds 0, K8, K9, K10, K11
+ sha256_16rounds 1, K12, K13, K14, K15
+
+ // Add the previous state.
+ vadd.vv FEBA, FEBA, PREV_FEBA
+ vadd.vv HGDC, HGDC, PREV_HGDC
+
+ // Repeat if more blocks remain.
+ bnez NUM_BLOCKS, .Lnext_block
+
+ // Store the new state and return.
+ vsuxei8.v FEBA, (STATEP), INDICES
+ vsuxei8.v HGDC, (STATEP_C), INDICES
+ ret
+SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+.section ".rodata"
+.p2align 2
+.type K256, @object
+K256:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+.size K256, . - K256
diff --git a/arch/riscv/crypto/sha512-riscv64-glue.c b/arch/riscv/crypto/sha512-riscv64-glue.c
new file mode 100644
index 000000000000..43b56a08aeb5
--- /dev/null
+++ b/arch/riscv/crypto/sha512-riscv64-glue.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-512 and SHA-384 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sha512_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sha512_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sha512_transform_zvknhb_zvkb(
+ struct sha512_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ /*
+ * Ensure struct sha512_state begins directly with the SHA-512
+ * 512-bit internal state, as this is what the asm function expects.
+ */
+ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ sha512_base_do_update(desc, data, len,
+ sha512_transform_zvknhb_zvkb);
+ kernel_vector_end();
+ } else {
+ crypto_sha512_update(desc, data, len);
+ }
+ return 0;
+}
+
+static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ if (len)
+ sha512_base_do_update(desc, data, len,
+ sha512_transform_zvknhb_zvkb);
+ sha512_base_do_finalize(desc, sha512_transform_zvknhb_zvkb);
+ kernel_vector_end();
+
+ return sha512_base_finish(desc, out);
+ }
+
+ return crypto_sha512_finup(desc, data, len, out);
+}
+
+static int riscv64_sha512_final(struct shash_desc *desc, u8 *out)
+{
+ return riscv64_sha512_finup(desc, NULL, 0, out);
+}
+
+static int riscv64_sha512_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return sha512_base_init(desc) ?:
+ riscv64_sha512_finup(desc, data, len, out);
+}
+
+static struct shash_alg riscv64_sha512_algs[] = {
+ {
+ .init = sha512_base_init,
+ .update = riscv64_sha512_update,
+ .final = riscv64_sha512_final,
+ .finup = riscv64_sha512_finup,
+ .digest = riscv64_sha512_digest,
+ .descsize = sizeof(struct sha512_state),
+ .digestsize = SHA512_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_priority = 300,
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-riscv64-zvknhb-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+ }, {
+ .init = sha384_base_init,
+ .update = riscv64_sha512_update,
+ .final = riscv64_sha512_final,
+ .finup = riscv64_sha512_finup,
+ .descsize = sizeof(struct sha512_state),
+ .digestsize = SHA384_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_priority = 300,
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-riscv64-zvknhb-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+ },
+};
+
+static int __init riscv64_sha512_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKNHB) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_shashes(riscv64_sha512_algs,
+ ARRAY_SIZE(riscv64_sha512_algs));
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_sha512_mod_exit(void)
+{
+ crypto_unregister_shashes(riscv64_sha512_algs,
+ ARRAY_SIZE(riscv64_sha512_algs));
+}
+
+module_init(riscv64_sha512_mod_init);
+module_exit(riscv64_sha512_mod_exit);
+
+MODULE_DESCRIPTION("SHA-512 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha384");
diff --git a/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
new file mode 100644
index 000000000000..3a9ae210f915
--- /dev/null
+++ b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknhb')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvknhb, +zvkb
+
+#define STATEP a0
+#define DATA a1
+#define NUM_BLOCKS a2
+
+#define STATEP_C a3
+#define K a4
+
+#define MASK v0
+#define INDICES v1
+#define W0 v10 // LMUL=2
+#define W1 v12 // LMUL=2
+#define W2 v14 // LMUL=2
+#define W3 v16 // LMUL=2
+#define VTMP v20 // LMUL=2
+#define FEBA v22 // LMUL=2
+#define HGDC v24 // LMUL=2
+#define PREV_FEBA v26 // LMUL=2
+#define PREV_HGDC v28 // LMUL=2
+
+// Do 4 rounds of SHA-512. w0 contains the current 4 message schedule words.
+//
+// If not all the message schedule words have been computed yet, then this also
+// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
+// message schedule words; this macro computes the group after w3 and writes it
+// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
+// w0), so the caller must cycle through the registers accordingly.
+.macro sha512_4rounds last, w0, w1, w2, w3
+ vle64.v VTMP, (K)
+ addi K, K, 32
+ vadd.vv VTMP, VTMP, \w0
+ vsha2cl.vv HGDC, FEBA, VTMP
+ vsha2ch.vv FEBA, HGDC, VTMP
+.if !\last
+ vmerge.vvm VTMP, \w2, \w1, MASK
+ vsha2ms.vv \w0, VTMP, \w3
+.endif
+.endm
+
+.macro sha512_16rounds last
+ sha512_4rounds \last, W0, W1, W2, W3
+ sha512_4rounds \last, W1, W2, W3, W0
+ sha512_4rounds \last, W2, W3, W0, W1
+ sha512_4rounds \last, W3, W0, W1, W2
+.endm
+
+// void sha512_transform_zvknhb_zvkb(u64 state[8], const u8 *data,
+// int num_blocks);
+SYM_TYPED_FUNC_START(sha512_transform_zvknhb_zvkb)
+
+ // Setup mask for the vmerge to replace the first word (idx==0) in
+ // message scheduling. There are 4 words, so an 8-bit mask suffices.
+ vsetivli zero, 1, e8, m1, ta, ma
+ vmv.v.i MASK, 0x01
+
+ // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
+ // need {f,e,b,a},{h,g,d,c}. The dst vtype is e64m2 and the index vtype
+ // is e8mf4. We use index-load with the i8 indices {40, 32, 8, 0},
+ // loaded using the 32-bit little endian value 0x00082028.
+ li t0, 0x00082028
+ vsetivli zero, 1, e32, m1, ta, ma
+ vmv.v.x INDICES, t0
+ addi STATEP_C, STATEP, 16
+ vsetivli zero, 4, e64, m2, ta, ma
+ vluxei8.v FEBA, (STATEP), INDICES
+ vluxei8.v HGDC, (STATEP_C), INDICES
+
+.Lnext_block:
+ la K, K512
+ addi NUM_BLOCKS, NUM_BLOCKS, -1
+
+ // Save the previous state, as it's needed later.
+ vmv.v.v PREV_FEBA, FEBA
+ vmv.v.v PREV_HGDC, HGDC
+
+ // Load the next 1024-bit message block and endian-swap each 64-bit word
+ vle64.v W0, (DATA)
+ vrev8.v W0, W0
+ addi DATA, DATA, 32
+ vle64.v W1, (DATA)
+ vrev8.v W1, W1
+ addi DATA, DATA, 32
+ vle64.v W2, (DATA)
+ vrev8.v W2, W2
+ addi DATA, DATA, 32
+ vle64.v W3, (DATA)
+ vrev8.v W3, W3
+ addi DATA, DATA, 32
+
+ // Do the 80 rounds of SHA-512.
+ sha512_16rounds 0
+ sha512_16rounds 0
+ sha512_16rounds 0
+ sha512_16rounds 0
+ sha512_16rounds 1
+
+ // Add the previous state.
+ vadd.vv FEBA, FEBA, PREV_FEBA
+ vadd.vv HGDC, HGDC, PREV_HGDC
+
+ // Repeat if more blocks remain.
+ bnez NUM_BLOCKS, .Lnext_block
+
+ // Store the new state and return.
+ vsuxei8.v FEBA, (STATEP), INDICES
+ vsuxei8.v HGDC, (STATEP_C), INDICES
+ ret
+SYM_FUNC_END(sha512_transform_zvknhb_zvkb)
+
+.section ".rodata"
+.p2align 3
+.type K512, @object
+K512:
+ .dword 0x428a2f98d728ae22, 0x7137449123ef65cd
+ .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
+ .dword 0x3956c25bf348b538, 0x59f111f1b605d019
+ .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
+ .dword 0xd807aa98a3030242, 0x12835b0145706fbe
+ .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
+ .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
+ .dword 0x9bdc06a725c71235, 0xc19bf174cf692694
+ .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
+ .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
+ .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
+ .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
+ .dword 0x983e5152ee66dfab, 0xa831c66d2db43210
+ .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4
+ .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725
+ .dword 0x06ca6351e003826f, 0x142929670a0e6e70
+ .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926
+ .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
+ .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8
+ .dword 0x81c2c92e47edaee6, 0x92722c851482353b
+ .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001
+ .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30
+ .dword 0xd192e819d6ef5218, 0xd69906245565a910
+ .dword 0xf40e35855771202a, 0x106aa07032bbd1b8
+ .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
+ .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
+ .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
+ .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
+ .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60
+ .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec
+ .dword 0x90befffa23631e28, 0xa4506cebde82bde9
+ .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b
+ .dword 0xca273eceea26619c, 0xd186b8c721c0c207
+ .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
+ .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6
+ .dword 0x113f9804bef90dae, 0x1b710b35131c471b
+ .dword 0x28db77f523047d84, 0x32caab7b40c72493
+ .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
+ .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
+ .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+.size K512, . - K512
diff --git a/arch/riscv/crypto/sm3-riscv64-glue.c b/arch/riscv/crypto/sm3-riscv64-glue.c
new file mode 100644
index 000000000000..e1737a970c7c
--- /dev/null
+++ b/arch/riscv/crypto/sm3-riscv64-glue.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM3 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm3_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sm3_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sm3_transform_zvksh_zvkb(
+ struct sm3_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ /*
+ * Ensure struct sm3_state begins directly with the SM3
+ * 256-bit internal state, as this is what the asm function expects.
+ */
+ BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb);
+ kernel_vector_end();
+ } else {
+ sm3_update(shash_desc_ctx(desc), data, len);
+ }
+ return 0;
+}
+
+static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sm3_state *ctx;
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ if (len)
+ sm3_base_do_update(desc, data, len,
+ sm3_transform_zvksh_zvkb);
+ sm3_base_do_finalize(desc, sm3_transform_zvksh_zvkb);
+ kernel_vector_end();
+
+ return sm3_base_finish(desc, out);
+ }
+
+ ctx = shash_desc_ctx(desc);
+ if (len)
+ sm3_update(ctx, data, len);
+ sm3_final(ctx, out);
+
+ return 0;
+}
+
+static int riscv64_sm3_final(struct shash_desc *desc, u8 *out)
+{
+ return riscv64_sm3_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg riscv64_sm3_alg = {
+ .init = sm3_base_init,
+ .update = riscv64_sm3_update,
+ .final = riscv64_sm3_final,
+ .finup = riscv64_sm3_finup,
+ .descsize = sizeof(struct sm3_state),
+ .digestsize = SM3_DIGEST_SIZE,
+ .base = {
+ .cra_blocksize = SM3_BLOCK_SIZE,
+ .cra_priority = 300,
+ .cra_name = "sm3",
+ .cra_driver_name = "sm3-riscv64-zvksh-zvkb",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init riscv64_sm3_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKSH) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_shash(&riscv64_sm3_alg);
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_sm3_mod_exit(void)
+{
+ crypto_unregister_shash(&riscv64_sm3_alg);
+}
+
+module_init(riscv64_sm3_mod_init);
+module_exit(riscv64_sm3_mod_exit);
+
+MODULE_DESCRIPTION("SM3 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm3");
diff --git a/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
new file mode 100644
index 000000000000..a2b65d961c04
--- /dev/null
+++ b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SM3 Secure Hash extension ('Zvksh')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvksh, +zvkb
+
+#define STATEP a0
+#define DATA a1
+#define NUM_BLOCKS a2
+
+#define STATE v0 // LMUL=2
+#define PREV_STATE v2 // LMUL=2
+#define W0 v4 // LMUL=2
+#define W1 v6 // LMUL=2
+#define VTMP v8 // LMUL=2
+
+.macro sm3_8rounds i, w0, w1
+ // Do 4 rounds using W_{0+i}..W_{7+i}.
+ vsm3c.vi STATE, \w0, \i + 0
+ vslidedown.vi VTMP, \w0, 2
+ vsm3c.vi STATE, VTMP, \i + 1
+
+ // Compute W_{4+i}..W_{11+i}.
+ vslidedown.vi VTMP, \w0, 4
+ vslideup.vi VTMP, \w1, 4
+
+ // Do 4 rounds using W_{4+i}..W_{11+i}.
+ vsm3c.vi STATE, VTMP, \i + 2
+ vslidedown.vi VTMP, VTMP, 2
+ vsm3c.vi STATE, VTMP, \i + 3
+
+.if \i < 28
+ // Compute W_{16+i}..W_{23+i}.
+ vsm3me.vv \w0, \w1, \w0
+.endif
+ // For the next 8 rounds, w0 and w1 are swapped.
+.endm
+
+// void sm3_transform_zvksh_zvkb(u32 state[8], const u8 *data, int num_blocks);
+SYM_TYPED_FUNC_START(sm3_transform_zvksh_zvkb)
+
+ // Load the state and endian-swap each 32-bit word.
+ vsetivli zero, 8, e32, m2, ta, ma
+ vle32.v STATE, (STATEP)
+ vrev8.v STATE, STATE
+
+.Lnext_block:
+ addi NUM_BLOCKS, NUM_BLOCKS, -1
+
+ // Save the previous state, as it's needed later.
+ vmv.v.v PREV_STATE, STATE
+
+ // Load the next 512-bit message block into W0-W1.
+ vle32.v W0, (DATA)
+ addi DATA, DATA, 32
+ vle32.v W1, (DATA)
+ addi DATA, DATA, 32
+
+ // Do the 64 rounds of SM3.
+ sm3_8rounds 0, W0, W1
+ sm3_8rounds 4, W1, W0
+ sm3_8rounds 8, W0, W1
+ sm3_8rounds 12, W1, W0
+ sm3_8rounds 16, W0, W1
+ sm3_8rounds 20, W1, W0
+ sm3_8rounds 24, W0, W1
+ sm3_8rounds 28, W1, W0
+
+ // XOR in the previous state.
+ vxor.vv STATE, STATE, PREV_STATE
+
+ // Repeat if more blocks remain.
+ bnez NUM_BLOCKS, .Lnext_block
+
+ // Store the new state and return.
+ vrev8.v STATE, STATE
+ vse32.v STATE, (STATEP)
+ ret
+SYM_FUNC_END(sm3_transform_zvksh_zvkb)
diff --git a/arch/riscv/crypto/sm4-riscv64-glue.c b/arch/riscv/crypto/sm4-riscv64-glue.c
new file mode 100644
index 000000000000..47fb84ebe577
--- /dev/null
+++ b/arch/riscv/crypto/sm4-riscv64-glue.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SM4 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm4.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void sm4_expandkey_zvksed_zvkb(const u8 user_key[SM4_KEY_SIZE],
+ u32 rkey_enc[SM4_RKEY_WORDS],
+ u32 rkey_dec[SM4_RKEY_WORDS]);
+asmlinkage void sm4_crypt_zvksed_zvkb(const u32 rkey[SM4_RKEY_WORDS],
+ const u8 in[SM4_BLOCK_SIZE],
+ u8 out[SM4_BLOCK_SIZE]);
+
+static int riscv64_sm4_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (crypto_simd_usable()) {
+ if (keylen != SM4_KEY_SIZE)
+ return -EINVAL;
+ kernel_vector_begin();
+ sm4_expandkey_zvksed_zvkb(key, ctx->rkey_enc, ctx->rkey_dec);
+ kernel_vector_end();
+ return 0;
+ }
+ return sm4_expandkey(ctx, key, keylen);
+}
+
+static void riscv64_sm4_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ sm4_crypt_zvksed_zvkb(ctx->rkey_enc, src, dst);
+ kernel_vector_end();
+ } else {
+ sm4_crypt_block(ctx->rkey_enc, dst, src);
+ }
+}
+
+static void riscv64_sm4_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (crypto_simd_usable()) {
+ kernel_vector_begin();
+ sm4_crypt_zvksed_zvkb(ctx->rkey_dec, src, dst);
+ kernel_vector_end();
+ } else {
+ sm4_crypt_block(ctx->rkey_dec, dst, src);
+ }
+}
+
+static struct crypto_alg riscv64_sm4_alg = {
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_priority = 300,
+ .cra_name = "sm4",
+ .cra_driver_name = "sm4-riscv64-zvksed-zvkb",
+ .cra_cipher = {
+ .cia_min_keysize = SM4_KEY_SIZE,
+ .cia_max_keysize = SM4_KEY_SIZE,
+ .cia_setkey = riscv64_sm4_setkey,
+ .cia_encrypt = riscv64_sm4_encrypt,
+ .cia_decrypt = riscv64_sm4_decrypt,
+ },
+ .cra_module = THIS_MODULE,
+};
+
+static int __init riscv64_sm4_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKSED) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ return crypto_register_alg(&riscv64_sm4_alg);
+
+ return -ENODEV;
+}
+
+static void __exit riscv64_sm4_mod_exit(void)
+{
+ crypto_unregister_alg(&riscv64_sm4_alg);
+}
+
+module_init(riscv64_sm4_mod_init);
+module_exit(riscv64_sm4_mod_exit);
+
+MODULE_DESCRIPTION("SM4 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm4");
diff --git a/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S
new file mode 100644
index 000000000000..fae62179a4a3
--- /dev/null
+++ b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SM4 Block Cipher extension ('Zvksed')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvksed, +zvkb
+
+// void sm4_expandkey_zksed_zvkb(const u8 user_key[16], u32 rkey_enc[32],
+// u32 rkey_dec[32]);
+SYM_FUNC_START(sm4_expandkey_zvksed_zvkb)
+ vsetivli zero, 4, e32, m1, ta, ma
+
+ // Load the user key.
+ vle32.v v1, (a0)
+ vrev8.v v1, v1
+
+ // XOR the user key with the family key.
+ la t0, FAMILY_KEY
+ vle32.v v2, (t0)
+ vxor.vv v1, v1, v2
+
+ // Compute the round keys. Store them in forwards order in rkey_enc
+ // and in reverse order in rkey_dec.
+ addi a2, a2, 31*4
+ li t0, -4
+ .set i, 0
+.rept 8
+ vsm4k.vi v1, v1, i
+ vse32.v v1, (a1) // Store to rkey_enc.
+ vsse32.v v1, (a2), t0 // Store to rkey_dec.
+.if i < 7
+ addi a1, a1, 16
+ addi a2, a2, -16
+.endif
+ .set i, i + 1
+.endr
+
+ ret
+SYM_FUNC_END(sm4_expandkey_zvksed_zvkb)
+
+// void sm4_crypt_zvksed_zvkb(const u32 rkey[32], const u8 in[16], u8 out[16]);
+SYM_FUNC_START(sm4_crypt_zvksed_zvkb)
+ vsetivli zero, 4, e32, m1, ta, ma
+
+ // Load the input data.
+ vle32.v v1, (a1)
+ vrev8.v v1, v1
+
+ // Do the 32 rounds of SM4, 4 at a time.
+ .set i, 0
+.rept 8
+ vle32.v v2, (a0)
+ vsm4r.vs v1, v2
+.if i < 7
+ addi a0, a0, 16
+.endif
+ .set i, i + 1
+.endr
+
+ // Store the output data (in reverse element order).
+ vrev8.v v1, v1
+ li t0, -4
+ addi a2, a2, 12
+ vsse32.v v1, (a2), t0
+
+ ret
+SYM_FUNC_END(sm4_crypt_zvksed_zvkb)
+
+.section ".rodata"
+.p2align 2
+.type FAMILY_KEY, @object
+FAMILY_KEY:
+ .word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC
+.size FAMILY_KEY, . - FAMILY_KEY
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index 17a904869724..f2708a9494a1 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -18,9 +18,9 @@
#include <asm/sbi.h>
#include <asm/vendorid_list.h>
-#define ANDESTECH_AX45MP_MARCHID 0x8000000000008a45UL
-#define ANDESTECH_AX45MP_MIMPID 0x500UL
-#define ANDESTECH_SBI_EXT_ANDES 0x0900031E
+#define ANDES_AX45MP_MARCHID 0x8000000000008a45UL
+#define ANDES_AX45MP_MIMPID 0x500UL
+#define ANDES_SBI_EXT_ANDES 0x0900031E
#define ANDES_SBI_EXT_IOCP_SW_WORKAROUND 1
@@ -32,7 +32,7 @@ static long ax45mp_iocp_sw_workaround(void)
* ANDES_SBI_EXT_IOCP_SW_WORKAROUND SBI EXT checks if the IOCP is missing and
* cache is controllable only then CMO will be applied to the platform.
*/
- ret = sbi_ecall(ANDESTECH_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND,
+ ret = sbi_ecall(ANDES_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND,
0, 0, 0, 0, 0, 0);
return ret.error ? 0 : ret.value;
@@ -50,7 +50,7 @@ static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigne
done = true;
- if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID)
+ if (arch_id != ANDES_AX45MP_MARCHID || impid != ANDES_AX45MP_MIMPID)
return;
if (!ax45mp_iocp_sw_workaround())
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index b0487b39e674..776354895b81 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -183,6 +183,16 @@
REG_L x31, PT_T6(sp)
.endm
+/* Annotate a function as being unsuitable for kprobes. */
+#ifdef CONFIG_KPROBES
+#define ASM_NOKPROBE(name) \
+ .pushsection "_kprobe_blacklist", "aw"; \
+ RISCV_PTR name; \
+ .popsection
+#else
+#define ASM_NOKPROBE(name)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_ASM_H */
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 9ffc35537024..c4c2173dfe99 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -22,6 +22,16 @@
#include <asm-generic/bitops/fls.h>
#else
+#define __HAVE_ARCH___FFS
+#define __HAVE_ARCH___FLS
+#define __HAVE_ARCH_FFS
+#define __HAVE_ARCH_FLS
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+
#include <asm/alternative-macros.h>
#include <asm/hwcap.h>
@@ -37,8 +47,6 @@
static __always_inline unsigned long variable__ffs(unsigned long word)
{
- int num;
-
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -52,32 +60,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
return word;
legacy:
- num = 0;
-#if BITS_PER_LONG == 64
- if ((word & 0xffffffff) == 0) {
- num += 32;
- word >>= 32;
- }
-#endif
- if ((word & 0xffff) == 0) {
- num += 16;
- word >>= 16;
- }
- if ((word & 0xff) == 0) {
- num += 8;
- word >>= 8;
- }
- if ((word & 0xf) == 0) {
- num += 4;
- word >>= 4;
- }
- if ((word & 0x3) == 0) {
- num += 2;
- word >>= 2;
- }
- if ((word & 0x1) == 0)
- num += 1;
- return num;
+ return generic___ffs(word);
}
/**
@@ -93,8 +76,6 @@ legacy:
static __always_inline unsigned long variable__fls(unsigned long word)
{
- int num;
-
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
@@ -108,32 +89,7 @@ static __always_inline unsigned long variable__fls(unsigned long word)
return BITS_PER_LONG - 1 - word;
legacy:
- num = BITS_PER_LONG - 1;
-#if BITS_PER_LONG == 64
- if (!(word & (~0ul << 32))) {
- num -= 32;
- word <<= 32;
- }
-#endif
- if (!(word & (~0ul << (BITS_PER_LONG - 16)))) {
- num -= 16;
- word <<= 16;
- }
- if (!(word & (~0ul << (BITS_PER_LONG - 8)))) {
- num -= 8;
- word <<= 8;
- }
- if (!(word & (~0ul << (BITS_PER_LONG - 4)))) {
- num -= 4;
- word <<= 4;
- }
- if (!(word & (~0ul << (BITS_PER_LONG - 2)))) {
- num -= 2;
- word <<= 2;
- }
- if (!(word & (~0ul << (BITS_PER_LONG - 1))))
- num -= 1;
- return num;
+ return generic___fls(word);
}
/**
@@ -149,46 +105,23 @@ legacy:
static __always_inline int variable_ffs(int x)
{
- int r;
-
- if (!x)
- return 0;
-
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
+ if (!x)
+ return 0;
+
asm volatile (".option push\n"
".option arch,+zbb\n"
CTZW "%0, %1\n"
".option pop\n"
- : "=r" (r) : "r" (x) :);
+ : "=r" (x) : "r" (x) :);
- return r + 1;
+ return x + 1;
legacy:
- r = 1;
- if (!(x & 0xffff)) {
- x >>= 16;
- r += 16;
- }
- if (!(x & 0xff)) {
- x >>= 8;
- r += 8;
- }
- if (!(x & 0xf)) {
- x >>= 4;
- r += 4;
- }
- if (!(x & 3)) {
- x >>= 2;
- r += 2;
- }
- if (!(x & 1)) {
- x >>= 1;
- r += 1;
- }
- return r;
+ return generic_ffs(x);
}
/**
@@ -204,46 +137,23 @@ legacy:
static __always_inline int variable_fls(unsigned int x)
{
- int r;
-
- if (!x)
- return 0;
-
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
+ if (!x)
+ return 0;
+
asm volatile (".option push\n"
".option arch,+zbb\n"
CLZW "%0, %1\n"
".option pop\n"
- : "=r" (r) : "r" (x) :);
+ : "=r" (x) : "r" (x) :);
- return 32 - r;
+ return 32 - x;
legacy:
- r = 32;
- if (!(x & 0xffff0000u)) {
- x <<= 16;
- r -= 16;
- }
- if (!(x & 0xff000000u)) {
- x <<= 8;
- r -= 8;
- }
- if (!(x & 0xf0000000u)) {
- x <<= 4;
- r -= 4;
- }
- if (!(x & 0xc0000000u)) {
- x <<= 2;
- r -= 2;
- }
- if (!(x & 0x80000000u)) {
- x <<= 1;
- r -= 1;
- }
- return r;
+ return generic_fls(x);
}
/**
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index ea33288f8a25..1f2dbfb8a8bf 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -12,8 +12,8 @@
#include <asm/vendorid_list.h>
#ifdef CONFIG_ERRATA_ANDES
-#define ERRATA_ANDESTECH_NO_IOCP 0
-#define ERRATA_ANDESTECH_NUMBER 1
+#define ERRATA_ANDES_NO_IOCP 0
+#define ERRATA_ANDES_NUMBER 1
#endif
#ifdef CONFIG_ERRATA_SIFIVE
@@ -112,15 +112,6 @@ asm volatile(ALTERNATIVE( \
#define THEAD_C9XX_RV_IRQ_PMU 17
#define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5
-#define ALT_SBI_PMU_OVERFLOW(__ovl) \
-asm volatile(ALTERNATIVE( \
- "csrr %0, " __stringify(CSR_SSCOUNTOVF), \
- "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
- THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
- CONFIG_ERRATA_THEAD_PMU) \
- : "=r" (__ovl) : \
- : "memory")
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5340f818746b..bae7eac76c18 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -80,6 +80,7 @@
#define RISCV_ISA_EXT_ZFA 71
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
+#define RISCV_ISA_EXT_XANDESPMU 74
#define RISCV_ISA_EXT_MAX 128
#define RISCV_ISA_EXT_INVALID U32_MAX
diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h
new file mode 100644
index 000000000000..47b240d0d596
--- /dev/null
+++ b/arch/riscv/include/asm/membarrier.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_RISCV_MEMBARRIER_H
+#define _ASM_RISCV_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ /*
+ * Only need the full barrier when switching between processes.
+ * Barrier when switching from kernel to userspace is not
+ * required here, given that it is implied by mmdrop(). Barrier
+ * when switching from userspace to kernel is not needed after
+ * store to rq->curr.
+ */
+ if (IS_ENABLED(CONFIG_SMP) &&
+ likely(!(atomic_read(&next->membarrier_state) &
+ (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+ MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+ return;
+
+ /*
+ * The membarrier system call requires a full memory barrier
+ * after storing to rq->curr, before going back to user-space.
+ *
+ * This barrier is also needed for the SYNC_CORE command when
+ * switching between processes; in particular, on a transition
+ * from a thread belonging to another mm to a thread belonging
+ * to the mm for which a membarrier SYNC_CORE is done on CPU0:
+ *
+ * - [CPU0] sets all bits in the mm icache_stale_mask (in
+ * prepare_sync_core_cmd());
+ *
+ * - [CPU1] stores to rq->curr (by the scheduler);
+ *
+ * - [CPU0] loads rq->curr within membarrier and observes
+ * cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
+ * CPU1; this means membarrier relies on switch_mm() to
+ * issue the sync-core;
+ *
+ * - [CPU1] switch_mm() loads icache_stale_mask; if the bit
+ * is zero, switch_mm() may incorrectly skip the sync-core.
+ *
+ * Matches a full barrier in the proximity of the membarrier
+ * system call entry.
+ */
+ smp_mb();
+}
+
+#endif /* _ASM_RISCV_MEMBARRIER_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index d169a4f41a2e..deaf971253a2 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -95,7 +95,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
__pud_free(mm, pud);
}
-#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr)
+{
+ if (pgtable_l4_enabled) {
+ struct ptdesc *ptdesc = virt_to_ptdesc(pud);
+
+ pagetable_pud_dtor(ptdesc);
+ if (riscv_use_ipi_for_rfence())
+ tlb_remove_page_ptdesc(tlb, ptdesc);
+ else
+ tlb_remove_ptdesc(tlb, ptdesc);
+ }
+}
#define p4d_alloc_one p4d_alloc_one
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -124,7 +136,16 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
__p4d_free(mm, p4d);
}
-#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long addr)
+{
+ if (pgtable_l5_enabled) {
+ if (riscv_use_ipi_for_rfence())
+ tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d));
+ else
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
+ }
+}
#endif /* __PAGETABLE_PMD_FOLDED */
static inline void sync_kernel_mappings(pgd_t *pgd)
@@ -149,15 +170,31 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
#ifndef __PAGETABLE_PMD_FOLDED
-#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
+
+ pagetable_pmd_dtor(ptdesc);
+ if (riscv_use_ipi_for_rfence())
+ tlb_remove_page_ptdesc(tlb, ptdesc);
+ else
+ tlb_remove_ptdesc(tlb, ptdesc);
+}
#endif /* __PAGETABLE_PMD_FOLDED */
-#define __pte_free_tlb(tlb, pte, buf) \
-do { \
- pagetable_pte_dtor(page_ptdesc(pte)); \
- tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\
-} while (0)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+ unsigned long addr)
+{
+ struct ptdesc *ptdesc = page_ptdesc(pte);
+
+ pagetable_pte_dtor(ptdesc);
+ if (riscv_use_ipi_for_rfence())
+ tlb_remove_page_ptdesc(tlb, ptdesc);
+ else
+ tlb_remove_ptdesc(tlb, ptdesc);
+}
#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 0c94260b5d0c..3e9177f40c68 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -439,6 +439,12 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+#define pte_leaf_size(pte) (pte_napot(pte) ? \
+ napot_cont_size(napot_cont_order(pte)) :\
+ PAGE_SIZE)
+#endif
+
#ifdef CONFIG_NUMA_BALANCING
/*
* See the comment in include/asm-generic/pgtable.h
@@ -656,6 +662,12 @@ static inline int pmd_write(pmd_t pmd)
return pte_write(pmd_pte(pmd));
}
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return pte_write(pud_pte(pud));
+}
+
#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
diff --git a/arch/riscv/include/asm/sync_core.h b/arch/riscv/include/asm/sync_core.h
new file mode 100644
index 000000000000..9153016da8f1
--- /dev/null
+++ b/arch/riscv/include/asm/sync_core.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_SYNC_CORE_H
+#define _ASM_RISCV_SYNC_CORE_H
+
+/*
+ * RISC-V implements return to user-space through an xRET instruction,
+ * which is not core serializing.
+ */
+static inline void sync_core_before_usermode(void)
+{
+ asm volatile ("fence.i" ::: "memory");
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Ensure the next switch_mm() on every CPU issues a core serializing
+ * instruction for the given @mm.
+ */
+static inline void prepare_sync_core_cmd(struct mm_struct *mm)
+{
+ cpumask_setall(&mm->context.icache_stale_mask);
+}
+#else
+static inline void prepare_sync_core_cmd(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_RISCV_SYNC_CORE_H */
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 1eb5682b2af6..a0b8b853503f 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -10,6 +10,24 @@ struct mmu_gather;
static void tlb_flush(struct mmu_gather *tlb);
+#ifdef CONFIG_MMU
+#include <linux/swap.h>
+
+/*
+ * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to
+ * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use
+ * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this
+ * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the
+ * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * for more details.
+ */
+static inline void __tlb_remove_table(void *table)
+{
+ free_page_and_swap_cache(table);
+}
+
+#endif /* CONFIG_MMU */
+
#define tlb_flush tlb_flush
#include <asm-generic/tlb.h>
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 0cd6f0a027d1..731dcd0ed4de 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -284,4 +284,15 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#endif /* CONFIG_RISCV_ISA_V */
+/*
+ * Return the implementation's vlen value.
+ *
+ * riscv_v_vsize contains the value of "32 vector registers with vlenb length"
+ * so rebuild the vlen value in bits from it.
+ */
+static inline int riscv_vector_vlen(void)
+{
+ return riscv_v_vsize / 32 * 8;
+}
+
#endif /* ! __ASM_RISCV_VECTOR_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index e55407ace0c3..2f2bb0c84f9a 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -5,7 +5,7 @@
#ifndef ASM_VENDOR_LIST_H
#define ASM_VENDOR_LIST_H
-#define ANDESTECH_VENDOR_ID 0x31e
+#define ANDES_VENDOR_ID 0x31e
#define SIFIVE_VENDOR_ID 0x489
#define THEAD_VENDOR_ID 0x5b7
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index 924d01b56c9a..51f6dfe19745 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -19,65 +19,6 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return true;
}
-#ifdef CONFIG_RISCV_ISA_SVNAPOT
-#include <linux/pgtable.h>
+#endif
-#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
-static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
- u64 pfn, unsigned int max_page_shift)
-{
- unsigned long map_size = PAGE_SIZE;
- unsigned long size, order;
-
- if (!has_svnapot())
- return map_size;
-
- for_each_napot_order_rev(order) {
- if (napot_cont_shift(order) > max_page_shift)
- continue;
-
- size = napot_cont_size(order);
- if (end - addr < size)
- continue;
-
- if (!IS_ALIGNED(addr, size))
- continue;
-
- if (!IS_ALIGNED(PFN_PHYS(pfn), size))
- continue;
-
- map_size = size;
- break;
- }
-
- return map_size;
-}
-
-#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
-static inline int arch_vmap_pte_supported_shift(unsigned long size)
-{
- int shift = PAGE_SHIFT;
- unsigned long order;
-
- if (!has_svnapot())
- return shift;
-
- WARN_ON_ONCE(size >= PMD_SIZE);
-
- for_each_napot_order_rev(order) {
- if (napot_cont_size(order) > size)
- continue;
-
- if (!IS_ALIGNED(size, napot_cont_size(order)))
- continue;
-
- shift = napot_cont_shift(order);
- break;
- }
-
- return shift;
-}
-
-#endif /* CONFIG_RISCV_ISA_SVNAPOT */
-#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
#endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 319a1da0358b..0128b161bfda 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_ANDES
- case ANDESTECH_VENDOR_ID:
+ case ANDES_VENDOR_ID:
cpu_mfr_info->patch_func = andes_errata_patch_func;
break;
#endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 319670af5704..afeae3ff43dc 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -293,6 +293,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 9d1a305d5508..68a24cf9481a 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -111,6 +111,7 @@ SYM_CODE_START(handle_exception)
1:
tail do_trap_unknown
SYM_CODE_END(handle_exception)
+ASM_NOKPROBE(handle_exception)
/*
* The ret_from_exception must be called with interrupt disabled. Here is the
@@ -184,6 +185,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
sret
#endif
SYM_CODE_END(ret_from_exception)
+ASM_NOKPROBE(ret_from_exception)
#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
@@ -219,6 +221,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
move a0, sp
tail handle_bad_stack
SYM_CODE_END(handle_kernel_stack_overflow)
+ASM_NOKPROBE(handle_kernel_stack_overflow)
#endif
SYM_CODE_START(ret_from_fork)
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
index 07915dc9279e..b75f150b923d 100644
--- a/arch/riscv/kernel/pi/Makefile
+++ b/arch/riscv/kernel/pi/Makefile
@@ -9,6 +9,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# Disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
KBUILD_CFLAGS += -mcmodel=medany
CFLAGS_cmdline_early.o += -D__NO_FORTIFY
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 519b6bd946e5..cfbe4b840d42 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -28,7 +28,6 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
-#include <asm/cpufeature.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index a1b9be3c4332..868d6280cf66 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/randomize_kstack.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/signal.h>
@@ -310,7 +311,8 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
}
}
-asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+asmlinkage __visible __trap_section __no_stack_protector
+void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
long syscall = regs->a7;
@@ -322,10 +324,23 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
syscall = syscall_enter_from_user_mode(regs, syscall);
+ add_random_kstack_offset();
+
if (syscall >= 0 && syscall < NR_syscalls)
syscall_handler(regs, syscall);
else if (syscall != -1)
regs->a0 = -ENOSYS;
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
+ * for RV32I or RV64I.
+ *
+ * The resulting 6 bits of entropy is seen in SP[9:4].
+ */
+ choose_random_kstack_offset(get_random_u16());
syscall_exit_to_user_mode(regs);
} else {
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 51ab5588e9ff..7c45f26de4f7 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
-#include <asm-generic/export.h>
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 217fd4de6134..ba8eb3944687 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (unlikely(prev == next))
return;
+ membarrier_arch_switch_mm(prev, next, task);
+
/*
* Mark the current MM context as inactive, and the next as
* active. This is at least used by the icache flushing
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 32cad6a65ccd..c55915554836 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -767,6 +767,11 @@ static int __init print_no5lvl(char *p)
}
early_param("no5lvl", print_no5lvl);
+static void __init set_mmap_rnd_bits_max(void)
+{
+ mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
+}
+
/*
* There is a simple way to determine if 4-level is supported by the
* underlying hardware: establish 1:1 mapping in 4-level page table mode
@@ -1081,6 +1086,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
set_satp_mode(dtb_pa);
+ set_mmap_rnd_bits_max();
#endif
/*