Diffstat (limited to 'lib'): 170 files changed, 12701 insertions(+), 2444 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig index df3f3da95990..46806332a8cc 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -342,6 +342,10 @@ config DECOMPRESS_LZ4 select LZ4_DECOMPRESS tristate +config DECOMPRESS_ZSTD + select ZSTD_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # @@ -631,7 +635,12 @@ config UACCESS_MEMCPY config ARCH_HAS_UACCESS_FLUSHCACHE bool -config ARCH_HAS_UACCESS_MCSAFE +# arch has a concept of a recoverable synchronous exception due to a +# memory-read error like x86 machine-check or ARM data-abort, and +# implements copy_mc_to_{user,kernel} to abort and report +# 'bytes-transferred' if that exception fires when accessing the source +# buffer. +config ARCH_HAS_COPY_MC bool # Temporary. Goes away when all archs are cleaned up @@ -676,3 +685,10 @@ config GENERIC_LIB_CMPDI2 config GENERIC_LIB_UCMPDI2 bool + +config GENERIC_LIB_DEVMEM_IS_ALLOWED + bool + +config PLDMFW + bool + default n diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ad9210d70a1..e6e58b26e888 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -212,9 +212,10 @@ config DEBUG_INFO If unsure, say N. +if DEBUG_INFO + config DEBUG_INFO_REDUCED bool "Reduce debugging information" - depends on DEBUG_INFO help If you say Y here gcc is instructed to generate less debugging information for structure types. This means that tools that @@ -227,7 +228,6 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_COMPRESSED bool "Compressed debugging information" - depends on DEBUG_INFO depends on $(cc-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help @@ -243,7 +243,6 @@ config DEBUG_INFO_COMPRESSED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO depends on $(cc-option,-gsplit-dwarf) help Generate debug info into separate .dwo files. This significantly @@ -259,7 +258,6 @@ config DEBUG_INFO_SPLIT config DEBUG_INFO_DWARF4 bool "Generate dwarf4 debuginfo" - depends on DEBUG_INFO depends on $(cc-option,-gdwarf-4) help Generate dwarf4 debug info. This requires recent versions @@ -269,7 +267,6 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_BTF bool "Generate BTF typeinfo" - depends on DEBUG_INFO depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST help @@ -277,9 +274,17 @@ config DEBUG_INFO_BTF Turning this on expects presence of pahole tool, which will convert DWARF type info into equivalent deduplicated BTF type info. +config PAHOLE_HAS_SPLIT_BTF + def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") + +config DEBUG_INFO_BTF_MODULES + def_bool y + depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF + help + Generate compact split BTF type information for kernel modules. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" - depends on DEBUG_INFO help This creates the required links to GDB helper scripts in the build directory. If you load vmlinux into gdb, the helper @@ -288,6 +293,8 @@ config GDB_SCRIPTS instance. See Documentation/dev-tools/gdb-kernel-debugging.rst for further details. +endif # DEBUG_INFO + config ENABLE_MUST_CHECK bool "Enable __must_check logic" default y @@ -365,6 +372,17 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. 
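
Editor's note: the ARCH_HAS_COPY_MC comment above describes a "bytes-transferred" contract — copy_mc_to_kernel() returns how many bytes it could NOT copy when a machine-check (or ARM data-abort) fires while reading the source. A minimal sketch of a caller honouring that contract; the pmem_read_region() helper is hypothetical, only copy_mc_to_kernel() itself is the real <linux/uaccess.h> API:

	#include <linux/types.h>
	#include <linux/uaccess.h>
	#include <linux/errno.h>

	/*
	 * Hypothetical consumer of the ARCH_HAS_COPY_MC contract: if a
	 * recoverable exception fires while reading @src, copy_mc_to_kernel()
	 * returns the number of uncopied bytes instead of panicking, so the
	 * caller can report a short read.
	 */
	static ssize_t pmem_read_region(void *dst, const void *src, size_t len)
	{
		unsigned long rem = copy_mc_to_kernel(dst, src, len);

		if (rem == len)
			return -EIO;		/* no progress at all */
		return len - rem;		/* full or partial read */
	}
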
+config DEBUG_FORCE_FUNCTION_ALIGN_32B + bool "Force all function address 32B aligned" if EXPERT + help + There are cases that a commit from one domain changes the function + address alignment of other domains, and cause magic performance + bump (regression or improvement). Enable this option will help to + verify if the bump is caused by function alignment changes, while + it will slightly increase the kernel size and affect icache usage. + + It is mainly for debug and performance tuning use. + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config @@ -476,9 +494,41 @@ config DEBUG_FS If unsure, say N. -source "lib/Kconfig.kgdb" +choice + prompt "Debugfs default access" + depends on DEBUG_FS + default DEBUG_FS_ALLOW_ALL + help + This selects the default access restrictions for debugfs. + It can be overridden with kernel command line option + debugfs=[on,no-mount,off]. The restrictions apply for API access + and filesystem registration. +config DEBUG_FS_ALLOW_ALL + bool "Access normal" + help + No restrictions apply. Both API and filesystem registration + is on. This is the normal default operation. + +config DEBUG_FS_DISALLOW_MOUNT + bool "Do not register debugfs as filesystem" + help + The API is open but filesystem is not loaded. Clients can still do + their work and read with debug tools that do not need + debugfs filesystem. + +config DEBUG_FS_ALLOW_NONE + bool "No access" + help + Access is off. Clients get -PERM when trying to create nodes in + debugfs tree and debugfs is not registered as a filesystem. + Client can then back-off or continue without debugfs access. + +endchoice + +source "lib/Kconfig.kgdb" source "lib/Kconfig.ubsan" +source "lib/Kconfig.kcsan" endmenu @@ -808,9 +858,31 @@ config DEBUG_PER_CPU_MAPS Say N if unsure. +config DEBUG_KMAP_LOCAL + bool "Debug kmap_local temporary mappings" + depends on DEBUG_KERNEL && KMAP_LOCAL + help + This option enables additional error checking for the kmap_local + infrastructure. Disable for production use. + +config ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + bool + +config DEBUG_KMAP_LOCAL_FORCE_MAP + bool "Enforce kmap_local temporary mappings" + depends on DEBUG_KERNEL && ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + select KMAP_LOCAL + select DEBUG_KMAP_LOCAL + help + This option enforces temporary mappings through the kmap_local + mechanism for non-highmem pages and on non-highmem systems. + Disable this for production systems! + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM + select DEBUG_KMAP_LOCAL_FORCE_MAP if ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + select DEBUG_KMAP_LOCAL help This option enables additional error checking for high memory systems. Disable for production systems. @@ -844,10 +916,10 @@ config DEBUG_SHIRQ bool "Debug shared IRQ handlers" depends on DEBUG_KERNEL help - Enable this to generate a spurious interrupt as soon as a shared - interrupt handler is registered, and just before one is deregistered. - Drivers ought to be able to handle interrupts coming in at those - points; some don't and need to be caught. + Enable this to generate a spurious interrupt just before a shared + interrupt handler is deregistered (generating one when registering + is currently disabled). Drivers need to handle this correctly. Some + don't and need to be caught. 
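
Editor's note: with the new "Debugfs default access" choice above, debugfs node creation can now fail by policy (DEBUG_FS_ALLOW_NONE hands clients an error), so the help text's "back-off or continue" advice matters. A sketch of that pattern; the "demo" module and its names are hypothetical, the debugfs calls are the real <linux/debugfs.h> API:

	#include <linux/debugfs.h>
	#include <linux/err.h>
	#include <linux/module.h>

	static struct dentry *demo_dir;
	static u32 demo_value;

	static int __init demo_init(void)
	{
		/* Under DEBUG_FS_ALLOW_NONE this returns an error pointer... */
		demo_dir = debugfs_create_dir("demo", NULL);
		if (IS_ERR_OR_NULL(demo_dir))
			return 0;	/* ...so back off, run without debugfs */

		debugfs_create_u32("value", 0444, demo_dir, &demo_value);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		debugfs_remove(demo_dir);	/* NULL/ERR_PTR-safe */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
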
menu "Debug Oops, Lockups and Hangs" @@ -874,7 +946,7 @@ config PANIC_TIMEOUT int "panic timeout" default 0 help - Set the timeout value (in seconds) until a reboot occurs when the + Set the timeout value (in seconds) until a reboot occurs when the kernel panics. If n = 0, then we wait forever. A timeout value n > 0 will wait n seconds before rebooting, while a timeout value n < 0 will reboot immediately. @@ -1035,6 +1107,7 @@ config WQ_WATCHDOG config TEST_LOCKUP tristate "Test module to generate lockups" + depends on m help This builds the "test_lockup" module that helps to make sure that watchdogs and lockup detectors are working properly. @@ -1117,6 +1190,7 @@ config PROVE_LOCKING select DEBUG_RWSEMS select DEBUG_WW_MUTEX_SLOWPATH select DEBUG_LOCK_ALLOC + select PREEMPT_COUNT if !ARCH_NO_PREEMPT select TRACE_IRQFLAGS default n help @@ -1322,14 +1396,41 @@ config WW_MUTEX_SELFTEST Say M if you want these self tests to build as a module. Say N if you are unsure. +config SCF_TORTURE_TEST + tristate "torture tests for smp_call_function*()" + depends on DEBUG_KERNEL + select TORTURE_TEST + help + This option provides a kernel module that runs torture tests + on the smp_call_function() family of primitives. The kernel + module may be built after the fact on the running kernel to + be tested, if desired. + +config CSD_LOCK_WAIT_DEBUG + bool "Debugging for csd_lock_wait(), called from smp_call_function*()" + depends on DEBUG_KERNEL + depends on 64BIT + default n + help + This option enables debug prints when CPUs are slow to respond + to the smp_call_function*() IPI wrappers. These debug prints + include the IPI handler function currently executing (if any) + and relevant stack traces. + endmenu # lock debugging config TRACE_IRQFLAGS + depends on TRACE_IRQFLAGS_SUPPORT bool help Enables hooks to interrupt enabling and disabling for either tracing or lock debugging. +config TRACE_IRQFLAGS_NMI + def_bool y + depends on TRACE_IRQFLAGS + depends on TRACE_IRQFLAGS_NMI_SUPPORT + config STACKTRACE bool "Stack backtrace support" depends on STACKTRACE_SUPPORT @@ -1569,15 +1670,13 @@ config PROVIDE_OHCI1394_DMA_INIT source "samples/Kconfig" -source "lib/Kconfig.kcsan" - config ARCH_HAS_DEVMEM_IS_ALLOWED bool config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM - depends on ARCH_HAS_DEVMEM_IS_ALLOWED + depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED default y if PPC || X86 || ARM64 help If this option is disabled, you allow userspace (root) access to all @@ -1719,6 +1818,13 @@ config FAIL_PAGE_ALLOC help Provide fault-injection capability for alloc_pages(). +config FAULT_INJECTION_USERCOPY + bool "Fault injection capability for usercopy functions" + depends on FAULT_INJECTION + help + Provides fault-injection capability to inject failures + in usercopy functions (copy_from_user(), get_user(), ...). + config FAIL_MAKE_REQUEST bool "Fault-injection capability for disk IO" depends on FAULT_INJECTION && BLOCK @@ -1986,13 +2092,6 @@ config TEST_BITMAP If unsure, say N. -config TEST_BITFIELD - tristate "Test bitfield functions at runtime" - help - Enable this option to test the bitfield functions at boot. - - If unsure, say N. - config TEST_UUID tristate "Test functions located in the uuid module at runtime" @@ -2142,6 +2241,33 @@ config TEST_SYSCTL If unsure, say N. +config BITFIELD_KUNIT + tristate "KUnit test bitfield functions at runtime" + depends on KUNIT + help + Enable this option to test the bitfield functions at boot. 
+ + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +config RESOURCE_KUNIT_TEST + tristate "KUnit test for resource API" + depends on KUNIT + help + This builds the resource API unit test. + Tests the logic of API provided by resource.c and ioport.h. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config SYSCTL_KUNIT_TEST tristate "KUnit test for sysctl" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2164,7 +2290,7 @@ config LIST_KUNIT_TEST and associated macros. KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs + in TAP format (https://testanything.org/). Only useful for kernel devs running the KUnit test harness, and not intended for inclusion into a production build. @@ -2185,6 +2311,28 @@ config LINEAR_RANGES_TEST If unsure, say N. +config CMDLINE_KUNIT_TEST + tristate "KUnit test for cmdline API" + depends on KUNIT + help + This builds the cmdline API unit test. + Tests the logic of API provided by cmdline.c. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +config BITS_TEST + tristate "KUnit test for bits.h" + depends on KUNIT + help + This builds the bits unit test. + Tests the logic of macros defined in bits.h. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help @@ -2307,6 +2455,26 @@ config TEST_HMM If unsure, say N. +config TEST_FREE_PAGES + tristate "Test freeing pages" + help + Test that a memory leak does not occur due to a race between + freeing a block of pages and a speculative page reference. + Loading this module is safe if your kernel has the bug fixed. + If the bug is not fixed, it will leak gigabytes of memory and + probably OOM your system. + +config TEST_FPU + tristate "Test floating point operations in kernel space" + depends on X86 && !KCOV_INSTRUMENT_ALL + help + Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu + which will trigger a sequence of floating point operations. This is used + for self-testing floating point control register setting in + kernel_fpu_begin(). + + If unsure, say N. + endif # RUNTIME_TESTING_MENU config MEMTEST @@ -2331,4 +2499,6 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +source "Documentation/Kconfig" + endmenu # Kernel hacking diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 81f5464ea9e1..f5fa4ba126bf 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,7 +6,10 @@ config HAVE_ARCH_KASAN config HAVE_ARCH_KASAN_SW_TAGS bool -config HAVE_ARCH_KASAN_VMALLOC +config HAVE_ARCH_KASAN_HW_TAGS + bool + +config HAVE_ARCH_KASAN_VMALLOC bool config CC_HAS_KASAN_GENERIC @@ -15,76 +18,105 @@ config CC_HAS_KASAN_GENERIC config CC_HAS_KASAN_SW_TAGS def_bool $(cc-option, -fsanitize=kernel-hwaddress) -config KASAN +# This option is only required for software KASAN modes. 
+# Old GCC versions don't have proper support for no_sanitize_address. +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89124 for details. +config CC_HAS_WORKING_NOSANITIZE_ADDRESS + def_bool !CC_IS_GCC || GCC_VERSION >= 80300 + +menuconfig KASAN bool "KASAN: runtime memory debugger" - depends on (HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \ - (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS) + depends on (((HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \ + (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \ + CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ + HAVE_ARCH_KASAN_HW_TAGS depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) + select STACKDEPOT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. See Documentation/dev-tools/kasan.rst for details. +if KASAN + choice prompt "KASAN mode" - depends on KASAN default KASAN_GENERIC help - KASAN has two modes: generic KASAN (similar to userspace ASan, - x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC) and - software tag-based KASAN (a version based on software memory - tagging, arm64 only, similar to userspace HWASan, enabled with - CONFIG_KASAN_SW_TAGS). - Both generic and tag-based KASAN are strictly debugging features. + KASAN has three modes: + 1. generic KASAN (similar to userspace ASan, + x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC), + 2. software tag-based KASAN (arm64 only, based on software + memory tagging (similar to userspace HWASan), enabled with + CONFIG_KASAN_SW_TAGS), and + 3. hardware tag-based KASAN (arm64 only, based on hardware + memory tagging, enabled with CONFIG_KASAN_HW_TAGS). + + All KASAN modes are strictly debugging features. + + For better error reports enable CONFIG_STACKTRACE. config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) select SLUB_DEBUG if SLUB select CONSTRUCTORS - select STACKDEPOT help Enables generic KASAN mode. - Supported in both GCC and Clang. With GCC it requires version 4.9.2 - or later for basic support and version 5.0 or later for detection of - out-of-bounds accesses for stack and global variables and for inline - instrumentation mode (CONFIG_KASAN_INLINE). With Clang it requires - version 3.7.0 or later and it doesn't support detection of - out-of-bounds accesses for global variables yet. + + This mode is supported in both GCC and Clang. With GCC it requires + version 8.3.0 or later. Any supported Clang version is compatible, + but detection of out-of-bounds accesses for global variables is + supported only since Clang 11. + This mode consumes about 1/8th of available memory at kernel start and introduces an overhead of ~x1.5 for the rest of the allocations. The performance slowdown is ~x3. - For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN_GENERIC doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) select SLUB_DEBUG if SLUB select CONSTRUCTORS - select STACKDEPOT help Enables software tag-based KASAN mode. - This mode requires Top Byte Ignore support by the CPU and therefore - is only supported for arm64. - This mode requires Clang version 7.0.0 or later. + + This mode require software memory tagging support in the form of + HWASan-like compiler instrumentation. 
+ + Currently this mode is only implemented for arm64 CPUs and relies on + Top Byte Ignore. This mode requires Clang. + This mode consumes about 1/16th of available memory at kernel start and introduces an overhead of ~20% for the rest of the allocations. This mode may potentially introduce problems relating to pointer casting and comparison, as it embeds tags into the top byte of each pointer. - For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN_SW_TAGS doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_HW_TAGS + bool "Hardware tag-based mode" + depends on HAVE_ARCH_KASAN_HW_TAGS + depends on SLUB + help + Enables hardware tag-based KASAN mode. + + This mode requires hardware memory tagging support, and can be used + by any architecture that provides it. + + Currently this mode is only implemented for arm64 CPUs starting from + ARMv8.5 and relies on Memory Tagging Extension and Top Byte Ignore. + endchoice choice prompt "Instrumentation type" - depends on KASAN + depends on KASAN_GENERIC || KASAN_SW_TAGS default KASAN_OUTLINE config KASAN_OUTLINE @@ -103,13 +135,12 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. - For CONFIG_KASAN_GENERIC this requires GCC 5.0 or later. endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST - depends on KASAN + depends on KASAN_GENERIC || KASAN_SW_TAGS help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see @@ -128,15 +159,6 @@ config KASAN_STACK default 1 if KASAN_STACK_ENABLE || CC_IS_GCC default 0 -config KASAN_S390_4_LEVEL_PAGING - bool "KASan: use 4-level paging" - depends on KASAN && S390 - help - Compiling the kernel with KASan disables automatic 3-level vs - 4-level paging selection. 3-level paging is used by default (up - to 3TB of RAM with KASan enabled). This options allows to force - 4-level paging instead. - config KASAN_SW_TAGS_IDENTIFY bool "Enable memory corruption identification" depends on KASAN_SW_TAGS @@ -147,7 +169,7 @@ config KASAN_SW_TAGS_IDENTIFY config KASAN_VMALLOC bool "Back mappings in vmalloc space with real shadow memory" - depends on KASAN && HAVE_ARCH_KASAN_VMALLOC + depends on KASAN_GENERIC && HAVE_ARCH_KASAN_VMALLOC help By default, the shadow region for vmalloc space is the read-only zero page. This means that KASAN cannot detect errors involving @@ -158,10 +180,24 @@ config KASAN_VMALLOC for KASAN to detect more sorts of errors (and to support vmapped stacks), but at the cost of higher memory usage. -config TEST_KASAN - tristate "Module for testing KASAN for bug detection" - depends on m && KASAN +config KASAN_KUNIT_TEST + tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS + depends on KASAN && KUNIT + default KUNIT_ALL_TESTS help - This is a test module doing various nasty things like - out of bounds accesses, use after free. It is useful for testing + This is a KUnit test suite doing various nasty things like + out of bounds and use after free accesses. It is useful for testing kernel debugging features like KASAN. 
+ + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit + +config TEST_KASAN_MODULE + tristate "KUnit-incompatible tests of KASAN bug detection capabilities" + depends on m && KASAN + help + This is a part of the KASAN test suite that is incompatible with + KUnit. Currently includes tests that do bad copy_from/to_user + accesses. + +endif # KASAN diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 5ee88e5119c2..f271ff5fbb5a 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -4,7 +4,8 @@ config HAVE_ARCH_KCSAN bool config HAVE_KCSAN_COMPILER - def_bool CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-distinguish-volatile=1) + def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-distinguish-volatile=1)) || \ + (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-distinguish-volatile=1)) help For the list of compilers that support KCSAN, please see <file:Documentation/dev-tools/kcsan.rst>. @@ -39,6 +40,11 @@ menuconfig KCSAN if KCSAN +# Compiler capabilities that should not fail the test if they are unavailable. +config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE + def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \ + (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1)) + config KCSAN_VERBOSE bool "Show verbose reports with more information about system state" depends on PROVE_LOCKING @@ -59,7 +65,28 @@ config KCSAN_SELFTEST bool "Perform short selftests on boot" default y help - Run KCSAN selftests on boot. On test failure, causes the kernel to panic. + Run KCSAN selftests on boot. On test failure, causes the kernel to + panic. Recommended to be enabled, ensuring critical functionality + works as intended. + +config KCSAN_TEST + tristate "KCSAN test for integrated runtime behaviour" + depends on TRACEPOINTS && KUNIT + select TORTURE_TEST + help + KCSAN test focusing on behaviour of the integrated runtime. Tests + various race scenarios, and verifies the reports generated to + console. Makes use of KUnit for test organization, and the Torture + framework for test thread control. + + Each test case may run at least up to KCSAN_REPORT_ONCE_IN_MS + milliseconds. Test run duration may be optimized by building the + kernel and KCSAN test with KCSAN_REPORT_ONCE_IN_MS set to a lower + than default value. + + Say Y here if you want the test to be built into the kernel and run + during boot; say M if you want the test to build as a module; say N + if you are unsure. config KCSAN_EARLY_ENABLE bool "Early enable during boot" diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index ffa7a76de086..05dae05b6cc9 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,6 +3,11 @@ config HAVE_ARCH_KGDB bool +# set if architecture has the its kgdb_arch_handle_qxfer_pkt +# function to enable gdb stub to address XML packet sent from GDB. +config HAVE_ARCH_KGDB_QXFER_PKT + bool + menuconfig KGDB bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB @@ -19,6 +24,21 @@ menuconfig KGDB if KGDB +config KGDB_HONOUR_BLOCKLIST + bool "KGDB: use kprobe blocklist to prohibit unsafe breakpoints" + depends on HAVE_KPROBES + depends on MODULES + select KPROBES + default y + help + If set to Y the debug core will use the kprobe blocklist to + identify symbols where it is unsafe to set breakpoints. 
+ In particular this disallows instrumentation of functions + called during debug trap handling and thus makes it very + difficult to inadvertently provoke recursive trap handling. + + If unsure, say Y. + config KGDB_SERIAL_CONSOLE tristate "KGDB: use kgdb over the serial console" select CONSOLE_POLL diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 774315de555a..8b635fd75fe4 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -14,6 +14,7 @@ if UBSAN config UBSAN_TRAP bool "On Sanitizer warnings, abort the running kernel code" + depends on !COMPILE_TEST depends on $(cc-option, -fsanitize-undefined-trap-on-error) help Building kernels with Sanitizer features enabled tends to grow @@ -36,10 +37,17 @@ config UBSAN_KCOV_BROKEN See https://bugs.llvm.org/show_bug.cgi?id=45831 for the status in newer releases. +config CC_HAS_UBSAN_BOUNDS + def_bool $(cc-option,-fsanitize=bounds) + +config CC_HAS_UBSAN_ARRAY_BOUNDS + def_bool $(cc-option,-fsanitize=array-bounds) + config UBSAN_BOUNDS bool "Perform array index bounds checking" default UBSAN depends on !UBSAN_KCOV_BROKEN + depends on CC_HAS_UBSAN_ARRAY_BOUNDS || CC_HAS_UBSAN_BOUNDS help This option enables detection of directly indexed out of bounds array accesses, where the array size is known at compile time. @@ -47,22 +55,121 @@ config UBSAN_BOUNDS to the {str,mem}*cpy() family of functions (that is addressed by CONFIG_FORTIFY_SOURCE). -config UBSAN_MISC - bool "Enable all other Undefined Behavior sanity checks" +config UBSAN_ONLY_BOUNDS + def_bool CC_HAS_UBSAN_BOUNDS && !CC_HAS_UBSAN_ARRAY_BOUNDS + depends on UBSAN_BOUNDS + help + This is a weird case: Clang's -fsanitize=bounds includes + -fsanitize=local-bounds, but it's trapping-only, so for + Clang, we must use -fsanitize=array-bounds when we want + traditional array bounds checking enabled. For GCC, we + want -fsanitize=bounds. + +config UBSAN_ARRAY_BOUNDS + def_bool CC_HAS_UBSAN_ARRAY_BOUNDS + depends on UBSAN_BOUNDS + +config UBSAN_LOCAL_BOUNDS + bool "Perform array local bounds checking" + depends on UBSAN_TRAP + depends on !UBSAN_KCOV_BROKEN + depends on $(cc-option,-fsanitize=local-bounds) + help + This option enables -fsanitize=local-bounds which traps when an + exception/error is detected. Therefore, it may only be enabled + with CONFIG_UBSAN_TRAP. + + Enabling this option detects errors due to accesses through a + pointer that is derived from an object of a statically-known size, + where an added offset (which may not be known statically) is + out-of-bounds. + +config UBSAN_SHIFT + bool "Perform checking for bit-shift overflows" + default UBSAN + depends on $(cc-option,-fsanitize=shift) + help + This option enables -fsanitize=shift which checks for bit-shift + operations that overflow to the left or go switch to negative + for signed types. + +config UBSAN_DIV_ZERO + bool "Perform checking for integer divide-by-zero" + depends on $(cc-option,-fsanitize=integer-divide-by-zero) + help + This option enables -fsanitize=integer-divide-by-zero which checks + for integer division by zero. This is effectively redundant with the + kernel's existing exception handling, though it can provide greater + debugging information under CONFIG_UBSAN_REPORT_FULL. + +config UBSAN_UNREACHABLE + bool "Perform checking for unreachable code" + # objtool already handles unreachable checking and gets angry about + # seeing UBSan instrumentation located in unreachable places. 
+ depends on !STACK_VALIDATION + depends on $(cc-option,-fsanitize=unreachable) + help + This option enables -fsanitize=unreachable which checks for control + flow reaching an expected-to-be-unreachable position. + +config UBSAN_SIGNED_OVERFLOW + bool "Perform checking for signed arithmetic overflow" + default UBSAN + depends on $(cc-option,-fsanitize=signed-integer-overflow) + help + This option enables -fsanitize=signed-integer-overflow which checks + for overflow of any arithmetic operations with signed integers. + +config UBSAN_UNSIGNED_OVERFLOW + bool "Perform checking for unsigned arithmetic overflow" + depends on $(cc-option,-fsanitize=unsigned-integer-overflow) + help + This option enables -fsanitize=unsigned-integer-overflow which checks + for overflow of any arithmetic operations with unsigned integers. This + currently causes x86 to fail to boot. + +config UBSAN_OBJECT_SIZE + bool "Perform checking for accesses beyond the end of objects" default UBSAN + # gcc hugely expands stack usage with -fsanitize=object-size + # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/ + depends on !CC_IS_GCC + depends on $(cc-option,-fsanitize=object-size) help - This option enables all sanity checks that don't have their - own Kconfig options. Disable this if you only want to have - individually selected checks. + This option enables -fsanitize=object-size which checks for accesses + beyond the end of objects where the optimizer can determine both the + object being operated on and its size, usually seen with bad downcasts, + or access to struct members from NULL pointers. + +config UBSAN_BOOL + bool "Perform checking for non-boolean values used as boolean" + default UBSAN + depends on $(cc-option,-fsanitize=bool) + help + This option enables -fsanitize=bool which checks for boolean values being + loaded that are neither 0 nor 1. + +config UBSAN_ENUM + bool "Perform checking for out of bounds enum values" + default UBSAN + depends on $(cc-option,-fsanitize=enum) + help + This option enables -fsanitize=enum which checks for values being loaded + into an enum that are outside the range of given values for the given enum. + +config UBSAN_ALIGNMENT + bool "Perform checking for misaligned pointer usage" + default !HAVE_EFFICIENT_UNALIGNED_ACCESS + depends on !UBSAN_TRAP && !COMPILE_TEST + depends on $(cc-option,-fsanitize=alignment) + help + This option enables the check of unaligned memory accesses. + Enabling this option on architectures that support unaligned + accesses may produce a lot of false positives. config UBSAN_SANITIZE_ALL bool "Enable instrumentation for the entire kernel" depends on ARCH_HAS_UBSAN_SANITIZE_ALL - - # We build with -Wno-maybe-uninitilzed, but we still want to - # use -Wmaybe-uninitilized in allmodconfig builds. - # So dependsy bellow used to disable this option in allmodconfig - depends on !COMPILE_TEST default y help This option activates instrumentation for the entire kernel. @@ -71,15 +178,6 @@ config UBSAN_SANITIZE_ALL Enabling this option will get kernel image size increased significantly. -config UBSAN_ALIGNMENT - bool "Enable checks for pointers alignment" - default !HAVE_EFFICIENT_UNALIGNED_ACCESS - depends on !UBSAN_TRAP - help - This option enables the check of unaligned memory accesses. - Enabling this option on architectures that support unaligned - accesses may produce a lot of false positives. 
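
Editor's note: the hunks above split the old catch-all UBSAN_MISC into per-check options, each gated on its own -fsanitize= flag. A few self-contained illustrations (hypothetical functions, not from the patch) of the undefined behaviour the new options trap when compiled with the corresponding flag:

	#include <limits.h>

	/* UBSAN_SHIFT (-fsanitize=shift): shifts that overflow or use an
	 * out-of-range shift count. */
	unsigned int shift_oob(unsigned int v, int n)
	{
		return v << n;		/* trips when n >= 32 or n < 0 */
	}

	/* UBSAN_SIGNED_OVERFLOW (-fsanitize=signed-integer-overflow) */
	int signed_wrap(int a)
	{
		return a + 1;		/* trips when a == INT_MAX */
	}

	/* UBSAN_BOUNDS: directly indexed access to an array whose size is
	 * known at compile time. */
	int array_oob(int i)
	{
		int buf[4] = { 0 };
		return buf[i];		/* trips when i is outside 0..3 */
	}
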
- config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m diff --git a/lib/Makefile b/lib/Makefile index b1c42c10073b..afeff05fa8c5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -3,10 +3,7 @@ # Makefile for some libs needed in the kernel. # -ifdef CONFIG_FUNCTION_TRACER -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) -endif +ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) # These files are disabled because they produce lots of non-interesting and/or # flaky coverage that is not a function of syscall inputs. For example, @@ -18,11 +15,16 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n KCOV_INSTRUMENT_fault-inject.o := n +# string.o implements standard library functions like memset/memcpy etc. +# Use -ffreestanding to ensure that the compiler does not try to "optimize" +# them into calls to themselves. +CFLAGS_string.o := -ffreestanding + # Early boot use of cmdline, don't instrument it ifdef CONFIG_AMD_MEM_ENCRYPT KASAN_SANITIZE_string.o := n -CFLAGS_string.o := $(call cc-option, -fno-stack-protector) +CFLAGS_string.o += -fno-stack-protector endif # Used by KCSAN while enabled, avoid recursion. @@ -37,7 +39,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o lib-$(CONFIG_PRINTK) += dump_stack.o -lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o @@ -64,9 +65,11 @@ CFLAGS_test_bitops.o += -Werror obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_IDA) += test_ida.o -obj-$(CONFIG_TEST_KASAN) += test_kasan.o +obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o CFLAGS_test_kasan.o += -fno-builtin CFLAGS_test_kasan.o += $(call cc-disable-warning, vla) +obj-$(CONFIG_TEST_KASAN_MODULE) += test_kasan_module.o +CFLAGS_test_kasan_module.o += -fno-builtin obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla) UBSAN_SANITIZE_test_ubsan.o := y @@ -84,7 +87,6 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o -obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o @@ -98,6 +100,32 @@ obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_HMM) += test_hmm.o +obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o + +# +# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns +# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS +# get appended last to CFLAGS and thus override those previous compiler options. +# +FPU_CFLAGS := -msse -msse2 +ifdef CONFIG_CC_IS_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 +# +# The "-msse" in the first argument is there so that the +# -mpreferred-stack-boundary=3 build error: +# +# -mpreferred-stack-boundary=3 is not between 4 and 12 +# +# can be triggered. Otherwise gcc doesn't complain. 
+FPU_CFLAGS += -mhard-float +FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) +endif + +obj-$(CONFIG_TEST_FPU) += test_fpu.o +CFLAGS_test_fpu.o += $(FPU_CFLAGS) obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ @@ -170,6 +198,7 @@ lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o +lib-$(CONFIG_DECOMPRESS_ZSTD) += decompress_unzstd.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o @@ -181,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o @@ -300,7 +330,7 @@ endif UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n KCSAN_SANITIZE_ubsan.o := n -CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN) +CFLAGS_ubsan.o := -fno-stack-protector $(DISABLE_STACKLEAK_PLUGIN) obj-$(CONFIG_SBITMAP) += sbitmap.o @@ -315,6 +345,14 @@ obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o obj-$(CONFIG_OBJAGG) += objagg.o +# pldmfw library +obj-$(CONFIG_PLDMFW) += pldmfw/ + # KUnit tests +obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_BITS_TEST) += test_bits.o +obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o + +obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 58f72b25f8e9..13da529e2e72 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -381,7 +381,7 @@ next_op: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; - /* fall through */ + fallthrough; case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: @@ -448,7 +448,7 @@ next_op: pc += asn1_op_lengths[op]; goto next_op; } - /* fall through */ + fallthrough; case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 6f4bcf524554..04c98799c3ba 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1113,7 +1113,7 @@ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, index_key)) goto found_leaf; } - /* fall through */ + fallthrough; case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: diff --git a/lib/test_bitfield.c b/lib/bitfield_kunit.c index 5b8f4108662d..1473d8b4bf0f 100644 --- a/lib/test_bitfield.c +++ b/lib/bitfield_kunit.c @@ -5,8 +5,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/kernel.h> -#include <linux/module.h> +#include <kunit/test.h> #include <linux/bitfield.h> #define CHECK_ENC_GET_U(tp, v, field, res) do { \ @@ -14,13 +13,11 @@ u##tp _res; \ \ _res = u##tp##_encode_bits(v, field); \ - if (_res != res) { \ - pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\ - (u64)_res); \ - return -EINVAL; \ - } \ - if (u##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ + KUNIT_ASSERT_FALSE_MSG(context, _res != res, \ + "u" #tp "_encode_bits(" #v ", " #field ") is 
0x%llx != " #res "\n", \ + (u64)_res); \ + KUNIT_ASSERT_FALSE(context, \ + u##tp##_get_bits(_res, field) != v); \ } \ } while (0) @@ -29,14 +26,13 @@ __le##tp _res; \ \ _res = le##tp##_encode_bits(v, field); \ - if (_res != cpu_to_le##tp(res)) { \ - pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ - (u64)le##tp##_to_cpu(_res), \ - (u64)(res)); \ - return -EINVAL; \ - } \ - if (le##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ + KUNIT_ASSERT_FALSE_MSG(context, \ + _res != cpu_to_le##tp(res), \ + "le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx",\ + (u64)le##tp##_to_cpu(_res), \ + (u64)(res)); \ + KUNIT_ASSERT_FALSE(context, \ + le##tp##_get_bits(_res, field) != v);\ } \ } while (0) @@ -45,14 +41,13 @@ __be##tp _res; \ \ _res = be##tp##_encode_bits(v, field); \ - if (_res != cpu_to_be##tp(res)) { \ - pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ - (u64)be##tp##_to_cpu(_res), \ - (u64)(res)); \ - return -EINVAL; \ - } \ - if (be##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ + KUNIT_ASSERT_FALSE_MSG(context, \ + _res != cpu_to_be##tp(res), \ + "be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx", \ + (u64)be##tp##_to_cpu(_res), \ + (u64)(res)); \ + KUNIT_ASSERT_FALSE(context, \ + be##tp##_get_bits(_res, field) != v);\ } \ } while (0) @@ -62,7 +57,7 @@ CHECK_ENC_GET_BE(tp, v, field, res); \ } while (0) -static int test_constants(void) +static void __init test_bitfields_constants(struct kunit *context) { /* * NOTE @@ -95,19 +90,17 @@ static int test_constants(void) CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); - - return 0; } #define CHECK(tp, mask) do { \ u64 v; \ \ for (v = 0; v < 1 << hweight32(mask); v++) \ - if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \ - return -EINVAL; \ + KUNIT_ASSERT_FALSE(context, \ + tp##_encode_bits(v, mask) != v << __ffs64(mask));\ } while (0) -static int test_variables(void) +static void __init test_bitfields_variables(struct kunit *context) { CHECK(u8, 0x0f); CHECK(u8, 0xf0); @@ -130,39 +123,32 @@ static int test_variables(void) CHECK(u64, 0x000000007f000000ull); CHECK(u64, 0x0000000018000000ull); CHECK(u64, 0x0000001f8000000ull); - - return 0; } -static int __init test_bitfields(void) -{ - int ret = test_constants(); - - if (ret) { - pr_warn("constant tests failed!\n"); - return ret; - } - - ret = test_variables(); - if (ret) { - pr_warn("variable tests failed!\n"); - return ret; - } - #ifdef TEST_BITFIELD_COMPILE +static void __init test_bitfields_compile(struct kunit *context) +{ /* these should fail compilation */ CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); u32_encode_bits(7, 0x06000000); /* this should at least give a warning */ u16_encode_bits(0, 0x60000); +} #endif - pr_info("tests passed\n"); +static struct kunit_case __refdata bitfields_test_cases[] = { + KUNIT_CASE(test_bitfields_constants), + KUNIT_CASE(test_bitfields_variables), + {} +}; - return 0; -} -module_init(test_bitfields) +static struct kunit_suite bitfields_test_suite = { + .name = "bitfields", + .test_cases = bitfields_test_cases, +}; + +kunit_test_suites(&bitfields_test_suite); MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); MODULE_LICENSE("GPL"); diff --git a/lib/bitmap.c b/lib/bitmap.c index 0364452b1617..75006c4036e9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -23,7 +23,7 @@ /** * DOC: bitmap introduction * - * 
bitmaps provide an array of bits, implemented using an an + * bitmaps provide an array of bits, implemented using an * array of unsigned longs. The number of valid bits in a * given bitmap does _not_ need to be an exact multiple of * BITS_PER_LONG. @@ -212,13 +212,13 @@ void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned long keep = 0, carry; int i; - memmove(dst, src, len * sizeof(*dst)); - if (first % BITS_PER_LONG) { keep = src[first / BITS_PER_LONG] & (~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG)); } + memmove(dst, src, len * sizeof(*dst)); + while (cut--) { for (i = first / BITS_PER_LONG; i < len; i++) { if (i < len - 1) @@ -552,7 +552,7 @@ static inline bool end_of_region(char c) } /* - * The format allows commas and whitespases at the beginning + * The format allows commas and whitespaces at the beginning * of the region. */ static const char *bitmap_find_region(const char *str) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 912ef4921398..9f8c70a98fcf 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -31,6 +31,8 @@ static size_t xbc_data_size __initdata; static struct xbc_node *last_parent __initdata; static const char *xbc_err_msg __initdata; static int xbc_err_pos __initdata; +static int open_brace[XBC_DEPTH_MAX] __initdata; +static int brace_index __initdata; static int __init xbc_parse_error(const char *msg, const char *p) { @@ -329,22 +331,30 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root, /* XBC parse and tree build */ +static int __init xbc_init_node(struct xbc_node *node, char *data, u32 flag) +{ + unsigned long offset = data - xbc_data; + + if (WARN_ON(offset >= XBC_DATA_MAX)) + return -EINVAL; + + node->data = (u16)offset | flag; + node->child = 0; + node->next = 0; + + return 0; +} + static struct xbc_node * __init xbc_add_node(char *data, u32 flag) { struct xbc_node *node; - unsigned long offset; if (xbc_node_num == XBC_NODE_MAX) return NULL; node = &xbc_nodes[xbc_node_num++]; - offset = data - xbc_data; - node->data = (u16)offset; - if (WARN_ON(offset >= XBC_DATA_MAX)) + if (xbc_init_node(node, data, flag) < 0) return NULL; - node->data |= flag; - node->child = 0; - node->next = 0; return node; } @@ -423,27 +433,27 @@ static char *skip_spaces_until_newline(char *p) return p; } -static int __init __xbc_open_brace(void) +static int __init __xbc_open_brace(char *p) { - /* Mark the last key as open brace */ - last_parent->next = XBC_NODE_MAX; + /* Push the last key as open brace */ + open_brace[brace_index++] = xbc_node_index(last_parent); + if (brace_index >= XBC_DEPTH_MAX) + return xbc_parse_error("Exceed max depth of braces", p); return 0; } static int __init __xbc_close_brace(char *p) { - struct xbc_node *node; - - if (!last_parent || last_parent->next != XBC_NODE_MAX) + brace_index--; + if (!last_parent || brace_index < 0 || + (open_brace[brace_index] != xbc_node_index(last_parent))) return xbc_parse_error("Unexpected closing brace", p); - node = last_parent; - node->next = 0; - do { - node = xbc_node_get_parent(node); - } while (node && node->next != XBC_NODE_MAX); - last_parent = node; + if (brace_index == 0) + last_parent = NULL; + else + last_parent = &xbc_nodes[open_brace[brace_index - 1]]; return 0; } @@ -484,8 +494,8 @@ static int __init __xbc_parse_value(char **__v, char **__n) break; } if (strchr(",;\n#}", c)) { - v = strim(v); *p++ = '\0'; + v = strim(v); break; } } @@ -603,7 +613,9 @@ static int __init xbc_parse_kv(char **k, char *v, int op) if (c < 0) return c; - if (!xbc_add_sibling(v, 
XBC_VALUE)) + if (op == ':' && child) { + xbc_init_node(child, v, XBC_VALUE); + } else if (!xbc_add_sibling(v, XBC_VALUE)) return -ENOMEM; if (c == ',') { /* Array */ @@ -651,7 +663,7 @@ static int __init xbc_open_brace(char **k, char *n) return ret; *k = n; - return __xbc_open_brace(); + return __xbc_open_brace(n - 1); } static int __init xbc_close_brace(char **k, char *n) @@ -671,6 +683,13 @@ static int __init xbc_verify_tree(void) int i, depth, len, wlen; struct xbc_node *n, *m; + /* Brace closing */ + if (brace_index) { + n = &xbc_nodes[open_brace[brace_index]]; + return xbc_parse_error("Brace is not closed", + xbc_node_get_data(n)); + } + /* Empty tree */ if (xbc_node_num == 0) { xbc_parse_error("Empty config", xbc_data); @@ -735,6 +754,7 @@ void __init xbc_destroy_all(void) xbc_node_num = 0; memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; + brace_index = 0; } /** @@ -787,7 +807,7 @@ int __init xbc_init(char *buf, const char **emsg, int *epos) p = buf; do { - q = strpbrk(p, "{}=+;\n#"); + q = strpbrk(p, "{}=+;:\n#"); if (!q) { p = skip_spaces(p); if (*p != '\0') @@ -798,13 +818,16 @@ int __init xbc_init(char *buf, const char **emsg, int *epos) c = *q; *q++ = '\0'; switch (c) { + case ':': case '+': if (*q++ != '=') { - ret = xbc_parse_error("Wrong '+' operator", + ret = xbc_parse_error(c == '+' ? + "Wrong '+' operator" : + "Wrong ':' operator", q - 2); break; } - /* Fall through */ + fallthrough; case '=': ret = xbc_parse_kv(&p, q, c); break; @@ -813,7 +836,7 @@ int __init xbc_init(char *buf, const char **emsg, int *epos) break; case '#': q = skip_comment(q); - /* fall through */ + fallthrough; case ';': case '\n': ret = xbc_parse_key(&p, q); diff --git a/lib/checksum.c b/lib/checksum.c index 7ac65a0000ff..6860d6b05a17 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -145,17 +145,6 @@ __sum16 ip_compute_csum(const void *buff, int len) } EXPORT_SYMBOL(ip_compute_csum); -/* - * copy from ds while checksumming, otherwise like csum_partial - */ -__wsum -csum_partial_copy(const void *src, void *dst, int len, __wsum sum) -{ - memcpy(dst, src, len); - return csum_partial(dst, len, sum); -} -EXPORT_SYMBOL(csum_partial_copy); - #ifndef csum_tcpudp_nofold static inline u32 from64to32(u64 x) { diff --git a/lib/cmdline.c b/lib/cmdline.c index fbb9981a04a4..b390dd03363b 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -35,25 +35,37 @@ static int get_range(char **str, int *pint, int n) /** * get_option - Parse integer from an option string * @str: option string - * @pint: (output) integer value parsed from @str + * @pint: (optional output) integer value parsed from @str * * Read an int from an option string; if available accept a subsequent * comma as well. * + * When @pint is NULL the function can be used as a validator of + * the current option in the string. + * * Return values: * 0 - no int in string * 1 - int found, no subsequent comma * 2 - int found including a subsequent comma * 3 - hyphen found to denote a range + * + * Leading hyphen without integer is no integer case, but we consume it + * for the sake of simplification. 
*/ int get_option(char **str, int *pint) { char *cur = *str; + int value; if (!cur || !(*cur)) return 0; - *pint = simple_strtol(cur, str, 0); + if (*cur == '-') + value = -simple_strtoull(++cur, str, 0); + else + value = simple_strtoull(cur, str, 0); + if (pint) + *pint = value; if (cur == *str) return 0; if (**str == ',') { @@ -132,27 +144,28 @@ unsigned long long memparse(const char *ptr, char **retptr) case 'E': case 'e': ret <<= 10; - /* fall through */ + fallthrough; case 'P': case 'p': ret <<= 10; - /* fall through */ + fallthrough; case 'T': case 't': ret <<= 10; - /* fall through */ + fallthrough; case 'G': case 'g': ret <<= 10; - /* fall through */ + fallthrough; case 'M': case 'm': ret <<= 10; - /* fall through */ + fallthrough; case 'K': case 'k': ret <<= 10; endptr++; + fallthrough; default: break; } diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c new file mode 100644 index 000000000000..550e7a47fd24 --- /dev/null +++ b/lib/cmdline_kunit.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for API provided by cmdline.c + */ + +#include <kunit/test.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/string.h> + +static const char *cmdline_test_strings[] = { + "\"\"", "" , "=" , "\"-", "," , "-," , ",-" , "-" , + "+," , "--", ",,", "''" , "\"\",", "\",\"", "-\"\"", "\"", +}; + +static const int cmdline_test_values[] = { + 1, 1, 1, 1, 2, 3, 2, 3, + 1, 3, 2, 1, 1, 1, 3, 1, +}; + +static void cmdline_do_one_test(struct kunit *test, const char *in, int rc, int offset) +{ + const char *fmt = "Pattern: %s"; + const char *out = in; + int dummy; + int ret; + + ret = get_option((char **)&out, &dummy); + + KUNIT_EXPECT_EQ_MSG(test, ret, rc, fmt, in); + KUNIT_EXPECT_PTR_EQ_MSG(test, out, in + offset, fmt, in); +} + +static void cmdline_test_noint(struct kunit *test) +{ + unsigned int i = 0; + + do { + const char *str = cmdline_test_strings[i]; + int rc = 0; + int offset; + + /* Only first and leading '-' will advance the pointer */ + offset = !!(*str == '-'); + cmdline_do_one_test(test, str, rc, offset); + } while (++i < ARRAY_SIZE(cmdline_test_strings)); +} + +static void cmdline_test_lead_int(struct kunit *test) +{ + unsigned int i = 0; + char in[32]; + + do { + const char *str = cmdline_test_strings[i]; + int rc = cmdline_test_values[i]; + int offset; + + sprintf(in, "%u%s", get_random_int() % 256, str); + /* Only first '-' after the number will advance the pointer */ + offset = strlen(in) - strlen(str) + !!(rc == 2); + cmdline_do_one_test(test, in, rc, offset); + } while (++i < ARRAY_SIZE(cmdline_test_strings)); +} + +static void cmdline_test_tail_int(struct kunit *test) +{ + unsigned int i = 0; + char in[32]; + + do { + const char *str = cmdline_test_strings[i]; + /* When "" or "-" the result will be valid integer */ + int rc = strcmp(str, "") ? (strcmp(str, "-") ? 0 : 1) : 1; + int offset; + + sprintf(in, "%s%u", str, get_random_int() % 256); + /* + * Only first and leading '-' not followed by integer + * will advance the pointer. + */ + offset = rc ? 
strlen(in) : !!(*str == '-'); + cmdline_do_one_test(test, in, rc, offset); + } while (++i < ARRAY_SIZE(cmdline_test_strings)); +} + +static struct kunit_case cmdline_test_cases[] = { + KUNIT_CASE(cmdline_test_noint), + KUNIT_CASE(cmdline_test_lead_int), + KUNIT_CASE(cmdline_test_tail_int), + {} +}; + +static struct kunit_suite cmdline_test_suite = { + .name = "cmdline", + .test_cases = cmdline_test_cases, +}; +kunit_test_suite(cmdline_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/cpumask.c b/lib/cpumask.c index fb22fb266f93..35924025097b 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -6,6 +6,7 @@ #include <linux/export.h> #include <linux/memblock.h> #include <linux/numa.h> +#include <linux/sched/isolation.h> /** * cpumask_next - get the next cpu in a cpumask @@ -205,22 +206,27 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) */ unsigned int cpumask_local_spread(unsigned int i, int node) { - int cpu; + int cpu, hk_flags; + const struct cpumask *mask; + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ; + mask = housekeeping_cpumask(hk_flags); /* Wrap: we always want a cpu. */ - i %= num_online_cpus(); + i %= cpumask_weight(mask); if (node == NUMA_NO_NODE) { - for_each_cpu(cpu, cpu_online_mask) + for_each_cpu(cpu, mask) { if (i-- == 0) return cpu; + } } else { /* NUMA first. */ - for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + for_each_cpu_and(cpu, cpumask_of_node(node), mask) { if (i-- == 0) return cpu; + } - for_each_cpu(cpu, cpu_online_mask) { + for_each_cpu(cpu, mask) { /* Skip NUMA nodes, done above. */ if (cpumask_test_cpu(cpu, cpumask_of_node(node))) continue; @@ -261,3 +267,21 @@ int cpumask_any_and_distribute(const struct cpumask *src1p, return next; } EXPORT_SYMBOL(cpumask_any_and_distribute); + +int cpumask_any_distribute(const struct cpumask *srcp) +{ + int next, prev; + + /* NOTE: our first selection will skip 0. 
*/ + prev = __this_cpu_read(distribute_cpu_mask_prev); + + next = cpumask_next(prev, srcp); + if (next >= nr_cpu_ids) + next = cpumask_first(srcp); + + if (next < nr_cpu_ids) + __this_cpu_write(distribute_cpu_mask_prev, next); + + return next; +} +EXPORT_SYMBOL(cpumask_any_distribute); diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 8cc01a603416..1ed2ed487097 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -17,64 +17,69 @@ #include <linux/notifier.h> static struct crypto_shash __rcu *crct10dif_tfm; -static struct static_key crct10dif_fallback __read_mostly; +static DEFINE_STATIC_KEY_TRUE(crct10dif_fallback); static DEFINE_MUTEX(crc_t10dif_mutex); +static struct work_struct crct10dif_rehash_work; -static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data) +static int crc_t10dif_notify(struct notifier_block *self, unsigned long val, void *data) { struct crypto_alg *alg = data; - struct crypto_shash *new, *old; if (val != CRYPTO_MSG_ALG_LOADED || - static_key_false(&crct10dif_fallback) || - strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING))) - return 0; + strcmp(alg->cra_name, CRC_T10DIF_STRING)) + return NOTIFY_DONE; + + schedule_work(&crct10dif_rehash_work); + return NOTIFY_OK; +} + +static void crc_t10dif_rehash(struct work_struct *work) +{ + struct crypto_shash *new, *old; mutex_lock(&crc_t10dif_mutex); old = rcu_dereference_protected(crct10dif_tfm, lockdep_is_held(&crc_t10dif_mutex)); - if (!old) { - mutex_unlock(&crc_t10dif_mutex); - return 0; - } - new = crypto_alloc_shash("crct10dif", 0, 0); + new = crypto_alloc_shash(CRC_T10DIF_STRING, 0, 0); if (IS_ERR(new)) { mutex_unlock(&crc_t10dif_mutex); - return 0; + return; } rcu_assign_pointer(crct10dif_tfm, new); mutex_unlock(&crc_t10dif_mutex); - synchronize_rcu(); - crypto_free_shash(old); - return 0; + if (old) { + synchronize_rcu(); + crypto_free_shash(old); + } else { + static_branch_disable(&crct10dif_fallback); + } } static struct notifier_block crc_t10dif_nb = { - .notifier_call = crc_t10dif_rehash, + .notifier_call = crc_t10dif_notify, }; __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) { struct { struct shash_desc shash; - char ctx[2]; + __u16 crc; } desc; int err; - if (static_key_false(&crct10dif_fallback)) + if (static_branch_unlikely(&crct10dif_fallback)) return crc_t10dif_generic(crc, buffer, len); rcu_read_lock(); desc.shash.tfm = rcu_dereference(crct10dif_tfm); - *(__u16 *)desc.ctx = crc; - + desc.crc = crc; err = crypto_shash_update(&desc.shash, buffer, len); rcu_read_unlock(); BUG_ON(err); - return *(__u16 *)desc.ctx; + return desc.crc; } EXPORT_SYMBOL(crc_t10dif_update); @@ -86,19 +91,17 @@ EXPORT_SYMBOL(crc_t10dif); static int __init crc_t10dif_mod_init(void) { + INIT_WORK(&crct10dif_rehash_work, crc_t10dif_rehash); crypto_register_notifier(&crc_t10dif_nb); - crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); - if (IS_ERR(crct10dif_tfm)) { - static_key_slow_inc(&crct10dif_fallback); - crct10dif_tfm = NULL; - } + crc_t10dif_rehash(&crct10dif_rehash_work); return 0; } static void __exit crc_t10dif_mod_fini(void) { crypto_unregister_notifier(&crc_t10dif_nb); - crypto_free_shash(crct10dif_tfm); + cancel_work_sync(&crct10dif_rehash_work); + crypto_free_shash(rcu_dereference_protected(crct10dif_tfm, 1)); } module_init(crc_t10dif_mod_init); @@ -106,15 +109,23 @@ module_exit(crc_t10dif_mod_fini); static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp) { - if (static_key_false(&crct10dif_fallback)) + struct 
crypto_shash *tfm; + int len; + + if (static_branch_unlikely(&crct10dif_fallback)) return sprintf(buffer, "fallback\n"); - return sprintf(buffer, "%s\n", - crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm))); + rcu_read_lock(); + tfm = rcu_dereference(crct10dif_tfm); + len = snprintf(buffer, PAGE_SIZE, "%s\n", + crypto_shash_driver_name(tfm)); + rcu_read_unlock(); + + return len; } -module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644); +module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0444); -MODULE_DESCRIPTION("T10 DIF CRC calculation"); +MODULE_DESCRIPTION("T10 DIF CRC calculation (library API)"); MODULE_LICENSE("GPL"); MODULE_SOFTDEP("pre: crct10dif"); diff --git a/lib/crc32.c b/lib/crc32.c index 4a20455d1f61..2a68dfd3b96c 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -24,7 +24,7 @@ * Version 2. See the file COPYING for more details. */ -/* see: Documentation/crc32.txt for a description of algorithms */ +/* see: Documentation/staging/crc32.rst for a description of algorithms */ #include <linux/crc32.h> #include <linux/crc32poly.h> @@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, return crc; } -#if CRC_LE_BITS == 1 +#if CRC_BE_BITS == 1 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); diff --git a/lib/crc64.c b/lib/crc64.c index f8928ce28280..47cfa054827f 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -4,7 +4,7 @@ * * This is a basic crc64 implementation following ECMA-182 specification, * which can be found from, - * http://www.ecma-international.org/publications/standards/Ecma-182.htm + * https://www.ecma-international.org/publications/standards/Ecma-182.htm * * Dr. Ross N. Williams has a great document to introduce the idea of CRC * algorithm, here the CRC64 code is also inspired by the table-driven diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 79ef404a990d..5d9ea53be973 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -3,7 +3,7 @@ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
*/ -#include <crypto/blake2s.h> +#include <crypto/internal/blake2s.h> #include <linux/string.h> /* diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 41025a30c524..6a4b6b78d630 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -17,8 +17,6 @@ #include <linux/bug.h> #include <asm/unaligned.h> -bool blake2s_selftest(void); - void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index ad0699ce702f..5850f3b87359 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -21,8 +21,6 @@ #define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) -bool __init chacha20poly1305_selftest(void); - static void chacha_load_key(u32 *k, const u8 *in) { k[0] = get_unaligned_le32(in); @@ -253,9 +251,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); } - flags = SG_MITER_TO_SG; - if (!preemptible()) - flags |= SG_MITER_ATOMIC; + flags = SG_MITER_TO_SG | SG_MITER_ATOMIC; sg_miter_start(&miter, src, sg_nents(src), flags); diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 288a62cd29b2..fb29739e8c29 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -13,8 +13,6 @@ #include <linux/module.h> #include <linux/init.h> -bool curve25519_selftest(void); - static int __init mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index 2e621697c5c3..72a4b0b1df28 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -15,9 +15,28 @@ #include <linux/export.h> #include <linux/module.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/unaligned.h> +static const u32 SHA256_K[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + static inline u32 Ch(u32 x, u32 y, u32 z) { return z ^ (x & (y ^ z)); @@ -43,173 +62,68 @@ static inline void BLEND_OP(int I, u32 *W) W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; } -static void sha256_transform(u32 *state, const u8 *input) +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ + u32 t1, t2; \ + t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ + t2 = e0(a) + Maj(a, b, c); \ + d += t1; \ + h = t1 + t2; \ +} while (0) + +static void sha256_transform(u32 *state, const u8 *input, u32 *W) { - u32 a, b, c, d, e, f, g, h, t1, t2; - u32 W[64]; + u32 a, b, c, d, e, f, g, h; int i; /* load the input */ - for (i = 0; i < 16; i++) - LOAD_OP(i, W, input); + for (i = 0; i < 16; i += 8) { + LOAD_OP(i + 0, W, input); + LOAD_OP(i + 1, W, input); + LOAD_OP(i + 2, W, input); + LOAD_OP(i + 3, W, input); + LOAD_OP(i + 
4, W, input); + LOAD_OP(i + 5, W, input); + LOAD_OP(i + 6, W, input); + LOAD_OP(i + 7, W, input); + } /* now blend */ - for (i = 16; i < 64; i++) - BLEND_OP(i, W); + for (i = 16; i < 64; i += 8) { + BLEND_OP(i + 0, W); + BLEND_OP(i + 1, W); + BLEND_OP(i + 2, W); + BLEND_OP(i + 3, W); + BLEND_OP(i + 4, W); + BLEND_OP(i + 5, W); + BLEND_OP(i + 6, W); + BLEND_OP(i + 7, W); + } /* load the state into our registers */ a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; f = state[5]; g = state[6]; h = state[7]; /* now iterate */ - t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + 
t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; - - t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56]; - t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; - t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57]; - t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; - t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58]; - t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; - t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59]; - t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; - t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60]; - t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; - t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61]; - t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; - t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62]; - t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; - t1 = 
a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63]; - t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; + for (i = 0; i < 64; i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; state[5] += f; state[6] += g; state[7] += h; - - /* clear any sensitive info... */ - a = b = c = d = e = f = g = h = t1 = t2 = 0; - memzero_explicit(W, 64 * sizeof(u32)); } void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) { unsigned int partial, done; const u8 *src; + u32 W[64]; partial = sctx->count & 0x3f; sctx->count += len; @@ -224,11 +138,13 @@ void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) } do { - sha256_transform(sctx->state, src); + sha256_transform(sctx->state, src, W); done += 64; src = data + done; } while (done + 63 < len); + memzero_explicit(W, sizeof(W)); + partial = 0; } memcpy(sctx->buf + partial, src, len - done); @@ -265,7 +181,7 @@ static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words) put_unaligned_be32(sctx->state[i], &dst[i]); /* Zeroize sensitive information. */ - memset(sctx, 0, sizeof(*sctx)); + memzero_explicit(sctx, sizeof(*sctx)); } void sha256_final(struct sha256_state *sctx, u8 *out) @@ -280,4 +196,14 @@ void sha224_final(struct sha256_state *sctx, u8 *out) } EXPORT_SYMBOL(sha224_final); +void sha256(const u8 *data, unsigned int len, u8 *out) +{ + struct sha256_state sctx; + + sha256_init(&sctx); + sha256_update(&sctx, data, len); + sha256_final(&sctx, out); +} +EXPORT_SYMBOL(sha256); + MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 48054dbf1b51..9e14ae02306b 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/hash.h> #include <linux/kmemleak.h> +#include <linux/cpu.h> #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) @@ -90,7 +91,7 @@ static int debug_objects_pool_size __read_mostly = ODEBUG_POOL_SIZE; static int debug_objects_pool_min_level __read_mostly = ODEBUG_POOL_MIN_LEVEL; -static struct debug_obj_descr *descr_test __read_mostly; +static const struct debug_obj_descr *descr_test __read_mostly; static struct kmem_cache *obj_cache __read_mostly; /* @@ -223,7 +224,7 @@ static struct debug_obj *__alloc_object(struct hlist_head *list) * Must be called with interrupts disabled. 
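* (alloc_object() below takes objects from the per-CPU percpu_obj_pool, which is only safe against interrupt handlers on this CPU while interrupts stay off)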
*/ static struct debug_obj * -alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) +alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) { struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool); struct debug_obj *obj; @@ -433,6 +434,25 @@ static void free_object(struct debug_obj *obj) } } +#ifdef CONFIG_HOTPLUG_CPU +static int object_cpu_offline(unsigned int cpu) +{ + struct debug_percpu_free *percpu_pool; + struct hlist_node *tmp; + struct debug_obj *obj; + + /* Remote access is safe as the CPU is dead already */ + percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); + hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } + percpu_pool->obj_free = 0; + + return 0; +} +#endif + /* * We run out of memory. That means we probably have tons of objects * allocated. @@ -475,7 +495,7 @@ static struct debug_bucket *get_bucket(unsigned long addr) static void debug_print_object(struct debug_obj *obj, char *msg) { - struct debug_obj_descr *descr = obj->descr; + const struct debug_obj_descr *descr = obj->descr; static int limit; if (limit < 5 && descr != descr_test) { @@ -529,7 +549,7 @@ static void debug_object_is_on_stack(void *addr, int onstack) } static void -__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) +__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack) { enum debug_obj_state state; bool check_stack = false; @@ -587,7 +607,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_init(void *addr, struct debug_obj_descr *descr) +void debug_object_init(void *addr, const struct debug_obj_descr *descr) { if (!debug_objects_enabled) return; @@ -602,7 +622,7 @@ EXPORT_SYMBOL_GPL(debug_object_init); * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) +void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr) { if (!debug_objects_enabled) return; @@ -617,7 +637,7 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack); * @descr: pointer to an object specific debug description structure * Returns 0 for success, -EINVAL for check failed. 
*/ -int debug_object_activate(void *addr, struct debug_obj_descr *descr) +int debug_object_activate(void *addr, const struct debug_obj_descr *descr) { enum debug_obj_state state; struct debug_bucket *db; @@ -695,7 +715,7 @@ EXPORT_SYMBOL_GPL(debug_object_activate); * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) +void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) { struct debug_bucket *db; struct debug_obj *obj; @@ -747,7 +767,7 @@ EXPORT_SYMBOL_GPL(debug_object_deactivate); * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_destroy(void *addr, struct debug_obj_descr *descr) +void debug_object_destroy(void *addr, const struct debug_obj_descr *descr) { enum debug_obj_state state; struct debug_bucket *db; @@ -797,7 +817,7 @@ EXPORT_SYMBOL_GPL(debug_object_destroy); * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_free(void *addr, struct debug_obj_descr *descr) +void debug_object_free(void *addr, const struct debug_obj_descr *descr) { enum debug_obj_state state; struct debug_bucket *db; @@ -838,7 +858,7 @@ EXPORT_SYMBOL_GPL(debug_object_free); * @addr: address of the object * @descr: pointer to an object specific debug description structure */ -void debug_object_assert_init(void *addr, struct debug_obj_descr *descr) +void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) { struct debug_bucket *db; struct debug_obj *obj; @@ -886,7 +906,7 @@ EXPORT_SYMBOL_GPL(debug_object_assert_init); * @next: state to move to if expected state is found */ void -debug_object_active_state(void *addr, struct debug_obj_descr *descr, +debug_object_active_state(void *addr, const struct debug_obj_descr *descr, unsigned int expect, unsigned int next) { struct debug_bucket *db; @@ -934,7 +954,7 @@ EXPORT_SYMBOL_GPL(debug_object_active_state); static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct debug_obj_descr *descr; + const struct debug_obj_descr *descr; enum debug_obj_state state; struct debug_bucket *db; struct hlist_node *tmp; @@ -1022,18 +1042,7 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; } - -static int debug_stats_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, debug_stats_show, NULL); -} - -static const struct file_operations debug_stats_fops = { - .open = debug_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(debug_stats); static int __init debug_objects_init_debugfs(void) { @@ -1063,7 +1072,7 @@ struct self_test { unsigned long dummy2[3]; }; -static __initdata struct debug_obj_descr descr_type_test; +static __initconst const struct debug_obj_descr descr_type_test; static bool __init is_static_object(void *addr) { @@ -1188,7 +1197,7 @@ out: return res; } -static __initdata struct debug_obj_descr descr_type_test = { +static __initconst const struct debug_obj_descr descr_type_test = { .name = "selftest", .is_static_object = is_static_object, .fixup_init = fixup_init, @@ -1378,6 +1387,11 @@ void __init debug_objects_mem_init(void) } else debug_objects_selftest(); +#ifdef CONFIG_HOTPLUG_CPU + 
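/* object_cpu_offline() above drains the dead CPU's percpu free-object pool. */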
cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, + object_cpu_offline); +#endif + /* * Increase the thresholds for allocating and freeing objects * according to the number of possible CPUs available in the system. diff --git a/lib/decompress.c b/lib/decompress.c index 857ab1af1ef3..ab3fc90ffc64 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -13,6 +13,7 @@ #include <linux/decompress/inflate.h> #include <linux/decompress/unlzo.h> #include <linux/decompress/unlz4.h> +#include <linux/decompress/unzstd.h> #include <linux/types.h> #include <linux/string.h> @@ -37,6 +38,9 @@ #ifndef CONFIG_DECOMPRESS_LZ4 # define unlz4 NULL #endif +#ifndef CONFIG_DECOMPRESS_ZSTD +# define unzstd NULL +#endif struct compress_format { unsigned char magic[2]; @@ -52,6 +56,7 @@ static const struct compress_format compressed_formats[] __initconst = { { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, { {0x02, 0x21}, "lz4", unlz4 }, + { {0x28, 0xb5}, "zstd", unzstd }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 7c4932eed748..c72c865032fa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -34,7 +34,7 @@ Phone (337) 232-1234 or 1-800-738-2226 Fax (337) 232-1297 - http://www.hospiceacadiana.com/ + https://www.hospiceacadiana.com/ Manuel */ @@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd) j = (bd->inbufBits >> bd->inbufBitCount)& ((1 << hufGroup->maxLen)-1); got_huff_bits: - /* Figure how how many bits are in next symbol and + /* Figure how many bits are in next symbol and * unget extras */ i = hufGroup->minLen; while (j > limit[i]) diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index ed7a1fd819f2..1cf409ef8d04 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -8,7 +8,7 @@ *implementation for lzma. *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (C) 1999-2005 Igor Pavlov * *Copyrights of the parts, see headers below. @@ -56,7 +56,7 @@ static long long INIT read_int(unsigned char *ptr, int size) /* Small range coder implementation for lzma. *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (c) 1999-2005 Igor Pavlov */ @@ -213,7 +213,7 @@ rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) * Small lzma deflate implementation. * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > * - * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) * Copyright (C) 1999-2005 Igor Pavlov */ diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c new file mode 100644 index 000000000000..790abc472f5b --- /dev/null +++ b/lib/decompress_unzstd.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Important notes about in-place decompression + * + * At least on x86, the kernel is decompressed in place: the compressed data + * is placed to the end of the output buffer, and the decompressor overwrites + * most of the compressed data. There must be enough safety margin to + * guarantee that the write position is always behind the read position. 
+ * + * The safety margin for ZSTD with a 128 KB block size is calculated below. + * Note that the margin with ZSTD is bigger than with GZIP or XZ! + * + * The worst case for in-place decompression is that the beginning of + * the file is compressed extremely well, and the rest of the file is + * uncompressible. Thus, we must look for worst-case expansion when the + * compressor is encoding uncompressible data. + * + * The structure of the .zst file in case of a compressed kernel is as follows. + * Maximum sizes (as bytes) of the fields are in parentheses. + * + * Frame Header: (18) + * Blocks: (N) + * Checksum: (4) + * + * The frame header and checksum overhead is at most 22 bytes. + * + * ZSTD stores the data in blocks. Each block has a header whose size is + * 3 bytes. After the block header, there is up to 128 KB of payload. + * The maximum uncompressed size of the payload is 128 KB. The minimum + * uncompressed size of the payload is never less than the payload size + * (excluding the block header). + * + * The assumption that the uncompressed size of the payload is never + * smaller than the payload itself is valid only when talking about + * the payload as a whole. It is possible that the payload has parts where + * the decompressor consumes more input than it produces output. Calculating + * the worst case for this would be tricky. Instead of trying to do that, + * let's simply make sure that the decompressor never overwrites any bytes + * of the payload which it is currently reading. + * + * Now we have enough information to calculate the safety margin. We need + * - 22 bytes for the .zst file format headers; + * - 3 bytes per every 128 KiB of uncompressed size (one block header per + * block); and + * - 128 KiB (biggest possible zstd block size) to make sure that the + * decompressor never overwrites anything from the block it is currently + * reading. + * + * We get the following formula: + * + * safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072 + * <= 22 + (uncompressed_size >> 15) + 131072 + */ + +/* + * Preboot environments #include "path/to/decompress_unzstd.c". + * All of the source files we depend on must be #included. + * zstd's only source dependency is xxhash, which has no source + * dependencies. + * + * When UNZSTD_PREBOOT is defined we declare __decompress(), which is + * used for kernel decompression, instead of unzstd(). + * + * Define __DISABLE_EXPORTS in preboot environments to prevent symbols + * from xxhash and zstd from being exported by the EXPORT_SYMBOL macro. + */ +#ifdef STATIC +# define UNZSTD_PREBOOT +# include "xxhash.c" +# include "zstd/entropy_common.c" +# include "zstd/fse_decompress.c" +# include "zstd/huf_decompress.c" +# include "zstd/zstd_common.c" +# include "zstd/decompress.c" +#endif + +#include <linux/decompress/mm.h> +#include <linux/kernel.h> +#include <linux/zstd.h> + +/* 128MB is the maximum window size supported by zstd. */ +#define ZSTD_WINDOWSIZE_MAX (1 << ZSTD_WINDOWLOG_MAX) +/* + * Size of the input and output buffers in multi-call mode. + * Pick a larger size because it isn't used during kernel decompression, + * since that is single pass, and we have to allocate a large buffer for + * zstd's window anyway. The larger size speeds up initramfs decompression.
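+ * (1 << 17 bytes is 128 KiB, the biggest possible zstd block size noted + * in the safety-margin comment above.)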
+ */ +#define ZSTD_IOBUF_SIZE (1 << 17) + +static int INIT handle_zstd_error(size_t ret, void (*error)(char *x)) +{ + const int err = ZSTD_getErrorCode(ret); + + if (!ZSTD_isError(ret)) + return 0; + + switch (err) { + case ZSTD_error_memory_allocation: + error("ZSTD decompressor ran out of memory"); + break; + case ZSTD_error_prefix_unknown: + error("Input is not in the ZSTD format (wrong magic bytes)"); + break; + case ZSTD_error_dstSize_tooSmall: + case ZSTD_error_corruption_detected: + case ZSTD_error_checksum_wrong: + error("ZSTD-compressed data is corrupt"); + break; + default: + error("ZSTD-compressed data is probably corrupt"); + break; + } + return -1; +} + +/* + * Handle the case where we have the entire input and output in one segment. + * We can allocate less memory (no circular buffer for the sliding window), + * and avoid some memcpy() calls. + */ +static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf, + long out_len, long *in_pos, + void (*error)(char *x)) +{ + const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); + void *wksp = large_malloc(wksp_size); + ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); + int err; + size_t ret; + + if (dctx == NULL) { + error("Out of memory while allocating ZSTD_DCtx"); + err = -1; + goto out; + } + /* + * Find out how large the frame actually is, there may be junk at + * the end of the frame that ZSTD_decompressDCtx() can't handle. + */ + ret = ZSTD_findFrameCompressedSize(in_buf, in_len); + err = handle_zstd_error(ret, error); + if (err) + goto out; + in_len = (long)ret; + + ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); + err = handle_zstd_error(ret, error); + if (err) + goto out; + + if (in_pos != NULL) + *in_pos = in_len; + + err = 0; +out: + if (wksp != NULL) + large_free(wksp); + return err; +} + +static int INIT __unzstd(unsigned char *in_buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long out_len, + long *in_pos, + void (*error)(char *x)) +{ + ZSTD_inBuffer in; + ZSTD_outBuffer out; + ZSTD_frameParams params; + void *in_allocated = NULL; + void *out_allocated = NULL; + void *wksp = NULL; + size_t wksp_size; + ZSTD_DStream *dstream; + int err; + size_t ret; + + /* + * ZSTD decompression code won't be happy if the buffer size is so big + * that its end address overflows. When the size is not provided, make + * it as big as possible without having the end address overflow. + */ + if (out_len == 0) + out_len = UINTPTR_MAX - (uintptr_t)out_buf; + + if (fill == NULL && flush == NULL) + /* + * We can decompress faster and with less memory when we have a + * single chunk. + */ + return decompress_single(in_buf, in_len, out_buf, out_len, + in_pos, error); + + /* + * If in_buf is not provided, we must be using fill(), so allocate + * a large enough buffer. If it is provided, it must be at least + * ZSTD_IOBUF_SIZE large. + */ + if (in_buf == NULL) { + in_allocated = large_malloc(ZSTD_IOBUF_SIZE); + if (in_allocated == NULL) { + error("Out of memory while allocating input buffer"); + err = -1; + goto out; + } + in_buf = in_allocated; + in_len = 0; + } + /* Read the first chunk, since we need to decode the frame header. */ + if (fill != NULL) + in_len = fill(in_buf, ZSTD_IOBUF_SIZE); + if (in_len < 0) { + error("ZSTD-compressed data is truncated"); + err = -1; + goto out; + } + /* Set the first non-empty input buffer. */ + in.src = in_buf; + in.pos = 0; + in.size = in_len; + /* Allocate the output buffer if we are using flush(). 
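(with no flush() the caller-provided out_buf is written directly)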
*/ + if (flush != NULL) { + out_allocated = large_malloc(ZSTD_IOBUF_SIZE); + if (out_allocated == NULL) { + error("Out of memory while allocating output buffer"); + err = -1; + goto out; + } + out_buf = out_allocated; + out_len = ZSTD_IOBUF_SIZE; + } + /* Set the output buffer. */ + out.dst = out_buf; + out.pos = 0; + out.size = out_len; + + /* + * We need to know the window size to allocate the ZSTD_DStream. + * Since we are streaming, we need to allocate a buffer for the sliding + * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX + * (128 MB), so it is important to use the actual value so as not to + * waste memory when it is smaller. + */ + ret = ZSTD_getFrameParams(&params, in.src, in.size); + err = handle_zstd_error(ret, error); + if (err) + goto out; + if (ret != 0) { + error("ZSTD-compressed data has an incomplete frame header"); + err = -1; + goto out; + } + if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { + error("ZSTD-compressed data has too large a window size"); + err = -1; + goto out; + } + + /* + * Allocate the ZSTD_DStream now that we know how much memory is + * required. + */ + wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); + wksp = large_malloc(wksp_size); + dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); + if (dstream == NULL) { + error("Out of memory while allocating ZSTD_DStream"); + err = -1; + goto out; + } + + /* + * Decompression loop: + * Read more data if necessary (error if no more data can be read). + * Call the decompression function, which returns 0 when finished. + * Flush any data produced if using flush(). + */ + if (in_pos != NULL) + *in_pos = 0; + do { + /* + * If we need to reload data, either we have fill() and can + * try to get more data, or we don't and the input is truncated. + */ + if (in.pos == in.size) { + if (in_pos != NULL) + *in_pos += in.pos; + in_len = fill ? fill(in_buf, ZSTD_IOBUF_SIZE) : -1; + if (in_len < 0) { + error("ZSTD-compressed data is truncated"); + err = -1; + goto out; + } + in.pos = 0; + in.size = in_len; + } + /* Returns zero when the frame is complete. */ + ret = ZSTD_decompressStream(dstream, &out, &in); + err = handle_zstd_error(ret, error); + if (err) + goto out; + /* Flush all of the data produced if using flush(). */ + if (flush != NULL && out.pos > 0) { + if (out.pos != flush(out.dst, out.pos)) { + error("Failed to flush()"); + err = -1; + goto out; + } + out.pos = 0; + } + } while (ret != 0); + + if (in_pos != NULL) + *in_pos += in.pos; + + err = 0; +out: + if (in_allocated != NULL) + large_free(in_allocated); + if (out_allocated != NULL) + large_free(out_allocated); + if (wksp != NULL) + large_free(wksp); + return err; +} + +#ifndef UNZSTD_PREBOOT +STATIC int INIT unzstd(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, + long *pos, + void (*error)(char *x)) +{ + return __unzstd(buf, len, fill, flush, out_buf, 0, pos, error); +} +#else +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long out_len, + long *pos, + void (*error)(char *x)) +{ + return __unzstd(buf, len, fill, flush, out_buf, out_len, pos, error); +} +#endif diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c new file mode 100644 index 000000000000..c0d67c541849 --- /dev/null +++ b/lib/devmem_is_allowed.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * A generic version of devmem_is_allowed.
+ * + * Based on arch/arm64/mm/mmap.c + * + * Copyright (C) 2020 Google, Inc. + * Copyright (C) 2012 ARM Ltd. + */ + +#include <linux/mm.h> +#include <linux/ioport.h> + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. We mimic x86 here by + * disallowing access to system RAM as well as device-exclusive MMIO regions. + * This effectively disables read()/write() on /dev/mem. + */ +int devmem_is_allowed(unsigned long pfn) +{ + if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pfn)) + return 1; + return 0; +} diff --git a/lib/devres.c b/lib/devres.c index 6ef51f159c54..2a4ff5d64288 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -119,6 +119,7 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, { resource_size_t size; void __iomem *dest_ptr; + char *pretty_name; BUG_ON(!dev); @@ -129,7 +130,15 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, size = resource_size(res); - if (!devm_request_mem_region(dev, res->start, size, dev_name(dev))) { + if (res->name) + pretty_name = devm_kasprintf(dev, GFP_KERNEL, "%s %s", + dev_name(dev), res->name); + else + pretty_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL); + if (!pretty_name) + return IOMEM_ERR_PTR(-ENOMEM); + + if (!devm_request_mem_region(dev, res->start, size, pretty_name)) { dev_err(dev, "can't request region for resource %pR\n", res); return IOMEM_ERR_PTR(-EBUSY); } @@ -153,13 +162,15 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, * region and ioremaps it. All operations are managed and will be undone * on driver detach. * - * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code - * on failure. Usage example: + * Usage example: * * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); * base = devm_ioremap_resource(&pdev->dev, res); * if (IS_ERR(base)) * return PTR_ERR(base); + * + * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code + * on failure. */ void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res) @@ -174,8 +185,8 @@ EXPORT_SYMBOL(devm_ioremap_resource); * @dev: generic device to handle the resource for * @res: resource to be handled * - * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code - * on failure. Usage example: + * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code + * on failure. */ void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res) @@ -198,12 +209,20 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev, * @node: The device-tree node where the resource resides * @index: index of the MMIO range in the "reg" property * @size: Returns the size of the resource (pass NULL if not needed) - * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded - * error code on failure. Usage example: + * + * Usage example: * * base = devm_of_iomap(&pdev->dev, node, 0, NULL); * if (IS_ERR(base)) * return PTR_ERR(base); + * + * Please note: This is not a one-to-one replacement for of_iomap() because the + * of_iomap() function does not track whether the region is already mapped. If + * two drivers try to map the same memory, the of_iomap() function will succeed + * but the devm_of_iomap() function will return -EBUSY. + * + * Return: a pointer to the requested and mapped memory or an ERR_PTR() encoded + * error code on failure.
*/ void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, resource_size_t *size) @@ -241,6 +260,8 @@ static int devm_ioport_map_match(struct device *dev, void *res, * * Managed ioport_map(). Map is automatically unmapped on driver * detach. + * + * Return: a pointer to the remapped memory or NULL on failure. */ void __iomem *devm_ioport_map(struct device *dev, unsigned long port, unsigned int nr) diff --git a/lib/digsig.c b/lib/digsig.c index e0627c3e53b2..04b5e55ed95f 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -20,7 +20,7 @@ #include <linux/key.h> #include <linux/crypto.h> #include <crypto/hash.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <keys/user-type.h> #include <linux/mpi.h> #include <linux/digsig.h> diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index a4db51c21266..06811d866775 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -233,7 +233,7 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) schedule_work(&dim->work); break; } - /* fall through */ + fallthrough; case DIM_START_MEASURE: dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, &dim->start_sample); diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c index f7e26c7b4749..15462d54758d 100644 --- a/lib/dim/rdma_dim.c +++ b/lib/dim/rdma_dim.c @@ -59,7 +59,7 @@ static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim) break; case DIM_STATS_WORSE: dim_turn(dim); - /* fall through */ + fallthrough; case DIM_STATS_BETTER: step_res = rdma_dim_step(dim); if (step_res == DIM_ON_EDGE) @@ -94,7 +94,7 @@ void rdma_dim(struct dim *dim, u64 completions) schedule_work(&dim->work); break; } - /* fall through */ + fallthrough; case DIM_START_MEASURE: dim->state = DIM_MEASURE_IN_PROGRESS; dim_update_sample_with_comps(curr_sample->event_ctr, 0, 0, diff --git a/lib/dump_stack.c b/lib/dump_stack.c index a00ee6eedc7c..f5a33b6f773f 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -12,6 +12,7 @@ #include <linux/atomic.h> #include <linux/kexec.h> #include <linux/utsname.h> +#include <linux/stop_machine.h> static char dump_stack_arch_desc_str[128]; @@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl) log_lvl, dump_stack_arch_desc_str); print_worker_info(log_lvl, current); + print_stop_info(log_lvl, current); } /** diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 321437bbf87d..c70d6347afa2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -11,7 +11,7 @@ * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com> */ -#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ +#define pr_fmt(fmt) "dyndbg: " fmt #include <linux/kernel.h> #include <linux/module.h> @@ -39,8 +39,8 @@ #include <rdma/ib_verbs.h> -extern struct _ddebug __start___verbose[]; -extern struct _ddebug __stop___verbose[]; +extern struct _ddebug __start___dyndbg[]; +extern struct _ddebug __stop___dyndbg[]; struct ddebug_table { struct list_head link; @@ -62,6 +62,11 @@ struct ddebug_iter { unsigned int idx; }; +struct flag_settings { + unsigned int flags; + unsigned int mask; +}; + static DEFINE_MUTEX(ddebug_lock); static LIST_HEAD(ddebug_tables); static int verbose; @@ -87,30 +92,33 @@ static struct { unsigned flag:8; char opt_char; } opt_array[] = { { _DPRINTK_FLAGS_NONE, '_' }, }; +struct flagsbuf { char buf[ARRAY_SIZE(opt_array)+1]; }; + /* format a string into buf[] which describes the _ddebug's flags */ -static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, - size_t maxlen) +static char 
*ddebug_describe_flags(unsigned int flags, struct flagsbuf *fb) { - char *p = buf; + char *p = fb->buf; int i; - BUG_ON(maxlen < 6); for (i = 0; i < ARRAY_SIZE(opt_array); ++i) - if (dp->flags & opt_array[i].flag) + if (flags & opt_array[i].flag) *p++ = opt_array[i].opt_char; - if (p == buf) + if (p == fb->buf) *p++ = '_'; *p = '\0'; - return buf; + return fb->buf; } -#define vpr_info(fmt, ...) \ +#define vnpr_info(lvl, fmt, ...) \ do { \ - if (verbose) \ + if (verbose >= lvl) \ pr_info(fmt, ##__VA_ARGS__); \ } while (0) +#define vpr_info(fmt, ...) vnpr_info(1, fmt, ##__VA_ARGS__) +#define v2pr_info(fmt, ...) vnpr_info(2, fmt, ##__VA_ARGS__) + static void vpr_info_dq(const struct ddebug_query *query, const char *msg) { /* trim any trailing newlines */ @@ -124,10 +132,10 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", msg, - query->function ? query->function : "", - query->filename ? query->filename : "", - query->module ? query->module : "", - fmtlen, query->format ? query->format : "", + query->function ?: "", + query->filename ?: "", + query->module ?: "", + fmtlen, query->format ?: "", query->first_lineno, query->last_lineno); } @@ -138,13 +146,13 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) * logs the changes. Takes ddebug_lock. */ static int ddebug_change(const struct ddebug_query *query, - unsigned int flags, unsigned int mask) + struct flag_settings *modifiers) { int i; struct ddebug_table *dt; unsigned int newflags; unsigned int nfound = 0; - char flagbuf[10]; + struct flagsbuf fbuf; /* search for matching ddebugs */ mutex_lock(&ddebug_lock); @@ -173,9 +181,16 @@ static int ddebug_change(const struct ddebug_query *query, continue; /* match against the format */ - if (query->format && - !strstr(dp->format, query->format)) - continue; + if (query->format) { + if (*query->format == '^') { + char *p; + /* anchored search. 
match must be at beginning */ + p = strstr(dp->format, query->format+1); + if (p != dp->format) + continue; + } else if (!strstr(dp->format, query->format)) + continue; + } /* match against the line number range */ if (query->first_lineno && @@ -187,22 +202,21 @@ static int ddebug_change(const struct ddebug_query *query, nfound++; - newflags = (dp->flags & mask) | flags; + newflags = (dp->flags & modifiers->mask) | modifiers->flags; if (newflags == dp->flags) continue; #ifdef CONFIG_JUMP_LABEL if (dp->flags & _DPRINTK_FLAGS_PRINT) { - if (!(flags & _DPRINTK_FLAGS_PRINT)) + if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT)) static_branch_disable(&dp->key.dd_key_true); - } else if (flags & _DPRINTK_FLAGS_PRINT) + } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT) static_branch_enable(&dp->key.dd_key_true); #endif dp->flags = newflags; - vpr_info("changed %s:%d [%s]%s =%s\n", + v2pr_info("changed %s:%d [%s]%s =%s\n", trim_prefix(dp->filename), dp->lineno, dt->mod_name, dp->function, - ddebug_describe_flags(dp, flagbuf, - sizeof(flagbuf))); + ddebug_describe_flags(dp->flags, &fbuf)); } } mutex_unlock(&ddebug_lock); @@ -289,6 +303,41 @@ static inline int parse_lineno(const char *str, unsigned int *val) return 0; } +static int parse_linerange(struct ddebug_query *query, const char *first) +{ + char *last = strchr(first, '-'); + + if (query->first_lineno || query->last_lineno) { + pr_err("match-spec: line used 2x\n"); + return -EINVAL; + } + if (last) + *last++ = '\0'; + if (parse_lineno(first, &query->first_lineno) < 0) + return -EINVAL; + if (last) { + /* range <first>-<last> */ + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + + /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + + if (query->last_lineno < query->first_lineno) { + pr_err("last-line:%d < 1st-line:%d\n", + query->last_lineno, + query->first_lineno); + return -EINVAL; + } + } else { + query->last_lineno = query->first_lineno; + } + vpr_info("parsed line %d-%d\n", query->first_lineno, + query->last_lineno); + return 0; +} + static int check_set(const char **dest, char *src, char *name) { int rc = 0; @@ -322,61 +371,53 @@ static int ddebug_parse_query(char *words[], int nwords, { unsigned int i; int rc = 0; + char *fline; /* check we have an even number of words */ if (nwords % 2 != 0) { pr_err("expecting pairs of match-spec <value>\n"); return -EINVAL; } - memset(query, 0, sizeof(*query)); if (modname) /* support $modname.dyndbg=<multiple queries> */ query->module = modname; for (i = 0; i < nwords; i += 2) { - if (!strcmp(words[i], "func")) { - rc = check_set(&query->function, words[i+1], "func"); - } else if (!strcmp(words[i], "file")) { - rc = check_set(&query->filename, words[i+1], "file"); - } else if (!strcmp(words[i], "module")) { - rc = check_set(&query->module, words[i+1], "module"); - } else if (!strcmp(words[i], "format")) { - string_unescape_inplace(words[i+1], UNESCAPE_SPACE | - UNESCAPE_OCTAL | - UNESCAPE_SPECIAL); - rc = check_set(&query->format, words[i+1], "format"); - } else if (!strcmp(words[i], "line")) { - char *first = words[i+1]; - char *last = strchr(first, '-'); - if (query->first_lineno || query->last_lineno) { - pr_err("match-spec: line used 2x\n"); - return -EINVAL; - } - if (last) - *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) - return -EINVAL; - if (last) { - /* range <first>-<last> */ - if (parse_lineno(last, &query->last_lineno) < 0) - return -EINVAL; + char *keyword = words[i]; + char *arg = 
words[i+1]; - /* special case for last lineno not specified */ - if (query->last_lineno == 0) - query->last_lineno = UINT_MAX; + if (!strcmp(keyword, "func")) { + rc = check_set(&query->function, arg, "func"); + } else if (!strcmp(keyword, "file")) { + if (check_set(&query->filename, arg, "file")) + return -EINVAL; - if (query->last_lineno < query->first_lineno) { - pr_err("last-line:%d < 1st-line:%d\n", - query->last_lineno, - query->first_lineno); + /* tail :$info is function or line-range */ + fline = strchr(query->filename, ':'); + if (!fline) + break; + *fline++ = '\0'; + if (isalpha(*fline) || *fline == '*' || *fline == '?') { + /* take as function name */ + if (check_set(&query->function, fline, "func")) return -EINVAL; - } } else { - query->last_lineno = query->first_lineno; + if (parse_linerange(query, fline)) + return -EINVAL; } + } else if (!strcmp(keyword, "module")) { + rc = check_set(&query->module, arg, "module"); + } else if (!strcmp(keyword, "format")) { + string_unescape_inplace(arg, UNESCAPE_SPACE | + UNESCAPE_OCTAL | + UNESCAPE_SPECIAL); + rc = check_set(&query->format, arg, "format"); + } else if (!strcmp(keyword, "line")) { + if (parse_linerange(query, arg)) + return -EINVAL; } else { - pr_err("unknown keyword \"%s\"\n", words[i]); + pr_err("unknown keyword \"%s\"\n", keyword); return -EINVAL; } if (rc) @@ -392,11 +433,9 @@ static int ddebug_parse_query(char *words[], int nwords, * flags fields of matched _ddebug's. Returns 0 on success * or <0 on error. */ -static int ddebug_parse_flags(const char *str, unsigned int *flagsp, - unsigned int *maskp) +static int ddebug_parse_flags(const char *str, struct flag_settings *modifiers) { - unsigned flags = 0; - int op = '=', i; + int op, i; switch (*str) { case '+': @@ -413,40 +452,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, for (; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { if (*str == opt_array[i].opt_char) { - flags |= opt_array[i].flag; + modifiers->flags |= opt_array[i].flag; break; } } if (i < 0) { - pr_err("unknown flag '%c' in \"%s\"\n", *str, str); + pr_err("unknown flag '%c'\n", *str); return -EINVAL; } } - vpr_info("flags=0x%x\n", flags); + vpr_info("flags=0x%x\n", modifiers->flags); - /* calculate final *flagsp, *maskp according to mask and op */ + /* calculate final flags, mask based upon op */ switch (op) { case '=': - *maskp = 0; - *flagsp = flags; + /* modifiers->flags already set */ + modifiers->mask = 0; break; case '+': - *maskp = ~0U; - *flagsp = flags; + modifiers->mask = ~0U; break; case '-': - *maskp = ~flags; - *flagsp = 0; + modifiers->mask = ~modifiers->flags; + modifiers->flags = 0; break; } - vpr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp); + vpr_info("*flagsp=0x%x *maskp=0x%x\n", modifiers->flags, modifiers->mask); + return 0; } static int ddebug_exec_query(char *query_string, const char *modname) { - unsigned int flags = 0, mask = 0; - struct ddebug_query query; + struct flag_settings modifiers = {}; + struct ddebug_query query = {}; #define MAXWORDS 9 int nwords, nfound; char *words[MAXWORDS]; @@ -457,7 +496,7 @@ static int ddebug_exec_query(char *query_string, const char *modname) return -EINVAL; } /* check flags 1st (last arg) so query is pairs of spec,val */ - if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) { + if (ddebug_parse_flags(words[nwords-1], &modifiers)) { pr_err("flags parse failed\n"); return -EINVAL; } @@ -466,7 +505,7 @@ static int ddebug_exec_query(char *query_string, const char *modname) return -EINVAL; } /* 
actually go and implement the change */ - nfound = ddebug_change(&query, flags, mask); + nfound = ddebug_change(&query, &modifiers); vpr_info_dq(&query, nfound ? "applied" : "no-match"); return nfound; @@ -509,6 +548,35 @@ static int ddebug_exec_queries(char *query, const char *modname) return nfound; } +/** + * dynamic_debug_exec_queries - select and change dynamic-debug prints + * @query: query-string described in admin-guide/dynamic-debug-howto + * @modname: string containing module name, usually &module.mod_name + * + * This uses the >/proc/dynamic_debug/control reader, allowing module + * authors to modify their dynamic-debug callsites. The modname is + * canonically struct module.mod_name, but can also be null or a + * module-wildcard, for example: "drm*". + */ +int dynamic_debug_exec_queries(const char *query, const char *modname) +{ + int rc; + char *qry; /* writable copy of query */ + + if (!query) { + pr_err("non-null query/command string expected\n"); + return -EINVAL; + } + qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); + if (!qry) + return -ENOMEM; + + rc = ddebug_exec_queries(qry, modname); + kfree(qry); + return rc; +} +EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries); + #define PREFIX_SIZE 64 static int remaining(int wrote) @@ -771,8 +839,6 @@ static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) struct _ddebug *dp; int n = *pos; - vpr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos); - mutex_lock(&ddebug_lock); if (!n) @@ -795,9 +861,6 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) struct ddebug_iter *iter = m->private; struct _ddebug *dp; - vpr_info("called m=%p p=%p *pos=%lld\n", - m, p, (unsigned long long)*pos); - if (p == SEQ_START_TOKEN) dp = ddebug_iter_first(iter); else @@ -816,9 +879,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) { struct ddebug_iter *iter = m->private; struct _ddebug *dp = p; - char flagsbuf[10]; - - vpr_info("called m=%p p=%p\n", m, p); + struct flagsbuf flags; if (p == SEQ_START_TOKEN) { seq_puts(m, @@ -829,7 +890,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) seq_printf(m, "%s:%u [%s]%s =%s \"", trim_prefix(dp->filename), dp->lineno, iter->table->mod_name, dp->function, - ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + ddebug_describe_flags(dp->flags, &flags)); seq_escape(m, dp->format, "\t\r\n\""); seq_puts(m, "\"\n"); @@ -842,7 +903,6 @@ static int ddebug_proc_show(struct seq_file *m, void *p) */ static void ddebug_proc_stop(struct seq_file *m, void *p) { - vpr_info("called m=%p p=%p\n", m, p); mutex_unlock(&ddebug_lock); } @@ -853,13 +913,6 @@ static const struct seq_operations ddebug_proc_seqops = { .stop = ddebug_proc_stop }; -/* - * File_ops->open method for <debugfs>/dynamic_debug/control. Does - * the seq_file setup dance, and also creates an iterator to walk the - * _ddebugs. Note that we create a seq_file always, even for O_WRONLY - * files where it's not needed, as doing so simplifies the ->release - * method. 
- */ static int ddebug_proc_open(struct inode *inode, struct file *file) { vpr_info("called\n"); @@ -909,10 +962,10 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, dt->ddebugs = tab; mutex_lock(&ddebug_lock); - list_add_tail(&dt->link, &ddebug_tables); + list_add(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - vpr_info("%u debug prints in module %s\n", n, dt->mod_name); + v2pr_info("%3u debug prints in module %s\n", n, dt->mod_name); return 0; } @@ -971,7 +1024,7 @@ int ddebug_remove_module(const char *mod_name) struct ddebug_table *dt, *nextdt; int ret = -ENOENT; - vpr_info("removing module \"%s\"\n", mod_name); + v2pr_info("removing module \"%s\"\n", mod_name); mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { @@ -1029,9 +1082,8 @@ static int __init dynamic_debug_init(void) char *cmdline; int ret = 0; int n = 0, entries = 0, modct = 0; - int verbose_bytes = 0; - if (&__start___verbose == &__stop___verbose) { + if (&__start___dyndbg == &__stop___dyndbg) { if (IS_ENABLED(CONFIG_DYNAMIC_DEBUG)) { pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n"); return 1; @@ -1040,14 +1092,11 @@ static int __init dynamic_debug_init(void) ddebug_init_success = 1; return 0; } - iter = __start___verbose; + iter = __start___dyndbg; modname = iter->modname; iter_start = iter; - for (; iter < __stop___verbose; iter++) { + for (; iter < __stop___dyndbg; iter++) { entries++; - verbose_bytes += strlen(iter->modname) + strlen(iter->function) - + strlen(iter->filename) + strlen(iter->format); - if (strcmp(modname, iter->modname)) { modct++; ret = ddebug_add_module(iter_start, n, modname); @@ -1064,9 +1113,9 @@ static int __init dynamic_debug_init(void) goto out_err; ddebug_init_success = 1; - vpr_info("%d modules, %d entries and %d bytes in ddebug tables, %d bytes in (readonly) verbose section\n", + vpr_info("%d modules, %d entries and %d bytes in ddebug tables, %d bytes in __dyndbg section\n", modct, entries, (int)(modct * sizeof(struct ddebug_table)), - verbose_bytes + (int)(__stop___verbose - __start___verbose)); + (int)(entries * sizeof(struct _ddebug))); /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e659a027036e..fde0aa244148 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count) * A decrease is only considered if the queue has been busy in * the whole interval (the check above). * - * If there is slack, the amount of execess data queued above - * the the amount needed to prevent starvation, the queue limit + * If there is slack, the amount of excess data queued above + * the amount needed to prevent starvation, the queue limit * can be decreased. To avoid hysteresis we consider the * minimum amount of slack found over several iterations of the * completion routine. 
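The slack logic patched above only takes effect through the way a driver pairs the dql calls around its hardware queue. A minimal sketch of that pairing, assuming only the dql_queued()/dql_avail()/dql_completed() API from <linux/dynamic_queue_limits.h>; the toy_ring structure and its stop/wake handling are hypothetical stand-ins for a real driver's queue control:

	#include <linux/types.h>
	#include <linux/dynamic_queue_limits.h>

	struct toy_ring {			/* hypothetical driver state */
		struct dql dql;			/* set up once with dql_init(&dql, HZ) */
		bool stopped;
	};

	/* Producer path: account bytes as they are posted to the hardware. */
	static void toy_ring_post(struct toy_ring *r, unsigned int bytes)
	{
		dql_queued(&r->dql, bytes);	/* raises num_queued */
		if (dql_avail(&r->dql) < 0)	/* limit reached */
			r->stopped = true;	/* hypothetical backpressure */
	}

	/* Completion path: dql_completed() re-evaluates the limit; sustained
	 * slack (completions finishing with bytes to spare) is what lets the
	 * limit shrink again, as the comment above describes. */
	static void toy_ring_clean(struct toy_ring *r, unsigned int bytes)
	{
		dql_completed(&r->dql, bytes);
		if (r->stopped && dql_avail(&r->dql) >= 0)
			r->stopped = false;	/* hypothetical wake-up */
	}

For network drivers this producer/consumer pairing is essentially what the BQL helpers netdev_tx_sent_queue() and netdev_tx_completed_queue() wrap.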
diff --git a/lib/earlycpio.c b/lib/earlycpio.c index c001e084829e..e83628882001 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -42,7 +42,7 @@ enum cpio_fields { /** * cpio_data find_cpio_data - Search for files in an uncompressed cpio * @path: The directory to search for, including a slash at the end - * @data: Pointer to the the cpio archive or a header inside + * @data: Pointer to the cpio archive or a header inside * @len: Remaining length of the cpio based on data pointer * @nextoff: When a matching file is found, this is the offset from the * beginning of the cpio to the beginning of the next file, not the diff --git a/lib/errname.c b/lib/errname.c index 0c4d3e66170e..05cbf731545f 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -3,6 +3,7 @@ #include <linux/errno.h> #include <linux/errname.h> #include <linux/kernel.h> +#include <linux/math.h> /* * Ensure these tables do not accidentally become gigantic if some diff --git a/lib/error-inject.c b/lib/error-inject.c index aa63751c916f..c73651b15b76 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -180,6 +180,8 @@ static const char *error_type_string(int etype) return "ERRNO"; case EI_ETYPE_ERRNO_NULL: return "ERRNO_NULL"; + case EI_ETYPE_TRUE: + return "TRUE"; default: return "(unknown)"; } diff --git a/lib/errseq.c b/lib/errseq.c index 81f9e33aa7e7..93e9b94358dc 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -3,6 +3,7 @@ #include <linux/bug.h> #include <linux/atomic.h> #include <linux/errseq.h> +#include <linux/log2.h> /* * An errseq_t is a way of recording errors in one place, and allowing any diff --git a/lib/extable.c b/lib/extable.c index c3e59caf7ffa..9c9f40bd2b3d 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -21,7 +21,6 @@ static inline unsigned long ex_to_insn(const struct exception_table_entry *x) } #endif -#ifndef ARCH_HAS_SORT_EXTABLE #ifndef ARCH_HAS_RELATIVE_EXTABLE #define swap_ex NULL #else @@ -88,9 +87,6 @@ void trim_init_extable(struct module *m) m->num_exentries--; } #endif /* CONFIG_MODULES */ -#endif /* !ARCH_HAS_SORT_EXTABLE */ - -#ifndef ARCH_HAS_SEARCH_EXTABLE static int cmp_ex_search(const void *key, const void *elt) { @@ -120,4 +116,3 @@ search_extable(const struct exception_table_entry *base, return bsearch(&value, base, num, sizeof(struct exception_table_entry), cmp_ex_search); } -#endif diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c new file mode 100644 index 000000000000..77558b6c29ca --- /dev/null +++ b/lib/fault-inject-usercopy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/fault-inject.h> +#include <linux/fault-inject-usercopy.h> + +static struct { + struct fault_attr attr; +} fail_usercopy = { + .attr = FAULT_ATTR_INITIALIZER, +}; + +static int __init setup_fail_usercopy(char *str) +{ + return setup_fault_attr(&fail_usercopy.attr, str); +} +__setup("fail_usercopy=", setup_fail_usercopy); + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int __init fail_usercopy_debugfs(void) +{ + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_usercopy", NULL, + &fail_usercopy.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + return 0; +} + +late_initcall(fail_usercopy_debugfs); + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +bool should_fail_usercopy(void) +{ + return should_fail(&fail_usercopy.attr, 1); +} +EXPORT_SYMBOL_GPL(should_fail_usercopy); diff --git a/lib/find_bit.c b/lib/find_bit.c index 49f875f1baf7..f67f86fd2f62 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -15,7 +15,9 @@ #include <linux/bitops.h> 
#include <linux/bitmap.h> #include <linux/export.h> -#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/minmax.h> +#include <linux/swab.h> #if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \ diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig index 37baa79cdd71..c035fde66aeb 100644 --- a/lib/fonts/Kconfig +++ b/lib/fonts/Kconfig @@ -119,6 +119,12 @@ config FONT_TER16x32 This is the high resolution, large version for use with HiDPI screens. If the standard font is unreadable for you, say Y, otherwise say N. +config FONT_6x8 + bool "OLED 6x8 font" if FONTS + depends on FRAMEBUFFER_CONSOLE + help + This font is useful for small displays (OLED). + config FONT_AUTOSELECT def_bool y depends on !FONT_8x8 @@ -132,6 +138,7 @@ config FONT_AUTOSELECT depends on !FONT_SUN12x22 depends on !FONT_10x18 depends on !FONT_TER16x32 + depends on !FONT_6x8 select FONT_8x16 endif # FONT_SUPPORT diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile index ed95070860de..e16f68492174 100644 --- a/lib/fonts/Makefile +++ b/lib/fonts/Makefile @@ -15,6 +15,7 @@ font-objs-$(CONFIG_FONT_ACORN_8x8) += font_acorn_8x8.o font-objs-$(CONFIG_FONT_MINI_4x6) += font_mini_4x6.o font-objs-$(CONFIG_FONT_6x10) += font_6x10.o font-objs-$(CONFIG_FONT_TER16x32) += font_ter16x32.o +font-objs-$(CONFIG_FONT_6x8) += font_6x8.o font-objs += $(font-objs-y) diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c index 532f0ff89a96..5d940db626e7 100644 --- a/lib/fonts/font_10x18.c +++ b/lib/fonts/font_10x18.c @@ -8,8 +8,8 @@ #define FONTDATAMAX 9216 -static const unsigned char fontdata_10x18[FONTDATAMAX] = { - +static const struct font_data fontdata_10x18 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 0000000000 */ 0x00, 0x00, /* 0000000000 */ @@ -5129,8 +5129,7 @@ static const unsigned char fontdata_10x18[FONTDATAMAX] = { 0x00, 0x00, /* 0000000000 */ 0x00, 0x00, /* 0000000000 */ 0x00, 0x00, /* 0000000000 */ - -}; +} }; const struct font_desc font_10x18 = { @@ -5138,7 +5137,8 @@ const struct font_desc font_10x18 = { .name = "10x18", .width = 10, .height = 18, - .data = fontdata_10x18, + .charcount = 256, + .data = fontdata_10x18.data, #ifdef __sparc__ .pref = 5, #else diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c index 09b2cc03435b..e65df019e0d2 100644 --- a/lib/fonts/font_6x10.c +++ b/lib/fonts/font_6x10.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/font.h> -static const unsigned char fontdata_6x10[] = { +#define FONTDATAMAX 2560 +static const struct font_data fontdata_6x10 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ @@ -3074,14 +3076,14 @@ static const unsigned char fontdata_6x10[] = { 0x00, /* 00000000 */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ - -}; +} }; const struct font_desc font_6x10 = { .idx = FONT6x10_IDX, .name = "6x10", .width = 6, .height = 10, - .data = fontdata_6x10, + .charcount = 256, + .data = fontdata_6x10.data, .pref = 0, }; diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c index d7136c33f1f0..bd76b3f6b635 100644 --- a/lib/fonts/font_6x11.c +++ b/lib/fonts/font_6x11.c @@ -9,8 +9,8 @@ #define FONTDATAMAX (11*256) -static const unsigned char fontdata_6x11[FONTDATAMAX] = { - +static const struct font_data fontdata_6x11 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ @@ -3338,8 +3338,7 @@ static const unsigned char fontdata_6x11[FONTDATAMAX] = { 0x00, /* 00000000 */ 
0x00, /* 00000000 */ 0x00, /* 00000000 */ - -}; +} }; const struct font_desc font_vga_6x11 = { @@ -3347,7 +3346,8 @@ const struct font_desc font_vga_6x11 = { .name = "ProFont6x11", .width = 6, .height = 11, - .data = fontdata_6x11, + .charcount = 256, + .data = fontdata_6x11.data, /* Try avoiding this font if possible unless on MAC */ .pref = -2000, }; diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c new file mode 100644 index 000000000000..06ace7792521 --- /dev/null +++ b/lib/fonts/font_6x8.c @@ -0,0 +1,2577 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/font.h> + +#define FONTDATAMAX 2048 + +static const struct font_data fontdata_6x8 = { + { 0, 0, FONTDATAMAX, 0 }, { + /* 0 0x00 '^@' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 1 0x01 '^A' */ + 0x78, /* 011110 */ + 0x84, /* 100001 */ + 0xCC, /* 110011 */ + 0x84, /* 100001 */ + 0xCC, /* 110011 */ + 0xB4, /* 101101 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 2 0x02 '^B' */ + 0x78, /* 011110 */ + 0xFC, /* 111111 */ + 0xB4, /* 101101 */ + 0xFC, /* 111111 */ + 0xB4, /* 101101 */ + 0xCC, /* 110011 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 3 0x03 '^C' */ + 0x00, /* 000000 */ + 0x28, /* 001010 */ + 0x7C, /* 011111 */ + 0x7C, /* 011111 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 4 0x04 '^D' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x7C, /* 011111 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 5 0x05 '^E' */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x6C, /* 011011 */ + 0x6C, /* 011011 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 6 0x06 '^F' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x7C, /* 011111 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 7 0x07 '^G' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x78, /* 011110 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 8 0x08 '^H' */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xCC, /* 110011 */ + 0x84, /* 100001 */ + 0xCC, /* 110011 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + + /* 9 0x09 '^I' */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x48, /* 010010 */ + 0x84, /* 100001 */ + 0x48, /* 010010 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 10 0x0A '^J' */ + 0xFC, /* 111111 */ + 0xCC, /* 110011 */ + 0xB4, /* 101101 */ + 0x78, /* 011110 */ + 0xB4, /* 101101 */ + 0xCC, /* 110011 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + + /* 11 0x0B '^K' */ + 0x3C, /* 001111 */ + 0x14, /* 000101 */ + 0x20, /* 001000 */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 12 0x0C '^L' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 13 0x0D '^M' */ + 0x18, /* 000110 */ + 0x14, /* 000101 */ + 0x14, /* 000101 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x70, /* 011100 */ + 0x60, /* 011000 */ + 0x00, /* 000000 */ + + /* 14 0x0E '^N' */ + 0x3C, /* 001111 */ + 0x24, /* 001001 */ + 0x3C, /* 001111 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x6C, /* 011011 */ + 0x6C, /* 011011 */ + 0x00, /* 000000 */ + + 
/* 15 0x0F '^O' */ + 0x10, /* 000100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x6C, /* 011011 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 16 0x10 '^P' */ + 0x40, /* 010000 */ + 0x60, /* 011000 */ + 0x70, /* 011100 */ + 0x78, /* 011110 */ + 0x70, /* 011100 */ + 0x60, /* 011000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 17 0x11 '^Q' */ + 0x04, /* 000001 */ + 0x0C, /* 000011 */ + 0x1C, /* 000111 */ + 0x3C, /* 001111 */ + 0x1C, /* 000111 */ + 0x0C, /* 000011 */ + 0x04, /* 000001 */ + 0x00, /* 000000 */ + + /* 18 0x12 '^R' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x10, /* 000100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 19 0x13 '^S' */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x00, /* 000000 */ + 0x48, /* 010010 */ + 0x00, /* 000000 */ + + /* 20 0x14 '^T' */ + 0x3C, /* 001111 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x3C, /* 001111 */ + 0x14, /* 000101 */ + 0x14, /* 000101 */ + 0x14, /* 000101 */ + 0x00, /* 000000 */ + + /* 21 0x15 '^U' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x30, /* 001100 */ + 0x28, /* 001010 */ + 0x14, /* 000101 */ + 0x0C, /* 000011 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + + /* 22 0x16 '^V' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xF8, /* 111110 */ + 0xF8, /* 111110 */ + 0xF8, /* 111110 */ + 0x00, /* 000000 */ + + /* 23 0x17 '^W' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x10, /* 000100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + + /* 24 0x18 '^X' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 25 0x19 '^Y' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 26 0x1A '^Z' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x7C, /* 011111 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 27 0x1B '^[' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x7C, /* 011111 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 28 0x1C '^\' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 29 0x1D '^]' */ + 0x00, /* 000000 */ + 0x48, /* 010010 */ + 0x84, /* 100001 */ + 0xFC, /* 111111 */ + 0x84, /* 100001 */ + 0x48, /* 010010 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 30 0x1E '^^' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x7C, /* 011111 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 31 0x1F '^_' */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x7C, /* 011111 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 32 0x20 ' ' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 33 0x21 '!' 
*/ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 34 0x22 '"' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 35 0x23 '#' */ + 0x00, /* 000000 */ + 0x28, /* 001010 */ + 0x7C, /* 011111 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x7C, /* 011111 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + + /* 36 0x24 '$' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x40, /* 010000 */ + 0x30, /* 001100 */ + 0x08, /* 000010 */ + 0x70, /* 011100 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + + /* 37 0x25 '%' */ + 0x64, /* 011001 */ + 0x64, /* 011001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x4C, /* 010011 */ + 0x4C, /* 010011 */ + 0x00, /* 000000 */ + + /* 38 0x26 '&' */ + 0x30, /* 001100 */ + 0x48, /* 010010 */ + 0x50, /* 010100 */ + 0x20, /* 001000 */ + 0x54, /* 010101 */ + 0x48, /* 010010 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 39 0x27 ''' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 40 0x28 '(' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x20, /* 001000 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + + /* 41 0x29 ')' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + + /* 42 0x2A '*' */ + 0x10, /* 000100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 43 0x2B '+' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 44 0x2C ',' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x30, /* 001100 */ + 0x20, /* 001000 */ + + /* 45 0x2D '-' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 46 0x2E '.'
*/ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x18, /* 000110 */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + + /* 47 0x2F '/' */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x20, /* 001000 */ + 0x40, /* 010000 */ + + /* 48 0x30 '0' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x54, /* 010101 */ + 0x64, /* 011001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 49 0x31 '1' */ + 0x10, /* 000100 */ + 0x30, /* 001100 */ + 0x50, /* 010100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 50 0x32 '2' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 51 0x33 '3' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x04, /* 000001 */ + 0x18, /* 000110 */ + 0x04, /* 000001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 52 0x34 '4' */ + 0x08, /* 000010 */ + 0x18, /* 000110 */ + 0x28, /* 001010 */ + 0x48, /* 010010 */ + 0x7C, /* 011111 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + + /* 53 0x35 '5' */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x04, /* 000001 */ + 0x04, /* 000001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 54 0x36 '6' */ + 0x18, /* 000110 */ + 0x20, /* 001000 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 55 0x37 '7' */ + 0x7C, /* 011111 */ + 0x04, /* 000001 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 56 0x38 '8' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 57 0x39 '9' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + + /* 58 0x3A ':' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x18, /* 000110 */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x18, /* 000110 */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + + /* 59 0x3B ';' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x30, /* 001100 */ + 0x20, /* 001000 */ + + /* 60 0x3C '<' */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x04, /* 000001 */ + 0x00, /* 000000 */ + + /* 61 0x3D '=' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 62 0x3E '>' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + + /* 63 0x3F '?' 
*/ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 64 0x40 '@' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x5C, /* 010111 */ + 0x54, /* 010101 */ + 0x5C, /* 010111 */ + 0x40, /* 010000 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 65 0x41 'A' */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 66 0x42 'B' */ + 0x78, /* 011110 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x38, /* 001110 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 67 0x43 'C' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 68 0x44 'D' */ + 0x78, /* 011110 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 69 0x45 'E' */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 70 0x46 'F' */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 71 0x47 'G' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x5C, /* 010111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 72 0x48 'H' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 73 0x49 'I' */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 74 0x4A 'J' */ + 0x1C, /* 000111 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + + /* 75 0x4B 'K' */ + 0x44, /* 010001 */ + 0x48, /* 010010 */ + 0x50, /* 010100 */ + 0x60, /* 011000 */ + 0x50, /* 010100 */ + 0x48, /* 010010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 76 0x4C 'L' */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 77 0x4D 'M' */ + 0x44, /* 010001 */ + 0x6C, /* 011011 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 78 0x4E 'N' */ + 0x44, /* 010001 */ + 0x64, /* 011001 */ + 0x54, /* 010101 */ + 0x4C, /* 010011 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 79 0x4F 'O' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 80 0x50 'P' */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x78, /* 011110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 81 0x51 'Q' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x54, /* 010101 */ + 0x48, /* 010010 */ + 
0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 82 0x52 'R' */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x78, /* 011110 */ + 0x50, /* 010100 */ + 0x48, /* 010010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 83 0x53 'S' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 84 0x54 'T' */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 85 0x55 'U' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 86 0x56 'V' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 87 0x57 'W' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x6C, /* 011011 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 88 0x58 'X' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 89 0x59 'Y' */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 90 0x5A 'Z' */ + 0x7C, /* 011111 */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x40, /* 010000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 91 0x5B '[' */ + 0x18, /* 000110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + + /* 92 0x5C '\' */ + 0x40, /* 010000 */ + 0x20, /* 001000 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x04, /* 000001 */ + + /* 93 0x5D ']' */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + + /* 94 0x5E '^' */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 95 0x5F '_' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + + /* 96 0x60 '`' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 97 0x61 'a' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 98 0x62 'b' */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x58, /* 010110 */ + 0x64, /* 011001 */ + 0x44, /* 010001 */ + 0x64, /* 011001 */ + 0x58, /* 010110 */ + 0x00, /* 000000 */ + + /* 99 0x63 'c' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 100 0x64 'd' */ + 0x04, /* 000001 */ + 0x04, /* 000001 */ + 0x34, /* 001101 */ + 0x4C, 
/* 010011 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 101 0x65 'e' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 102 0x66 'f' */ + 0x0C, /* 000011 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 103 0x67 'g' */ + 0x00, /* 000000 */ + 0x34, /* 001101 */ + 0x4C, /* 010011 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x04, /* 000001 */ + 0x38, /* 001110 */ + + /* 104 0x68 'h' */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 105 0x69 'i' */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 106 0x6A 'j' */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x60, /* 011000 */ + + /* 107 0x6B 'k' */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x48, /* 010010 */ + 0x50, /* 010100 */ + 0x70, /* 011100 */ + 0x48, /* 010010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 108 0x6C 'l' */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 109 0x6D 'm' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x68, /* 011010 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x00, /* 000000 */ + + /* 110 0x6E 'n' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x58, /* 010110 */ + 0x64, /* 011001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 111 0x6F 'o' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 112 0x70 'p' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x78, /* 011110 */ + 0x44, /* 010001 */ + 0x64, /* 011001 */ + 0x58, /* 010110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + + /* 113 0x71 'q' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x04, /* 000001 */ + 0x04, /* 000001 */ + + /* 114 0x72 'r' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x58, /* 010110 */ + 0x64, /* 011001 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 115 0x73 's' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x3C, /* 001111 */ + 0x40, /* 010000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 116 0x74 't' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x0C, /* 000011 */ + 0x00, /* 000000 */ + + /* 117 0x75 'u' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 118 0x76 'v' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 119 0x77 'w' 
*/ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + + /* 120 0x78 'x' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 121 0x79 'y' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x04, /* 000001 */ + 0x38, /* 001110 */ + + /* 122 0x7A 'z' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 123 0x7B '{' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + + /* 124 0x7C '|' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 125 0x7D '}' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + + /* 126 0x7E '~' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x20, /* 001000 */ + 0x54, /* 010101 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 127 0x7F '' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 128 0x80 '\200' */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + + /* 129 0x81 '\201' */ + 0x00, /* 000000 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 130 0x82 '\202' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 131 0x83 '\203' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 132 0x84 '\204' */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 133 0x85 '\205' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 134 0x86 '\206' */ + 0x3C, /* 001111 */ + 0x18, /* 000110 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 135 0x87 '\207' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + + /* 136 0x88 '\210' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 137 0x89 '\211' */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ 
+ 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 138 0x8A '\212' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 139 0x8B '\213' */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 140 0x8C '\214' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 141 0x8D '\215' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 142 0x8E '\216' */ + 0x44, /* 010001 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 143 0x8F '\217' */ + 0x30, /* 001100 */ + 0x48, /* 010010 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 144 0x90 '\220' */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x78, /* 011110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 145 0x91 '\221' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x78, /* 011110 */ + 0x14, /* 000101 */ + 0x7C, /* 011111 */ + 0x50, /* 010100 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 146 0x92 '\222' */ + 0x3C, /* 001111 */ + 0x50, /* 010100 */ + 0x50, /* 010100 */ + 0x78, /* 011110 */ + 0x50, /* 010100 */ + 0x50, /* 010100 */ + 0x5C, /* 010111 */ + 0x00, /* 000000 */ + + /* 147 0x93 '\223' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 148 0x94 '\224' */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 149 0x95 '\225' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 150 0x96 '\226' */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 151 0x97 '\227' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 152 0x98 '\230' */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x04, /* 000001 */ + 0x38, /* 001110 */ + + /* 153 0x99 '\231' */ + 0x84, /* 100001 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 154 0x9A '\232' */ + 0x88, /* 100010 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 155 0x9B '\233' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x50, /* 010100 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ 
+ 0x00, /* 000000 */ + + /* 156 0x9C '\234' */ + 0x30, /* 001100 */ + 0x48, /* 010010 */ + 0x40, /* 010000 */ + 0x70, /* 011100 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x78, /* 011110 */ + 0x00, /* 000000 */ + + /* 157 0x9D '\235' */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 158 0x9E '\236' */ + 0x70, /* 011100 */ + 0x48, /* 010010 */ + 0x70, /* 011100 */ + 0x48, /* 010010 */ + 0x5C, /* 010111 */ + 0x48, /* 010010 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 159 0x9F '\237' */ + 0x0C, /* 000011 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x60, /* 011000 */ + 0x00, /* 000000 */ + + /* 160 0xA0 '\240' */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 161 0xA1 '\241' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x30, /* 001100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 162 0xA2 '\242' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 163 0xA3 '\243' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x4C, /* 010011 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 164 0xA4 '\244' */ + 0x34, /* 001101 */ + 0x58, /* 010110 */ + 0x00, /* 000000 */ + 0x58, /* 010110 */ + 0x64, /* 011001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 165 0xA5 '\245' */ + 0x58, /* 010110 */ + 0x44, /* 010001 */ + 0x64, /* 011001 */ + 0x54, /* 010101 */ + 0x4C, /* 010011 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 166 0xA6 '\246' */ + 0x38, /* 001110 */ + 0x04, /* 000001 */ + 0x3C, /* 001111 */ + 0x44, /* 010001 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 167 0xA7 '\247' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 168 0xA8 '\250' */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x40, /* 010000 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 169 0xA9 '\251' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 170 0xAA '\252' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x04, /* 000001 */ + 0x04, /* 000001 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 171 0xAB '\253' */ + 0x20, /* 001000 */ + 0x24, /* 001001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x44, /* 010001 */ + 0x08, /* 000010 */ + 0x1C, /* 000111 */ + + /* 172 0xAC '\254' */ + 0x20, /* 001000 */ + 0x24, /* 001001 */ + 0x28, /* 001010 */ + 0x10, /* 000100 */ + 0x28, /* 001010 */ + 0x58, /* 010110 */ + 0x3C, /* 001111 */ + 0x08, /* 000010 */ + + /* 173 0xAD '\255' */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + + /* 174 0xAE '\256' */ + 0x00, /* 
000000 */ + 0x00, /* 000000 */ + 0x24, /* 001001 */ + 0x48, /* 010010 */ + 0x90, /* 100100 */ + 0x48, /* 010010 */ + 0x24, /* 001001 */ + 0x00, /* 000000 */ + + /* 175 0xAF '\257' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x90, /* 100100 */ + 0x48, /* 010010 */ + 0x24, /* 001001 */ + 0x48, /* 010010 */ + 0x90, /* 100100 */ + 0x00, /* 000000 */ + + /* 176 0xB0 '\260' */ + 0x10, /* 000100 */ + 0x44, /* 010001 */ + 0x10, /* 000100 */ + 0x44, /* 010001 */ + 0x10, /* 000100 */ + 0x44, /* 010001 */ + 0x10, /* 000100 */ + 0x44, /* 010001 */ + + /* 177 0xB1 '\261' */ + 0xA8, /* 101010 */ + 0x54, /* 010101 */ + 0xA8, /* 101010 */ + 0x54, /* 010101 */ + 0xA8, /* 101010 */ + 0x54, /* 010101 */ + 0xA8, /* 101010 */ + 0x54, /* 010101 */ + + /* 178 0xB2 '\262' */ + 0xDC, /* 110111 */ + 0x74, /* 011101 */ + 0xDC, /* 110111 */ + 0x74, /* 011101 */ + 0xDC, /* 110111 */ + 0x74, /* 011101 */ + 0xDC, /* 110111 */ + 0x74, /* 011101 */ + + /* 179 0xB3 '\263' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 180 0xB4 '\264' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 181 0xB5 '\265' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 182 0xB6 '\266' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xE8, /* 111010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 183 0xB7 '\267' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xF8, /* 111110 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 184 0xB8 '\270' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 185 0xB9 '\271' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xE8, /* 111010 */ + 0x08, /* 000010 */ + 0xE8, /* 111010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 186 0xBA '\272' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 187 0xBB '\273' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xF8, /* 111110 */ + 0x08, /* 000010 */ + 0xE8, /* 111010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 188 0xBC '\274' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xE8, /* 111010 */ + 0x08, /* 000010 */ + 0xF8, /* 111110 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 189 0xBD '\275' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xF8, /* 111110 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 190 0xBE '\276' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 191 0xBF '\277' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xF0, /* 111100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 192 0xC0 '\300' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x1C, /* 
000111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 193 0xC1 '\301' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 194 0xC2 '\302' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 195 0xC3 '\303' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 196 0xC4 '\304' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 197 0xC5 '\305' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xFC, /* 111111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 198 0xC6 '\306' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 199 0xC7 '\307' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x2C, /* 001011 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 200 0xC8 '\310' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x2C, /* 001011 */ + 0x20, /* 001000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 201 0xC9 '\311' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x3C, /* 001111 */ + 0x20, /* 001000 */ + 0x2C, /* 001011 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 202 0xCA '\312' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xEC, /* 111011 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 203 0xCB '\313' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xEC, /* 111011 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 204 0xCC '\314' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x2C, /* 001011 */ + 0x20, /* 001000 */ + 0x2C, /* 001011 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 205 0xCD '\315' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 206 0xCE '\316' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xEC, /* 111011 */ + 0x00, /* 000000 */ + 0xEC, /* 111011 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 207 0xCF '\317' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 208 0xD0 '\320' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 209 0xD1 '\321' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 210 0xD2 '\322' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 
001010 */ + 0x28, /* 001010 */ + + /* 211 0xD3 '\323' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 212 0xD4 '\324' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 213 0xD5 '\325' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 214 0xD6 '\326' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x3C, /* 001111 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 215 0xD7 '\327' */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0xFC, /* 111111 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + + /* 216 0xD8 '\330' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xFC, /* 111111 */ + 0x10, /* 000100 */ + 0xFC, /* 111111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 217 0xD9 '\331' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0xF0, /* 111100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 218 0xDA '\332' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x1C, /* 000111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 219 0xDB '\333' */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + + /* 220 0xDC '\334' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + + /* 221 0xDD '\335' */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + 0xE0, /* 111000 */ + + /* 222 0xDE '\336' */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + 0x1C, /* 000111 */ + + /* 223 0xDF '\337' */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 224 0xE0 '\340' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x34, /* 001101 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x34, /* 001101 */ + 0x00, /* 000000 */ + + /* 225 0xE1 '\341' */ + 0x24, /* 001001 */ + 0x44, /* 010001 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x58, /* 010110 */ + 0x40, /* 010000 */ + + /* 226 0xE2 '\342' */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 227 0xE3 '\343' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x28, /* 001010 */ + 0x00, /* 000000 */ + + /* 228 0xE4 '\344' */ + 0x7C, /* 011111 */ + 0x24, /* 001001 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x24, /* 001001 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 229 0xE5 '\345' */ + 
0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x3C, /* 001111 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + + /* 230 0xE6 '\346' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x74, /* 011101 */ + 0x40, /* 010000 */ + + /* 231 0xE7 '\347' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x0C, /* 000011 */ + 0x00, /* 000000 */ + + /* 232 0xE8 '\350' */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 233 0xE9 '\351' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x7C, /* 011111 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 234 0xEA '\352' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x28, /* 001010 */ + 0x6C, /* 011011 */ + 0x00, /* 000000 */ + + /* 235 0xEB '\353' */ + 0x18, /* 000110 */ + 0x20, /* 001000 */ + 0x18, /* 000110 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x24, /* 001001 */ + 0x18, /* 000110 */ + 0x00, /* 000000 */ + + /* 236 0xEC '\354' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 237 0xED '\355' */ + 0x00, /* 000000 */ + 0x04, /* 000001 */ + 0x38, /* 001110 */ + 0x54, /* 010101 */ + 0x54, /* 010101 */ + 0x38, /* 001110 */ + 0x40, /* 010000 */ + 0x00, /* 000000 */ + + /* 238 0xEE '\356' */ + 0x3C, /* 001111 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x38, /* 001110 */ + 0x40, /* 010000 */ + 0x40, /* 010000 */ + 0x3C, /* 001111 */ + 0x00, /* 000000 */ + + /* 239 0xEF '\357' */ + 0x38, /* 001110 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x44, /* 010001 */ + 0x00, /* 000000 */ + + /* 240 0xF0 '\360' */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0xFC, /* 111111 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 241 0xF1 '\361' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x7C, /* 011111 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + + /* 242 0xF2 '\362' */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 243 0xF3 '\363' */ + 0x08, /* 000010 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x10, /* 000100 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 244 0xF4 '\364' */ + 0x0C, /* 000011 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + + /* 245 0xF5 '\365' */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x10, /* 000100 */ + 0x60, /* 011000 */ + + /* 246 0xF6 '\366' */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x7C, /* 011111 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 247 0xF7 '\367' */ + 0x00, /* 000000 */ + 0x20, /* 001000 */ + 0x54, /* 010101 */ + 
0x08, /* 000010 */ + 0x20, /* 001000 */ + 0x54, /* 010101 */ + 0x08, /* 000010 */ + 0x00, /* 000000 */ + + /* 248 0xF8 '\370' */ + 0x30, /* 001100 */ + 0x48, /* 010010 */ + 0x48, /* 010010 */ + 0x30, /* 001100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 249 0xF9 '\371' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x38, /* 001110 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 250 0xFA '\372' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x10, /* 000100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 251 0xFB '\373' */ + 0x04, /* 000001 */ + 0x08, /* 000010 */ + 0x08, /* 000010 */ + 0x50, /* 010100 */ + 0x50, /* 010100 */ + 0x20, /* 001000 */ + 0x20, /* 001000 */ + 0x00, /* 000000 */ + + /* 252 0xFC '\374' */ + 0x60, /* 011000 */ + 0x50, /* 010100 */ + 0x50, /* 010100 */ + 0x50, /* 010100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 253 0xFD '\375' */ + 0x60, /* 011000 */ + 0x10, /* 000100 */ + 0x20, /* 001000 */ + 0x70, /* 011100 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + + /* 254 0xFE '\376' */ + 0x00, /* 000000 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x38, /* 001110 */ + 0x00, /* 000000 */ + + /* 255 0xFF '\377' */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ + 0x00, /* 000000 */ +} }; + +const struct font_desc font_6x8 = { + .idx = FONT6x8_IDX, + .name = "6x8", + .width = 6, + .height = 8, + .charcount = 256, + .data = fontdata_6x8.data, + .pref = 0, +}; diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c index 89752d0b23e8..a2f561c9fa04 100644 --- a/lib/fonts/font_7x14.c +++ b/lib/fonts/font_7x14.c @@ -8,8 +8,8 @@ #define FONTDATAMAX 3584 -static const unsigned char fontdata_7x14[FONTDATAMAX] = { - +static const struct font_data fontdata_7x14 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 0000000 */ 0x00, /* 0000000 */ @@ -4105,8 +4105,7 @@ static const unsigned char fontdata_7x14[FONTDATAMAX] = { 0x00, /* 0000000 */ 0x00, /* 0000000 */ 0x00, /* 0000000 */ - -}; +} }; const struct font_desc font_7x14 = { @@ -4114,6 +4113,7 @@ const struct font_desc font_7x14 = { .name = "7x14", .width = 7, .height = 14, - .data = fontdata_7x14, + .charcount = 256, + .data = fontdata_7x14.data, .pref = 0, }; diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c index b7ab1f5fbdb8..06ae14088514 100644 --- a/lib/fonts/font_8x16.c +++ b/lib/fonts/font_8x16.c @@ -10,8 +10,8 @@ #define FONTDATAMAX 4096 -static const unsigned char fontdata_8x16[FONTDATAMAX] = { - +static const struct font_data fontdata_8x16 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ @@ -4619,8 +4619,7 @@ static const unsigned char fontdata_8x16[FONTDATAMAX] = { 0x00, /* 00000000 */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ - -}; +} }; const struct font_desc font_vga_8x16 = { @@ -4628,7 +4627,8 @@ const struct font_desc font_vga_8x16 = { .name = "VGA8x16", .width = 8, .height = 16, - .data = fontdata_8x16, + .charcount = 256, + .data = fontdata_8x16.data, .pref = 0, }; EXPORT_SYMBOL(font_vga_8x16); diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c index 2328ebc8bab5..69570b8c31af 100644 --- a/lib/fonts/font_8x8.c 
+++ b/lib/fonts/font_8x8.c @@ -9,8 +9,8 @@ #define FONTDATAMAX 2048 -static const unsigned char fontdata_8x8[FONTDATAMAX] = { - +static const struct font_data fontdata_8x8 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ @@ -2570,8 +2570,7 @@ static const unsigned char fontdata_8x8[FONTDATAMAX] = { 0x00, /* 00000000 */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ - -}; +} }; const struct font_desc font_vga_8x8 = { @@ -2579,6 +2578,7 @@ const struct font_desc font_vga_8x8 = { .name = "VGA8x8", .width = 8, .height = 8, - .data = fontdata_8x8, + .charcount = 256, + .data = fontdata_8x8.data, .pref = 0, }; diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c index 0ff0e85d4481..18755c33d249 100644 --- a/lib/fonts/font_acorn_8x8.c +++ b/lib/fonts/font_acorn_8x8.c @@ -3,7 +3,10 @@ #include <linux/font.h> -static const unsigned char acorndata_8x8[] = { +#define FONTDATAMAX 2048 + +static const struct font_data acorndata_8x8 = { +{ 0, 0, FONTDATAMAX, 0 }, { /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ^@ */ /* 01 */ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e, /* ^A */ /* 02 */ 0x7e, 0xff, 0xbd, 0xff, 0xc3, 0xe7, 0xff, 0x7e, /* ^B */ @@ -260,14 +263,15 @@ static const unsigned char acorndata_8x8[] = { /* FD */ 0x38, 0x04, 0x18, 0x20, 0x3c, 0x00, 0x00, 0x00, /* FE */ 0x00, 0x00, 0x3c, 0x3c, 0x3c, 0x3c, 0x00, 0x00, /* FF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; +} }; const struct font_desc font_acorn_8x8 = { .idx = ACORN8x8_IDX, .name = "Acorn8x8", .width = 8, .height = 8, - .data = acorndata_8x8, + .charcount = 256, + .data = acorndata_8x8.data, #ifdef CONFIG_ARCH_ACORN .pref = 20, #else diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c index 838caa1cfef7..8d39fd447952 100644 --- a/lib/fonts/font_mini_4x6.c +++ b/lib/fonts/font_mini_4x6.c @@ -43,8 +43,8 @@ __END__; #define FONTDATAMAX 1536 -static const unsigned char fontdata_mini_4x6[FONTDATAMAX] = { - +static const struct font_data fontdata_mini_4x6 = { + { 0, 0, FONTDATAMAX, 0 }, { /*{*/ /* Char 0: ' ' */ 0xee, /*= [*** ] */ @@ -2145,14 +2145,15 @@ static const unsigned char fontdata_mini_4x6[FONTDATAMAX] = { 0xee, /*= [*** ] */ 0x00, /*= [ ] */ /*}*/ -}; +} }; const struct font_desc font_mini_4x6 = { .idx = MINI4x6_IDX, .name = "MINI4x6", .width = 4, .height = 6, - .data = fontdata_mini_4x6, + .charcount = 256, + .data = fontdata_mini_4x6.data, .pref = 3, }; diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c index b15d3c342c5b..b1678ed0017c 100644 --- a/lib/fonts/font_pearl_8x8.c +++ b/lib/fonts/font_pearl_8x8.c @@ -14,8 +14,8 @@ #define FONTDATAMAX 2048 -static const unsigned char fontdata_pearl8x8[FONTDATAMAX] = { - +static const struct font_data fontdata_pearl8x8 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ @@ -2575,14 +2575,14 @@ static const unsigned char fontdata_pearl8x8[FONTDATAMAX] = { 0x00, /* 00000000 */ 0x00, /* 00000000 */ 0x00, /* 00000000 */ - -}; +} }; const struct font_desc font_pearl_8x8 = { .idx = PEARL8x8_IDX, .name = "PEARL8x8", .width = 8, .height = 8, - .data = fontdata_pearl8x8, + .charcount = 256, + .data = fontdata_pearl8x8.data, .pref = 2, }; diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c index 955d6eee3959..91daf5ab8b6b 100644 --- a/lib/fonts/font_sun12x22.c +++ b/lib/fonts/font_sun12x22.c @@ -3,8 +3,8 @@ #define FONTDATAMAX 11264 -static const unsigned char fontdata_sun12x22[FONTDATAMAX] = { - +static const struct font_data 
fontdata_sun12x22 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 000000000000 */ 0x00, 0x00, /* 000000000000 */ @@ -6148,8 +6148,7 @@ static const unsigned char fontdata_sun12x22[FONTDATAMAX] = { 0x00, 0x00, /* 000000000000 */ 0x00, 0x00, /* 000000000000 */ 0x00, 0x00, /* 000000000000 */ - -}; +} }; const struct font_desc font_sun_12x22 = { @@ -6157,7 +6156,8 @@ const struct font_desc font_sun_12x22 = { .name = "SUN12x22", .width = 12, .height = 22, - .data = fontdata_sun12x22, + .charcount = 256, + .data = fontdata_sun12x22.data, #ifdef __sparc__ .pref = 5, #else diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c index 03d71e53954a..81bb4eeae04e 100644 --- a/lib/fonts/font_sun8x16.c +++ b/lib/fonts/font_sun8x16.c @@ -3,7 +3,8 @@ #define FONTDATAMAX 4096 -static const unsigned char fontdata_sun8x16[FONTDATAMAX] = { +static const struct font_data fontdata_sun8x16 = { +{ 0, 0, FONTDATAMAX, 0 }, { /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0x81,0xa5,0x81,0x81,0xbd,0x99,0x81,0x81,0x7e,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0xff,0xdb,0xff,0xff,0xc3,0xe7,0xff,0xff,0x7e,0x00,0x00,0x00,0x00, @@ -260,14 +261,15 @@ static const unsigned char fontdata_sun8x16[FONTDATAMAX] = { /* */ 0x00,0x70,0xd8,0x30,0x60,0xc8,0xf8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x00,0x00,0x7c,0x7c,0x7c,0x7c,0x7c,0x7c,0x7c,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -}; +} }; const struct font_desc font_sun_8x16 = { .idx = SUN8x16_IDX, .name = "SUN8x16", .width = 8, .height = 16, - .data = fontdata_sun8x16, + .charcount = 256, + .data = fontdata_sun8x16.data, #ifdef __sparc__ .pref = 10, #else diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 3f0cf1ccdf3a..1955d624177c 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -4,8 +4,8 @@ #define FONTDATAMAX 16384 -static const unsigned char fontdata_ter16x32[FONTDATAMAX] = { - +static const struct font_data fontdata_ter16x32 = { + { 0, 0, FONTDATAMAX, 0 }, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, 0x70, 0x1c, 0x70, 0x1c, 0x70, 0x1c, 0x70, 0x1c, @@ -2054,8 +2054,7 @@ static const unsigned char fontdata_ter16x32[FONTDATAMAX] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 255 */ - -}; +} }; const struct font_desc font_ter_16x32 = { @@ -2063,7 +2062,8 @@ const struct font_desc font_ter_16x32 = { .name = "TER16x32", .width = 16, .height = 32, - .data = fontdata_ter16x32, + .charcount = 256, + .data = fontdata_ter16x32.data, #ifdef __sparc__ .pref = 5, #else diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c index e7258d8c252b..5f4b07b56cd9 100644 --- a/lib/fonts/fonts.c +++ b/lib/fonts/fonts.c @@ -57,6 +57,9 @@ static const struct font_desc *fonts[] = { #ifdef CONFIG_FONT_TER16x32 &font_ter_16x32, #endif +#ifdef CONFIG_FONT_6x8 + &font_6x8, +#endif }; #define num_fonts ARRAY_SIZE(fonts) diff --git a/lib/glob.c b/lib/glob.c index 0ba3ea86b546..85ecbda45cd8 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -102,7 +102,7 @@ bool __pure glob_match(char const *pat, char const *str) break; case '\\': d = *pat++; - /*FALLTHROUGH*/ + fallthrough; default: /* Literal character */ literal: if (c == d) { diff --git a/lib/hexdump.c b/lib/hexdump.c index 147133f8eb2f..9301578f98e8 100644 --- a/lib/hexdump.c +++ 
b/lib/hexdump.c @@ -7,6 +7,7 @@ #include <linux/ctype.h> #include <linux/errno.h> #include <linux/kernel.h> +#include <linux/minmax.h> #include <linux/export.h> #include <asm/unaligned.h> diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..f4ab4f4aa3c7 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace); * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -470,6 +471,7 @@ alloc: goto retry; nospc: xas_unlock_irqrestore(&xas, flags); + kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); @@ -479,7 +481,8 @@ EXPORT_SYMBOL(ida_alloc_range); * @ida: IDA handle. * @id: Previously allocated ID. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_free(struct ida *ida, unsigned int id) { @@ -531,7 +534,8 @@ EXPORT_SYMBOL(ida_free); * or freed. If the IDA is already empty, there is no need to call this * function. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_destroy(struct ida *ida) { diff --git a/lib/iomap.c b/lib/iomap.c index e909ab71e995..fbaa3e8f19d6 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -70,27 +70,27 @@ static void bad_io_access(unsigned long port, const char *access) #define mmio_read64be(addr) swab64(readq(addr)) #endif -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { IO_COND(addr, return inb(port), return readb(addr)); return 0xff; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { IO_COND(addr, return inw(port), return readw(addr)); return 0xffff; } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); return 0xffff; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { IO_COND(addr, return inl(port), return readl(addr)); return 0xffffffff; } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); return 0xffffffff; @@ -142,26 +142,26 @@ static u64 pio_read64be_hi_lo(unsigned long port) return lo | (hi << 32); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64be_lo_hi(port), return mmio_read64be(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64be_hi_lo(port), return mmio_read64be(addr)); @@ -275,7 +275,7 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * order" (we also don't have IO barriers). 
*/ #ifndef mmio_insb -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -283,7 +283,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) dst++; } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -291,7 +291,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) dst++; } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -325,15 +325,15 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } #endif -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count)); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count)); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count)); } diff --git a/lib/ioremap.c b/lib/ioremap.c deleted file mode 100644 index 5ee3526f71b8..000000000000 --- a/lib/ioremap.c +++ /dev/null @@ -1,287 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Re-map IO memory to kernel address space so that we can access it. 
- * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/io.h> -#include <linux/export.h> -#include <asm/cacheflush.h> - -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static int __read_mostly ioremap_p4d_capable; -static int __read_mostly ioremap_pud_capable; -static int __read_mostly ioremap_pmd_capable; -static int __read_mostly ioremap_huge_disabled; - -static int __init set_nohugeiomap(char *str) -{ - ioremap_huge_disabled = 1; - return 0; -} -early_param("nohugeiomap", set_nohugeiomap); - -void __init ioremap_huge_init(void) -{ - if (!ioremap_huge_disabled) { - if (arch_ioremap_p4d_supported()) - ioremap_p4d_capable = 1; - if (arch_ioremap_pud_supported()) - ioremap_pud_capable = 1; - if (arch_ioremap_pmd_supported()) - ioremap_pmd_capable = 1; - } -} - -static inline int ioremap_p4d_enabled(void) -{ - return ioremap_p4d_capable; -} - -static inline int ioremap_pud_enabled(void) -{ - return ioremap_pud_capable; -} - -static inline int ioremap_pmd_enabled(void) -{ - return ioremap_pmd_capable; -} - -#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ -static inline int ioremap_p4d_enabled(void) { return 0; } -static inline int ioremap_pud_enabled(void) { return 0; } -static inline int ioremap_pmd_enabled(void) { return 0; } -#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ - -static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pte_t *pte; - u64 pfn; - - pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel_track(pmd, addr, mask); - if (!pte) - return -ENOMEM; - do { - BUG_ON(!pte_none(*pte)); - set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); - *mask |= PGTBL_PTE_MODIFIED; - return 0; -} - -static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_pmd_enabled()) - return 0; - - if ((end - addr) != PMD_SIZE) - return 0; - - if (!IS_ALIGNED(addr, PMD_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, PMD_SIZE)) - return 0; - - if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) - return 0; - - return pmd_set_huge(pmd, phys_addr, prot); -} - -static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pmd_t *pmd; - unsigned long next; - - pmd = pmd_alloc_track(&init_mm, pud, addr, mask); - if (!pmd) - return -ENOMEM; - do { - next = pmd_addr_end(addr, end); - - if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) { - *mask |= PGTBL_PMD_MODIFIED; - continue; - } - - if (ioremap_pte_range(pmd, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_pud_enabled()) - return 0; - - if ((end - addr) != PUD_SIZE) - return 0; - - if (!IS_ALIGNED(addr, PUD_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, PUD_SIZE)) - return 0; - - if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) - return 0; - - return pud_set_huge(pud, phys_addr, prot); -} - -static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, 
pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pud_t *pud; - unsigned long next; - - pud = pud_alloc_track(&init_mm, p4d, addr, mask); - if (!pud) - return -ENOMEM; - do { - next = pud_addr_end(addr, end); - - if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) { - *mask |= PGTBL_PUD_MODIFIED; - continue; - } - - if (ioremap_pmd_range(pud, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (pud++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_p4d_enabled()) - return 0; - - if ((end - addr) != P4D_SIZE) - return 0; - - if (!IS_ALIGNED(addr, P4D_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, P4D_SIZE)) - return 0; - - if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) - return 0; - - return p4d_set_huge(p4d, phys_addr, prot); -} - -static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - p4d_t *p4d; - unsigned long next; - - p4d = p4d_alloc_track(&init_mm, pgd, addr, mask); - if (!p4d) - return -ENOMEM; - do { - next = p4d_addr_end(addr, end); - - if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) { - *mask |= PGTBL_P4D_MODIFIED; - continue; - } - - if (ioremap_pud_range(p4d, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -int ioremap_page_range(unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) -{ - pgd_t *pgd; - unsigned long start; - unsigned long next; - int err; - pgtbl_mod_mask mask = 0; - - might_sleep(); - BUG_ON(addr >= end); - - start = addr; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, end); - err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot, - &mask); - if (err) - break; - } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); - - flush_cache_vmap(start, end); - - if (mask & ARCH_PAGE_TABLE_SYNC_MASK) - arch_sync_kernel_mappings(start, end); - - return err; -} - -#ifdef CONFIG_GENERIC_IOREMAP -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) -{ - unsigned long offset, vaddr; - phys_addr_t last_addr; - struct vm_struct *area; - - /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) - return NULL; - - /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; - size = PAGE_ALIGN(size + offset); - - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(0)); - if (!area) - return NULL; - vaddr = (unsigned long)area->addr; - - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { - free_vm_area(area); - return NULL; - } - - return (void __iomem *)(vaddr + offset); -} -EXPORT_SYMBOL(ioremap_prot); - -void iounmap(volatile void __iomem *addr) -{ - vunmap((void *)((unsigned long)addr & PAGE_MASK)); -} -EXPORT_SYMBOL(iounmap); -#endif /* CONFIG_GENERIC_IOREMAP */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bf538c2bec77..1635111c5bd2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <crypto/hash.h> #include <linux/export.h> #include <linux/bvec.h> +#include <linux/fault-inject-usercopy.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/splice.h> +#include 
<linux/compat.h> #include <net/checksum.h> #include <linux/scatterlist.h> #include <linux/instrumented.h> @@ -138,6 +141,8 @@ static int copyout(void __user *to, const void *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); @@ -147,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n) static int copyin(void *to, const void __user *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(from, n)) { instrument_copy_from_user(to, from, n); n = raw_copy_from_user(to, from, n); @@ -580,7 +587,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, static __wsum csum_and_memcpy(void *to, const void *from, size_t len, __wsum sum, size_t off) { - __wsum next = csum_partial_copy_nocheck(from, to, len, 0); + __wsum next = csum_partial_copy_nocheck(from, to, len); return csum_block_add(sum, next, off); } @@ -636,30 +643,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -static int copyout_mcsafe(void __user *to, const void *from, size_t n) +#ifdef CONFIG_ARCH_HAS_COPY_MC +static int copyout_mc(void __user *to, const void *from, size_t n) { if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); - n = copy_to_user_mcsafe((__force void *) to, from, n); + n = copy_mc_to_user((__force void *) to, from, n); } return n; } -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, +static unsigned long copy_mc_to_page(struct page *page, size_t offset, const char *from, size_t len) { unsigned long ret; char *to; to = kmap_atomic(page); - ret = memcpy_mcsafe(to + offset, from, len); + ret = copy_mc_to_kernel(to + offset, from, len); kunmap_atomic(to); return ret; } -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; @@ -677,7 +684,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, size_t chunk = min_t(size_t, n, PAGE_SIZE - off); unsigned long rem; - rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, + rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); i->head = i_head; i->iov_offset = off + chunk - rem; @@ -694,18 +701,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, } /** - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address * @bytes: total transfer length * @iter: destination iterator * - * The pmem driver arranges for filesystem-dax to use this facility via - * dax_copy_to_iter() for protecting read/write to persistent memory. - * Unless / until an architecture can guarantee identical performance - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a - * performance regression to switch more users to the mcsafe version. + * The pmem driver deploys this for the dax operation + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes + * successfully copied. * - * Otherwise, the main differences between this and typical _copy_to_iter(). + * The main differences between this and typical _copy_to_iter(). 
* * * Typical tail/residue handling after a fault retries the copy * byte-by-byte until the fault happens again. Re-triggering machine @@ -716,23 +722,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. * Compare to copy_to_iter() where only ITER_IOVEC attempts might return * a short copy. - * - * See MCSAFE_TEST for self-test. */ -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; if (unlikely(iov_iter_is_pipe(i))) - return copy_pipe_to_iter_mcsafe(addr, bytes, i); + return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, - copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), ({ - rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len); + rem = copy_mc_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -740,8 +745,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) } }), ({ - rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, - v.iov_len); + rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) + - v.iov_len, v.iov_len); if (rem) { curr_addr = (unsigned long) from; bytes = curr_addr - s_addr - rem; @@ -752,8 +757,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ +EXPORT_SYMBOL_GPL(_copy_mc_to_iter); +#endif /* CONFIG_ARCH_HAS_COPY_MC */ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { @@ -1448,15 +1453,14 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, return 0; } iterate_and_advance(i, bytes, v, ({ - int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, 0, &err); - if (!err) { + v.iov_len); + if (next) { sum = csum_block_add(sum, next, off); off += v.iov_len; } - err ? v.iov_len : 0; + next ? 0 : v.iov_len; }), ({ char *p = kmap_atomic(v.bv_page); sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, @@ -1490,11 +1494,10 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, if (unlikely(i->count < bytes)) return false; iterate_all_kinds(i, bytes, v, ({ - int err = 0; next = csum_and_copy_from_user(v.iov_base, (to += v.iov_len) - v.iov_len, - v.iov_len, 0, &err); - if (err) + v.iov_len); + if (!next) return false; sum = csum_block_add(sum, next, off); off += v.iov_len; @@ -1536,15 +1539,14 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, return 0; } iterate_and_advance(i, bytes, v, ({ - int err = 0; next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len, 0, &err); - if (!err) { + v.iov_len); + if (next) { sum = csum_block_add(sum, next, off); off += v.iov_len; } - err ? v.iov_len : 0; + next ? 
0 : v.iov_len; }), ({ char *p = kmap_atomic(v.bv_page); sum = csum_and_memcpy(p + v.bv_offset, @@ -1567,7 +1569,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { -#ifdef CONFIG_CRYPTO +#ifdef CONFIG_CRYPTO_HASH struct ahash_request *hash = hashp; struct scatterlist sg; size_t copied; @@ -1649,16 +1651,145 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) } EXPORT_SYMBOL(dup_iter); +static int copy_compat_iovec_from_user(struct iovec *iov, + const struct iovec __user *uvec, unsigned long nr_segs) +{ + const struct compat_iovec __user *uiov = + (const struct compat_iovec __user *)uvec; + int ret = -EFAULT, i; + + if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + return -EFAULT; + + for (i = 0; i < nr_segs; i++) { + compat_uptr_t buf; + compat_ssize_t len; + + unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); + unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); + + /* check for compat_size_t not fitting in compat_ssize_t .. */ + if (len < 0) { + ret = -EINVAL; + goto uaccess_end; + } + iov[i].iov_base = compat_ptr(buf); + iov[i].iov_len = len; + } + + ret = 0; +uaccess_end: + user_access_end(); + return ret; +} + +static int copy_iovec_from_user(struct iovec *iov, + const struct iovec __user *uvec, unsigned long nr_segs) +{ + unsigned long seg; + + if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) + return -EFAULT; + for (seg = 0; seg < nr_segs; seg++) { + if ((ssize_t)iov[seg].iov_len < 0) + return -EINVAL; + } + + return 0; +} + +struct iovec *iovec_from_user(const struct iovec __user *uvec, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_iov, bool compat) +{ + struct iovec *iov = fast_iov; + int ret; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument was + * less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) + return iov; + if (nr_segs > UIO_MAXIOV) + return ERR_PTR(-EINVAL); + if (nr_segs > fast_segs) { + iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); + if (!iov) + return ERR_PTR(-ENOMEM); + } + + if (compat) + ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); + else + ret = copy_iovec_from_user(iov, uvec, nr_segs); + if (ret) { + if (iov != fast_iov) + kfree(iov); + return ERR_PTR(ret); + } + + return iov; +} + +ssize_t __import_iovec(int type, const struct iovec __user *uvec, + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, + struct iov_iter *i, bool compat) +{ + ssize_t total_len = 0; + unsigned long seg; + struct iovec *iov; + + iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); + if (IS_ERR(iov)) { + *iovp = NULL; + return PTR_ERR(iov); + } + + /* + * According to the Single Unix Specification we should return EINVAL if + * an element length is < 0 when cast to ssize_t or if the total length + * would overflow the ssize_t return value of the system call. + * + * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the + * overflow case. 
+ */ + for (seg = 0; seg < nr_segs; seg++) { + ssize_t len = (ssize_t)iov[seg].iov_len; + + if (!access_ok(iov[seg].iov_base, len)) { + if (iov != *iovp) + kfree(iov); + *iovp = NULL; + return -EFAULT; + } + + if (len > MAX_RW_COUNT - total_len) { + len = MAX_RW_COUNT - total_len; + iov[seg].iov_len = len; + } + total_len += len; + } + + iov_iter_init(i, type, iov, nr_segs, total_len); + if (iov == *iovp) + *iovp = NULL; + else + *iovp = iov; + return total_len; +} + /** * import_iovec() - Copy an array of &struct iovec from userspace * into the kernel, check that it is valid, and initialize a new * &struct iov_iter iterator to access it. * * @type: One of %READ or %WRITE. - * @uvector: Pointer to the userspace array. + * @uvec: Pointer to the userspace array. * @nr_segs: Number of elements in userspace array. * @fast_segs: Number of elements in @iov. - * @iov: (input and output parameter) Pointer to pointer to (usually small + * @iovp: (input and output parameter) Pointer to pointer to (usually small * on-stack) kernel array. * @i: Pointer to iterator that will be initialized on success. * @@ -1671,51 +1802,15 @@ EXPORT_SYMBOL(dup_iter); * * Return: Negative error code on error, bytes imported on success */ -ssize_t import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) + struct iovec **iovp, struct iov_iter *i) { - ssize_t n; - struct iovec *p; - n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, - *iov, &p); - if (n < 0) { - if (p != *iov) - kfree(p); - *iov = NULL; - return n; - } - iov_iter_init(i, type, p, nr_segs, n); - *iov = p == *iov ? NULL : p; - return n; + return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, + in_compat_syscall()); } EXPORT_SYMBOL(import_iovec); -#ifdef CONFIG_COMPAT -#include <linux/compat.h> - -ssize_t compat_import_iovec(int type, - const struct compat_iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) -{ - ssize_t n; - struct iovec *p; - n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, - *iov, &p); - if (n < 0) { - if (p != *iov) - kfree(p); - *iov = NULL; - return n; - } - iov_iter_init(i, type, p, nr_segs, n); - *iov = p == *iov ? NULL : p; - return n; -} -EXPORT_SYMBOL(compat_import_iovec); -#endif - int import_single_range(int rw, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i) { diff --git a/lib/kobject.c b/lib/kobject.c index 1e4b7382a88e..ea53b30cf483 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -599,21 +599,11 @@ out: } EXPORT_SYMBOL_GPL(kobject_move); -/** - * kobject_del() - Unlink kobject from hierarchy. - * @kobj: object. - * - * This is the function that should be called to delete an object - * successfully added via kobject_add(). - */ -void kobject_del(struct kobject *kobj) +static void __kobject_del(struct kobject *kobj) { struct kernfs_node *sd; const struct kobj_type *ktype; - if (!kobj) - return; - sd = kobj->sd; ktype = get_ktype(kobj); @@ -632,9 +622,27 @@ void kobject_del(struct kobject *kobj) kobj->state_in_sysfs = 0; kobj_kset_leave(kobj); - kobject_put(kobj->parent); kobj->parent = NULL; } + +/** + * kobject_del() - Unlink kobject from hierarchy. + * @kobj: object. + * + * This is the function that should be called to delete an object + * successfully added via kobject_add(). 
+ */ +void kobject_del(struct kobject *kobj) +{ + struct kobject *parent; + + if (!kobj) + return; + + parent = kobj->parent; + __kobject_del(kobj); + kobject_put(parent); +} EXPORT_SYMBOL(kobject_del); /** @@ -670,6 +678,7 @@ EXPORT_SYMBOL(kobject_get_unless_zero); */ static void kobject_cleanup(struct kobject *kobj) { + struct kobject *parent = kobj->parent; struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; @@ -684,7 +693,10 @@ static void kobject_cleanup(struct kobject *kobj) if (kobj->state_in_sysfs) { pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n", kobject_name(kobj), kobj); - kobject_del(kobj); + __kobject_del(kobj); + } else { + /* avoid dropping the parent reference unnecessarily */ + parent = NULL; } if (t && t->release) { @@ -698,6 +710,8 @@ static void kobject_cleanup(struct kobject *kobj) pr_debug("kobject: '%s': free name\n", name); kfree_const(name); } + + kobject_put(parent); } #ifdef CONFIG_DEBUG_KOBJECT_RELEASE diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 1006bf70bf74..a118b0b1e9b2 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -115,8 +115,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoull(). Return code must be checked. */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { @@ -139,8 +138,7 @@ EXPORT_SYMBOL(kstrtoull); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoll(). Return code must be checked. */ int kstrtoll(const char *s, unsigned int base, long long *res) { @@ -211,8 +209,7 @@ EXPORT_SYMBOL(_kstrtol); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) { @@ -242,8 +239,7 @@ EXPORT_SYMBOL(kstrtouint); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtol(). Return code must be checked. 
*/ int kstrtoint(const char *s, unsigned int base, int *res) { @@ -359,6 +355,7 @@ int kstrtobool(const char *s, bool *res) default: break; } + break; default: break; } diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 724b94311ca3..c49f4ffb6273 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -3,7 +3,8 @@ obj-$(CONFIG_KUNIT) += kunit.o kunit-objs += test.o \ string-stream.o \ assert.o \ - try-catch.o + try-catch.o \ + executor.o ifeq ($(CONFIG_KUNIT_DEBUGFS),y) kunit-objs += debugfs.o diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c new file mode 100644 index 000000000000..a95742a4ece7 --- /dev/null +++ b/lib/kunit/executor.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/test.h> + +/* + * These symbols point to the .kunit_test_suites section and are defined in + * include/asm-generic/vmlinux.lds.h, and consequently must be extern. + */ +extern struct kunit_suite * const * const __kunit_suites_start[]; +extern struct kunit_suite * const * const __kunit_suites_end[]; + +#if IS_BUILTIN(CONFIG_KUNIT) + +static void kunit_print_tap_header(void) +{ + struct kunit_suite * const * const *suites, * const *subsuite; + int num_of_suites = 0; + + for (suites = __kunit_suites_start; + suites < __kunit_suites_end; + suites++) + for (subsuite = *suites; *subsuite != NULL; subsuite++) + num_of_suites++; + + pr_info("TAP version 14\n"); + pr_info("1..%d\n", num_of_suites); +} + +int kunit_run_all_tests(void) +{ + struct kunit_suite * const * const *suites; + + kunit_print_tap_header(); + + for (suites = __kunit_suites_start; + suites < __kunit_suites_end; + suites++) + __kunit_test_suites_init(*suites); + + return 0; +} + +#endif /* IS_BUILTIN(CONFIG_KUNIT) */ diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 4f3d36a72f8f..69f902440a0e 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -118,14 +118,14 @@ static int fake_resource_init(struct kunit_resource *res, void *context) { struct kunit_test_resource_context *ctx = context; - res->allocation = &ctx->is_resource_initialized; + res->data = &ctx->is_resource_initialized; ctx->is_resource_initialized = true; return 0; } static void fake_resource_free(struct kunit_resource *res) { - bool *is_resource_initialized = res->allocation; + bool *is_resource_initialized = res->data; *is_resource_initialized = false; } @@ -154,11 +154,21 @@ static void kunit_resource_test_alloc_resource(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, res); KUNIT_EXPECT_PTR_EQ(test, &ctx->is_resource_initialized, - (bool *) res->allocation); + (bool *)res->data); KUNIT_EXPECT_TRUE(test, list_is_last(&res->node, &ctx->test.resources)); KUNIT_EXPECT_PTR_EQ(test, free, res->free); + + kunit_put_resource(res); } +/* + * Note: tests below use kunit_alloc_and_get_resource(), so as a consequence + * they have a reference to the associated resource that they must release + * via kunit_put_resource(). In normal operation, users will only + * have to do this for cases where they use kunit_find_resource(), and the + * kunit_alloc_resource() function will be used (which does not take a + * resource reference). 
+ */ static void kunit_resource_test_destroy_resource(struct kunit *test) { struct kunit_test_resource_context *ctx = test->priv; @@ -169,11 +179,12 @@ static void kunit_resource_test_destroy_resource(struct kunit *test) GFP_KERNEL, ctx); + kunit_put_resource(res); + KUNIT_ASSERT_FALSE(test, - kunit_resource_destroy(&ctx->test, + kunit_destroy_resource(&ctx->test, kunit_resource_instance_match, - res->free, - res->allocation)); + res->data)); KUNIT_EXPECT_FALSE(test, ctx->is_resource_initialized); KUNIT_EXPECT_TRUE(test, list_empty(&ctx->test.resources)); @@ -191,6 +202,7 @@ static void kunit_resource_test_cleanup_resources(struct kunit *test) fake_resource_free, GFP_KERNEL, ctx); + kunit_put_resource(resources[i]); } kunit_cleanup(&ctx->test); @@ -221,14 +233,14 @@ static int fake_resource_2_init(struct kunit_resource *res, void *context) KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, allocate_order, 2); - res->allocation = ctx; + res->data = ctx; return 0; } static void fake_resource_2_free(struct kunit_resource *res) { - struct kunit_test_resource_context *ctx = res->allocation; + struct kunit_test_resource_context *ctx = res->data; KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, free_order, 2); } @@ -236,23 +248,26 @@ static void fake_resource_2_free(struct kunit_resource *res) static int fake_resource_1_init(struct kunit_resource *res, void *context) { struct kunit_test_resource_context *ctx = context; + struct kunit_resource *res2; - kunit_alloc_and_get_resource(&ctx->test, - fake_resource_2_init, - fake_resource_2_free, - GFP_KERNEL, - ctx); + res2 = kunit_alloc_and_get_resource(&ctx->test, + fake_resource_2_init, + fake_resource_2_free, + GFP_KERNEL, + ctx); KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, allocate_order, 1); - res->allocation = ctx; + res->data = ctx; + + kunit_put_resource(res2); return 0; } static void fake_resource_1_free(struct kunit_resource *res) { - struct kunit_test_resource_context *ctx = res->allocation; + struct kunit_test_resource_context *ctx = res->data; KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, free_order, 1); } @@ -265,13 +280,14 @@ static void fake_resource_1_free(struct kunit_resource *res) static void kunit_resource_test_proper_free_ordering(struct kunit *test) { struct kunit_test_resource_context *ctx = test->priv; + struct kunit_resource *res; /* fake_resource_1 allocates a fake_resource_2 in its init. 
*/ - kunit_alloc_and_get_resource(&ctx->test, - fake_resource_1_init, - fake_resource_1_free, - GFP_KERNEL, - ctx); + res = kunit_alloc_and_get_resource(&ctx->test, + fake_resource_1_init, + fake_resource_1_free, + GFP_KERNEL, + ctx); /* * Since fake_resource_2_init calls KUNIT_RESOURCE_TEST_MARK_ORDER @@ -281,6 +297,8 @@ static void kunit_resource_test_proper_free_ordering(struct kunit *test) KUNIT_EXPECT_EQ(test, ctx->allocate_order[0], 2); KUNIT_EXPECT_EQ(test, ctx->allocate_order[1], 1); + kunit_put_resource(res); + kunit_cleanup(&ctx->test); /* @@ -292,6 +310,57 @@ static void kunit_resource_test_proper_free_ordering(struct kunit *test) KUNIT_EXPECT_EQ(test, ctx->free_order[1], 2); } +static void kunit_resource_test_static(struct kunit *test) +{ + struct kunit_test_resource_context ctx; + struct kunit_resource res; + + KUNIT_EXPECT_EQ(test, kunit_add_resource(test, NULL, NULL, &res, &ctx), + 0); + + KUNIT_EXPECT_PTR_EQ(test, res.data, (void *)&ctx); + + kunit_cleanup(test); + + KUNIT_EXPECT_TRUE(test, list_empty(&test->resources)); +} + +static void kunit_resource_test_named(struct kunit *test) +{ + struct kunit_resource res1, res2, *found = NULL; + struct kunit_test_resource_context ctx; + + KUNIT_EXPECT_EQ(test, + kunit_add_named_resource(test, NULL, NULL, &res1, + "resource_1", &ctx), + 0); + KUNIT_EXPECT_PTR_EQ(test, res1.data, (void *)&ctx); + + KUNIT_EXPECT_EQ(test, + kunit_add_named_resource(test, NULL, NULL, &res1, + "resource_1", &ctx), + -EEXIST); + + KUNIT_EXPECT_EQ(test, + kunit_add_named_resource(test, NULL, NULL, &res2, + "resource_2", &ctx), + 0); + + found = kunit_find_named_resource(test, "resource_1"); + + KUNIT_EXPECT_PTR_EQ(test, found, &res1); + + if (found) + kunit_put_resource(&res1); + + KUNIT_EXPECT_EQ(test, kunit_destroy_named_resource(test, "resource_2"), + 0); + + kunit_cleanup(test); + + KUNIT_EXPECT_TRUE(test, list_empty(&test->resources)); +} + static int kunit_resource_test_init(struct kunit *test) { struct kunit_test_resource_context *ctx = @@ -320,6 +389,8 @@ static struct kunit_case kunit_resource_test_cases[] = { KUNIT_CASE(kunit_resource_test_destroy_resource), KUNIT_CASE(kunit_resource_test_cleanup_resources), KUNIT_CASE(kunit_resource_test_proper_free_ordering), + KUNIT_CASE(kunit_resource_test_static), + KUNIT_CASE(kunit_resource_test_named), {} }; diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c index 350392013c14..141789ca8949 100644 --- a/lib/kunit/string-stream.c +++ b/lib/kunit/string-stream.c @@ -33,14 +33,14 @@ static int string_stream_fragment_init(struct kunit_resource *res, if (!frag->fragment) return -ENOMEM; - res->allocation = frag; + res->data = frag; return 0; } static void string_stream_fragment_free(struct kunit_resource *res) { - struct string_stream_fragment *frag = res->allocation; + struct string_stream_fragment *frag = res->data; list_del(&frag->node); kunit_kfree(frag->test, frag->fragment); @@ -65,9 +65,8 @@ static struct string_stream_fragment *alloc_string_stream_fragment( static int string_stream_fragment_destroy(struct string_stream_fragment *frag) { - return kunit_resource_destroy(frag->test, + return kunit_destroy_resource(frag->test, kunit_resource_instance_match, - string_stream_fragment_free, frag); } @@ -179,7 +178,7 @@ static int string_stream_init(struct kunit_resource *res, void *context) if (!stream) return -ENOMEM; - res->allocation = stream; + res->data = stream; stream->gfp = ctx->gfp; stream->test = ctx->test; INIT_LIST_HEAD(&stream->fragments); @@ -190,7 +189,7 @@ static int 
string_stream_init(struct kunit_resource *res, void *context) static void string_stream_free(struct kunit_resource *res) { - struct string_stream *stream = res->allocation; + struct string_stream *stream = res->data; string_stream_clear(stream); } @@ -211,8 +210,7 @@ struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp) int string_stream_destroy(struct string_stream *stream) { - return kunit_resource_destroy(stream->test, + return kunit_destroy_resource(stream->test, kunit_resource_instance_match, - string_stream_free, stream); } diff --git a/lib/kunit/test.c b/lib/kunit/test.c index ccb2ffad8dcf..ec9494e914ef 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -8,27 +8,14 @@ #include <kunit/test.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/sched/debug.h> +#include <linux/sched.h> #include "debugfs.h" #include "string-stream.h" #include "try-catch-impl.h" -static void kunit_set_failure(struct kunit *test) -{ - WRITE_ONCE(test->success, false); -} - -static void kunit_print_tap_version(void) -{ - static bool kunit_has_printed_tap_version; - - if (!kunit_has_printed_tap_version) { - pr_info("TAP version 14\n"); - kunit_has_printed_tap_version = true; - } -} - /* * Append formatted message to log, size of which is limited to * KUNIT_LOG_SIZE bytes (including null terminating byte). @@ -68,7 +55,6 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_subtest_start(struct kunit_suite *suite) { - kunit_print_tap_version(); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", suite->name); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", @@ -287,6 +273,10 @@ static void kunit_try_run_case(void *data) struct kunit_suite *suite = ctx->suite; struct kunit_case *test_case = ctx->test_case; +#if (IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT)) + current->kunit_test = test; +#endif /* IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT) */ + /* * kunit_run_case_internal may encounter a fatal error; if it does, * abort will be called, this thread will exit, and finally the parent @@ -335,39 +325,72 @@ static void kunit_catch_run_case(void *data) * occur in a test case and reports them as failures. */ static void kunit_run_case_catch_errors(struct kunit_suite *suite, - struct kunit_case *test_case) + struct kunit_case *test_case, + struct kunit *test) { struct kunit_try_catch_context context; struct kunit_try_catch *try_catch; - struct kunit test; - kunit_init_test(&test, test_case->name, test_case->log); - try_catch = &test.try_catch; + kunit_init_test(test, test_case->name, test_case->log); + try_catch = &test->try_catch; kunit_try_catch_init(try_catch, - &test, + test, kunit_try_run_case, kunit_catch_run_case); - context.test = &test; + context.test = test; context.suite = suite; context.test_case = test_case; kunit_try_catch_run(try_catch, &context); - test_case->success = test.success; - - kunit_print_ok_not_ok(&test, true, test_case->success, - kunit_test_case_num(suite, test_case), - test_case->name); + test_case->success = test->success; } int kunit_run_tests(struct kunit_suite *suite) { + char param_desc[KUNIT_PARAM_DESC_SIZE]; struct kunit_case *test_case; kunit_print_subtest_start(suite); - kunit_suite_for_each_test_case(suite, test_case) - kunit_run_case_catch_errors(suite, test_case); + kunit_suite_for_each_test_case(suite, test_case) { + struct kunit test = { .param_value = NULL, .param_index = 0 }; + bool test_success = true; + + if (test_case->generate_params) { + /* Get initial param. 
*/ + param_desc[0] = '\0'; + test.param_value = test_case->generate_params(NULL, param_desc); + } + + do { + kunit_run_case_catch_errors(suite, test_case, &test); + test_success &= test_case->success; + + if (test_case->generate_params) { + if (param_desc[0] == '\0') { + snprintf(param_desc, sizeof(param_desc), + "param-%d", test.param_index); + } + + kunit_log(KERN_INFO, &test, + KUNIT_SUBTEST_INDENT + "# %s: %s %d - %s", + test_case->name, + kunit_status_to_string(test.success), + test.param_index + 1, param_desc); + + /* Get next param. */ + param_desc[0] = '\0'; + test.param_value = test_case->generate_params(test.param_value, param_desc); + test.param_index++; + } + } while (test.param_value); + + kunit_print_ok_not_ok(&test, true, test_success, + kunit_test_case_num(suite, test_case), + test_case->name); + } kunit_print_subtest_end(suite); @@ -380,7 +403,7 @@ static void kunit_init_suite(struct kunit_suite *suite) kunit_debugfs_create_suite(suite); } -int __kunit_test_suites_init(struct kunit_suite **suites) +int __kunit_test_suites_init(struct kunit_suite * const * const suites) { unsigned int i; @@ -406,90 +429,116 @@ void __kunit_test_suites_exit(struct kunit_suite **suites) } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); -struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test, - kunit_resource_init_t init, - kunit_resource_free_t free, - gfp_t internal_gfp, - void *context) +/* + * Used for static resources and when a kunit_resource * has been created by + * kunit_alloc_resource(). When an init function is supplied, @data is passed + * into the init function; otherwise, we simply set the resource data field to + * the data value passed in. + */ +int kunit_add_resource(struct kunit *test, + kunit_resource_init_t init, + kunit_resource_free_t free, + struct kunit_resource *res, + void *data) { - struct kunit_resource *res; - int ret; + int ret = 0; - res = kzalloc(sizeof(*res), internal_gfp); - if (!res) - return NULL; + res->free = free; + kref_init(&res->refcount); - ret = init(res, context); - if (ret) - return NULL; + if (init) { + ret = init(res, data); + if (ret) + return ret; + } else { + res->data = data; + } - res->free = free; spin_lock(&test->lock); list_add_tail(&res->node, &test->resources); + /* refcount for list is established by kref_init() */ spin_unlock(&test->lock); - return res; + return ret; } -EXPORT_SYMBOL_GPL(kunit_alloc_and_get_resource); +EXPORT_SYMBOL_GPL(kunit_add_resource); -static void kunit_resource_free(struct kunit *test, struct kunit_resource *res) +int kunit_add_named_resource(struct kunit *test, + kunit_resource_init_t init, + kunit_resource_free_t free, + struct kunit_resource *res, + const char *name, + void *data) { - res->free(res); - kfree(res); + struct kunit_resource *existing; + + if (!name) + return -EINVAL; + + existing = kunit_find_named_resource(test, name); + if (existing) { + kunit_put_resource(existing); + return -EEXIST; + } + + res->name = name; + + return kunit_add_resource(test, init, free, res, data); } +EXPORT_SYMBOL_GPL(kunit_add_named_resource); -static struct kunit_resource *kunit_resource_find(struct kunit *test, - kunit_resource_match_t match, - kunit_resource_free_t free, - void *match_data) +struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test, + kunit_resource_init_t init, + kunit_resource_free_t free, + gfp_t internal_gfp, + void *data) { - struct kunit_resource *resource; + struct kunit_resource *res; + int ret; - lockdep_assert_held(&test->lock); + res = kzalloc(sizeof(*res), 
internal_gfp); + if (!res) + return NULL; - list_for_each_entry_reverse(resource, &test->resources, node) { - if (resource->free != free) - continue; - if (match(test, resource->allocation, match_data)) - return resource; + ret = kunit_add_resource(test, init, free, res, data); + if (!ret) { + /* + * bump refcount for get; kunit_resource_put() should be called + * when done. + */ + kunit_get_resource(res); + return res; } - return NULL; } +EXPORT_SYMBOL_GPL(kunit_alloc_and_get_resource); -static struct kunit_resource *kunit_resource_remove( - struct kunit *test, - kunit_resource_match_t match, - kunit_resource_free_t free, - void *match_data) +void kunit_remove_resource(struct kunit *test, struct kunit_resource *res) { - struct kunit_resource *resource; - spin_lock(&test->lock); - resource = kunit_resource_find(test, match, free, match_data); - if (resource) - list_del(&resource->node); + list_del(&res->node); spin_unlock(&test->lock); - - return resource; + kunit_put_resource(res); } +EXPORT_SYMBOL_GPL(kunit_remove_resource); -int kunit_resource_destroy(struct kunit *test, - kunit_resource_match_t match, - kunit_resource_free_t free, +int kunit_destroy_resource(struct kunit *test, kunit_resource_match_t match, void *match_data) { - struct kunit_resource *resource; + struct kunit_resource *res = kunit_find_resource(test, match, + match_data); - resource = kunit_resource_remove(test, match, free, match_data); - - if (!resource) + if (!res) return -ENOENT; - kunit_resource_free(test, resource); + kunit_remove_resource(test, res); + + /* We have a reference also via _find(); drop it. */ + kunit_put_resource(res); + return 0; } -EXPORT_SYMBOL_GPL(kunit_resource_destroy); +EXPORT_SYMBOL_GPL(kunit_destroy_resource); struct kunit_kmalloc_params { size_t size; @@ -500,8 +549,8 @@ static int kunit_kmalloc_init(struct kunit_resource *res, void *context) { struct kunit_kmalloc_params *params = context; - res->allocation = kmalloc(params->size, params->gfp); - if (!res->allocation) + res->data = kmalloc(params->size, params->gfp); + if (!res->data) return -ENOMEM; return 0; @@ -509,7 +558,7 @@ static int kunit_kmalloc_init(struct kunit_resource *res, void *context) static void kunit_kmalloc_free(struct kunit_resource *res) { - kfree(res->allocation); + kfree(res->data); } void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) @@ -529,20 +578,25 @@ EXPORT_SYMBOL_GPL(kunit_kmalloc); void kunit_kfree(struct kunit *test, const void *ptr) { - int rc; + struct kunit_resource *res; - rc = kunit_resource_destroy(test, - kunit_resource_instance_match, - kunit_kmalloc_free, - (void *)ptr); + res = kunit_find_resource(test, kunit_resource_instance_match, + (void *)ptr); + + /* + * Removing the resource from the list of resources drops the + * reference count to 1; the final put will trigger the free. + */ + kunit_remove_resource(test, res); + + kunit_put_resource(res); - WARN_ON(rc); } EXPORT_SYMBOL_GPL(kunit_kfree); void kunit_cleanup(struct kunit *test) { - struct kunit_resource *resource; + struct kunit_resource *res; /* * test->resources is a stack - each allocation must be freed in the @@ -559,14 +613,20 @@ void kunit_cleanup(struct kunit *test) spin_unlock(&test->lock); break; } - resource = list_last_entry(&test->resources, - struct kunit_resource, - node); - list_del(&resource->node); + res = list_last_entry(&test->resources, + struct kunit_resource, + node); + /* + * Need to unlock here as a resource may remove another + * resource, and this can't happen if the test->lock + * is held. 
+ */ spin_unlock(&test->lock); - - kunit_resource_free(test, resource); + kunit_remove_resource(test, res); } +#if (IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT)) + current->kunit_test = NULL; +#endif /* IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT)*/ } EXPORT_SYMBOL_GPL(kunit_cleanup); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 77ab839644c5..5ca0d815a95d 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -12,7 +12,7 @@ * pages = {}, * month = {June}, *} - * Used by the iSCSI driver, possibly others, and derived from the + * Used by the iSCSI driver, possibly others, and derived from * the iscsi-crc.c module of the linux-iscsi driver at * http://linux-iscsi.sourceforge.net. * diff --git a/lib/livepatch/Makefile b/lib/livepatch/Makefile index 295b94bff370..dcc912b3478f 100644 --- a/lib/livepatch/Makefile +++ b/lib/livepatch/Makefile @@ -12,7 +12,3 @@ obj-$(CONFIG_TEST_LIVEPATCH) += test_klp_atomic_replace.o \ test_klp_state.o \ test_klp_state2.o \ test_klp_state3.o - -# Target modules to be livepatched require CC_FLAGS_FTRACE -CFLAGS_test_klp_callbacks_busy.o += $(CC_FLAGS_FTRACE) -CFLAGS_test_klp_callbacks_mod.o += $(CC_FLAGS_FTRACE) diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c index 40beddf8a0e2..7ac845f65be5 100644 --- a/lib/livepatch/test_klp_callbacks_busy.c +++ b/lib/livepatch/test_klp_callbacks_busy.c @@ -5,34 +5,53 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/workqueue.h> #include <linux/delay.h> -static int sleep_secs; -module_param(sleep_secs, int, 0644); -MODULE_PARM_DESC(sleep_secs, "sleep_secs (default=0)"); +/* load/run-time control from sysfs writer */ +static bool block_transition; +module_param(block_transition, bool, 0644); +MODULE_PARM_DESC(block_transition, "block_transition (default=false)"); static void busymod_work_func(struct work_struct *work); -static DECLARE_DELAYED_WORK(work, busymod_work_func); +static DECLARE_WORK(work, busymod_work_func); static void busymod_work_func(struct work_struct *work) { - pr_info("%s, sleeping %d seconds ...\n", __func__, sleep_secs); - msleep(sleep_secs * 1000); + pr_info("%s enter\n", __func__); + + while (READ_ONCE(block_transition)) { + /* + * Busy-wait until the sysfs writer has acknowledged a + * blocked transition and clears the flag. + */ + msleep(20); + } + pr_info("%s exit\n", __func__); } static int test_klp_callbacks_busy_init(void) { pr_info("%s\n", __func__); - schedule_delayed_work(&work, - msecs_to_jiffies(1000 * 0)); + schedule_work(&work); + + if (!block_transition) { + /* + * Serialize output: print all messages from the work + * function before returning from init(). 
+ */ + flush_work(&work); + } + return 0; } static void test_klp_callbacks_busy_exit(void) { - cancel_delayed_work_sync(&work); + WRITE_ONCE(block_transition, false); + flush_work(&work); pr_info("%s\n", __func__); } diff --git a/lib/livepatch/test_klp_shadow_vars.c b/lib/livepatch/test_klp_shadow_vars.c index f0b5a1d24e55..b99116490858 100644 --- a/lib/livepatch/test_klp_shadow_vars.c +++ b/lib/livepatch/test_klp_shadow_vars.c @@ -109,8 +109,7 @@ static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor) static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor) { klp_shadow_free_all(id, dtor); - pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", - __func__, id, ptr_id(dtor)); + pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", __func__, id, ptr_id(dtor)); } @@ -124,12 +123,16 @@ static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data) return -EINVAL; *sv = *var; - pr_info("%s: PTR%d -> PTR%d\n", - __func__, ptr_id(sv), ptr_id(*var)); + pr_info("%s: PTR%d -> PTR%d\n", __func__, ptr_id(sv), ptr_id(*var)); return 0; } +/* + * With more than one item to free in the list, order is not determined and + * shadow_dtor will not be passed to shadow_free_all() which would make the + * test fail. (see pass 6) + */ static void shadow_dtor(void *obj, void *shadow_data) { int **sv = shadow_data; @@ -138,132 +141,153 @@ static void shadow_dtor(void *obj, void *shadow_data) __func__, ptr_id(obj), ptr_id(sv)); } -static int test_klp_shadow_vars_init(void) -{ - void *obj = THIS_MODULE; - int id = 0x1234; - gfp_t gfp_flags = GFP_KERNEL; +/* number of objects we simulate that need shadow vars */ +#define NUM_OBJS 3 - int var1, var2, var3, var4; - int *pv1, *pv2, *pv3, *pv4; - int **sv1, **sv2, **sv3, **sv4; +/* dynamically created obj fields have the following shadow var id values */ +#define SV_ID1 0x1234 +#define SV_ID2 0x1235 - int **sv; +/* + * The main test case adds/removes new fields (shadow var) to each of these + * test structure instances. The last group of fields in the struct represent + * the idea that shadow variables may be added and removed to and from the + * struct during execution. + */ +struct test_object { + /* add anything here below and avoid to define an empty struct */ + struct shadow_ptr sp; - pv1 = &var1; - pv2 = &var2; - pv3 = &var3; - pv4 = &var4; + /* these represent shadow vars added and removed with SV_ID{1,2} */ + /* char nfield1; */ + /* int nfield2; */ +}; + +static int test_klp_shadow_vars_init(void) +{ + struct test_object objs[NUM_OBJS]; + char nfields1[NUM_OBJS], *pnfields1[NUM_OBJS], **sv1[NUM_OBJS]; + char *pndup[NUM_OBJS]; + int nfields2[NUM_OBJS], *pnfields2[NUM_OBJS], **sv2[NUM_OBJS]; + void **sv; + int ret; + int i; ptr_id(NULL); - ptr_id(pv1); - ptr_id(pv2); - ptr_id(pv3); - ptr_id(pv4); /* * With an empty shadow variable hash table, expect not to find * any matches. */ - sv = shadow_get(obj, id); + sv = shadow_get(&objs[0], SV_ID1); if (!sv) pr_info(" got expected NULL result\n"); - /* - * Allocate a few shadow variables with different <obj> and <id>. - */ - sv1 = shadow_alloc(obj, id, sizeof(pv1), gfp_flags, shadow_ctor, &pv1); - if (!sv1) - return -ENOMEM; - - sv2 = shadow_alloc(obj + 1, id, sizeof(pv2), gfp_flags, shadow_ctor, &pv2); - if (!sv2) - return -ENOMEM; - - sv3 = shadow_alloc(obj, id + 1, sizeof(pv3), gfp_flags, shadow_ctor, &pv3); - if (!sv3) - return -ENOMEM; - - /* - * Verify we can find our new shadow variables and that they point - * to expected data. 
- */ - sv = shadow_get(obj, id); - if (!sv) - return -EINVAL; - if (sv == sv1 && *sv1 == pv1) - pr_info(" got expected PTR%d -> PTR%d result\n", - ptr_id(sv1), ptr_id(*sv1)); - - sv = shadow_get(obj + 1, id); - if (!sv) - return -EINVAL; - if (sv == sv2 && *sv2 == pv2) - pr_info(" got expected PTR%d -> PTR%d result\n", - ptr_id(sv2), ptr_id(*sv2)); - sv = shadow_get(obj, id + 1); - if (!sv) - return -EINVAL; - if (sv == sv3 && *sv3 == pv3) - pr_info(" got expected PTR%d -> PTR%d result\n", - ptr_id(sv3), ptr_id(*sv3)); - - /* - * Allocate or get a few more, this time with the same <obj>, <id>. - * The second invocation should return the same shadow var. - */ - sv4 = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4); - if (!sv4) - return -ENOMEM; - - sv = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4); - if (!sv) - return -EINVAL; - if (sv == sv4 && *sv4 == pv4) - pr_info(" got expected PTR%d -> PTR%d result\n", - ptr_id(sv4), ptr_id(*sv4)); - - /* - * Free the <obj=*, id> shadow variables and check that we can no - * longer find them. - */ - shadow_free(obj, id, shadow_dtor); /* sv1 */ - sv = shadow_get(obj, id); - if (!sv) - pr_info(" got expected NULL result\n"); + /* pass 1: init & alloc a char+int pair of svars for each objs */ + for (i = 0; i < NUM_OBJS; i++) { + pnfields1[i] = &nfields1[i]; + ptr_id(pnfields1[i]); + + if (i % 2) { + sv1[i] = shadow_alloc(&objs[i], SV_ID1, + sizeof(pnfields1[i]), GFP_KERNEL, + shadow_ctor, &pnfields1[i]); + } else { + sv1[i] = shadow_get_or_alloc(&objs[i], SV_ID1, + sizeof(pnfields1[i]), GFP_KERNEL, + shadow_ctor, &pnfields1[i]); + } + if (!sv1[i]) { + ret = -ENOMEM; + goto out; + } + + pnfields2[i] = &nfields2[i]; + ptr_id(pnfields2[i]); + sv2[i] = shadow_alloc(&objs[i], SV_ID2, sizeof(pnfields2[i]), + GFP_KERNEL, shadow_ctor, &pnfields2[i]); + if (!sv2[i]) { + ret = -ENOMEM; + goto out; + } + } - shadow_free(obj + 1, id, shadow_dtor); /* sv2 */ - sv = shadow_get(obj + 1, id); - if (!sv) - pr_info(" got expected NULL result\n"); + /* pass 2: verify we find allocated svars and where they point to */ + for (i = 0; i < NUM_OBJS; i++) { + /* check the "char" svar for all objects */ + sv = shadow_get(&objs[i], SV_ID1); + if (!sv) { + ret = -EINVAL; + goto out; + } + if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i]) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv1[i]), ptr_id(*sv1[i])); + + /* check the "int" svar for all objects */ + sv = shadow_get(&objs[i], SV_ID2); + if (!sv) { + ret = -EINVAL; + goto out; + } + if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i]) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv2[i]), ptr_id(*sv2[i])); + } - shadow_free(obj + 2, id, shadow_dtor); /* sv4 */ - sv = shadow_get(obj + 2, id); - if (!sv) - pr_info(" got expected NULL result\n"); + /* pass 3: verify that 'get_or_alloc' returns already allocated svars */ + for (i = 0; i < NUM_OBJS; i++) { + pndup[i] = &nfields1[i]; + ptr_id(pndup[i]); + + sv = shadow_get_or_alloc(&objs[i], SV_ID1, sizeof(pndup[i]), + GFP_KERNEL, shadow_ctor, &pndup[i]); + if (!sv) { + ret = -EINVAL; + goto out; + } + if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i]) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv1[i]), ptr_id(*sv1[i])); + } - /* - * We should still find an <id+1> variable. 
- */ - sv = shadow_get(obj, id + 1); - if (!sv) - return -EINVAL; - if (sv == sv3 && *sv3 == pv3) - pr_info(" got expected PTR%d -> PTR%d result\n", - ptr_id(sv3), ptr_id(*sv3)); + /* pass 4: free <objs[*], SV_ID1> pairs of svars, verify removal */ + for (i = 0; i < NUM_OBJS; i++) { + shadow_free(&objs[i], SV_ID1, shadow_dtor); /* 'char' pairs */ + sv = shadow_get(&objs[i], SV_ID1); + if (!sv) + pr_info(" got expected NULL result\n"); + } - /* - * Free all the <id+1> variables, too. - */ - shadow_free_all(id + 1, shadow_dtor); /* sv3 */ - sv = shadow_get(obj, id); - if (!sv) - pr_info(" shadow_get() got expected NULL result\n"); + /* pass 5: check we still find <objs[*], SV_ID2> svar pairs */ + for (i = 0; i < NUM_OBJS; i++) { + sv = shadow_get(&objs[i], SV_ID2); /* 'int' pairs */ + if (!sv) { + ret = -EINVAL; + goto out; + } + if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i]) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv2[i]), ptr_id(*sv2[i])); + } + /* pass 6: free all the <objs[*], SV_ID2> svar pairs too. */ + shadow_free_all(SV_ID2, NULL); /* 'int' pairs */ + for (i = 0; i < NUM_OBJS; i++) { + sv = shadow_get(&objs[i], SV_ID2); + if (!sv) + pr_info(" got expected NULL result\n"); + } free_ptr_list(); return 0; +out: + shadow_free_all(SV_ID1, NULL); /* 'char' pairs */ + shadow_free_all(SV_ID2, NULL); /* 'int' pairs */ + free_ptr_list(); + + return ret; } static void test_klp_shadow_vars_exit(void) diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 14f44f59e733..9959ea23529e 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -15,6 +15,7 @@ #include <linux/mutex.h> #include <linux/ww_mutex.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/delay.h> #include <linux/lockdep.h> #include <linux/spinlock.h> @@ -28,6 +29,7 @@ * Change this to 1 if you want to see the failure printouts: */ static unsigned int debug_locks_verbose; +unsigned int force_read_lock_recursive; static DEFINE_WD_CLASS(ww_lockdep); @@ -57,10 +59,10 @@ static struct ww_mutex o, o2, o3; * Normal standalone locks, for the circular and irq-context * dependency tests: */ -static DEFINE_RAW_SPINLOCK(lock_A); -static DEFINE_RAW_SPINLOCK(lock_B); -static DEFINE_RAW_SPINLOCK(lock_C); -static DEFINE_RAW_SPINLOCK(lock_D); +static DEFINE_SPINLOCK(lock_A); +static DEFINE_SPINLOCK(lock_B); +static DEFINE_SPINLOCK(lock_C); +static DEFINE_SPINLOCK(lock_D); static DEFINE_RWLOCK(rwlock_A); static DEFINE_RWLOCK(rwlock_B); @@ -92,12 +94,12 @@ static DEFINE_RT_MUTEX(rtmutex_D); * but X* and Y* are different classes. 
We do this so that * we do not trigger a real lockup: */ -static DEFINE_RAW_SPINLOCK(lock_X1); -static DEFINE_RAW_SPINLOCK(lock_X2); -static DEFINE_RAW_SPINLOCK(lock_Y1); -static DEFINE_RAW_SPINLOCK(lock_Y2); -static DEFINE_RAW_SPINLOCK(lock_Z1); -static DEFINE_RAW_SPINLOCK(lock_Z2); +static DEFINE_SPINLOCK(lock_X1); +static DEFINE_SPINLOCK(lock_X2); +static DEFINE_SPINLOCK(lock_Y1); +static DEFINE_SPINLOCK(lock_Y2); +static DEFINE_SPINLOCK(lock_Z1); +static DEFINE_SPINLOCK(lock_Z2); static DEFINE_RWLOCK(rwlock_X1); static DEFINE_RWLOCK(rwlock_X2); @@ -137,10 +139,10 @@ static DEFINE_RT_MUTEX(rtmutex_Z2); */ #define INIT_CLASS_FUNC(class) \ static noinline void \ -init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ +init_class_##class(spinlock_t *lock, rwlock_t *rwlock, \ struct mutex *mutex, struct rw_semaphore *rwsem)\ { \ - raw_spin_lock_init(lock); \ + spin_lock_init(lock); \ rwlock_init(rwlock); \ mutex_init(mutex); \ init_rwsem(rwsem); \ @@ -209,10 +211,10 @@ static void init_shared_classes(void) * Shortcuts for lock/unlock API variants, to keep * the testcases compact: */ -#define L(x) raw_spin_lock(&lock_##x) -#define U(x) raw_spin_unlock(&lock_##x) +#define L(x) spin_lock(&lock_##x) +#define U(x) spin_unlock(&lock_##x) #define LU(x) L(x); U(x) -#define SI(x) raw_spin_lock_init(&lock_##x) +#define SI(x) spin_lock_init(&lock_##x) #define WL(x) write_lock(&rwlock_##x) #define WU(x) write_unlock(&rwlock_##x) @@ -399,6 +401,49 @@ static void rwsem_ABBA1(void) * read_lock(A) * spin_lock(B) * spin_lock(B) + * write_lock(A) + * + * This test case is aimed at poking whether the chain cache prevents us from + * detecting a read-lock/lock-write deadlock: if the chain cache doesn't differ + * read/write locks, the following case may happen + * + * { read_lock(A)->lock(B) dependency exists } + * + * P0: + * lock(B); + * read_lock(A); + * + * { Not a deadlock, B -> A is added in the chain cache } + * + * P1: + * lock(B); + * write_lock(A); + * + * { B->A found in chain cache, not reported as a deadlock } + * + */ +static void rlock_chaincache_ABBA1(void) +{ + RL(X1); + L(Y1); + U(Y1); + RU(X1); + + L(Y1); + RL(X1); + RU(X1); + U(Y1); + + L(Y1); + WL(X1); + WU(X1); + U(Y1); // should fail +} + +/* + * read_lock(A) + * spin_lock(B) + * spin_lock(B) * read_lock(A) */ static void rlock_ABBA2(void) @@ -991,6 +1036,133 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) #undef E3 /* + * write-read / write-read / write-read deadlock even if read is recursive + */ + +#define E1() \ + \ + WL(X1); \ + RL(Y1); \ + RU(Y1); \ + WU(X1); + +#define E2() \ + \ + WL(Y1); \ + RL(Z1); \ + RU(Z1); \ + WU(Y1); + +#define E3() \ + \ + WL(Z1); \ + RL(X1); \ + RU(X1); \ + WU(Z1); + +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(W1R2_W2R3_W3R1) + +#undef E1 +#undef E2 +#undef E3 + +/* + * write-write / read-read / write-read deadlock even if read is recursive + */ + +#define E1() \ + \ + WL(X1); \ + WL(Y1); \ + WU(Y1); \ + WU(X1); + +#define E2() \ + \ + RL(Y1); \ + RL(Z1); \ + RU(Z1); \ + RU(Y1); + +#define E3() \ + \ + WL(Z1); \ + RL(X1); \ + RU(X1); \ + WU(Z1); + +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_W3R1) + +#undef E1 +#undef E2 +#undef E3 + +/* + * write-write / read-read / read-write is not deadlock when read is recursive + */ + +#define E1() \ + \ + WL(X1); \ + WL(Y1); \ + WU(Y1); \ + WU(X1); + +#define E2() \ + \ + RL(Y1); \ + RL(Z1); \ + RU(Z1); \ + RU(Y1); + +#define E3() \ + \ + RL(Z1); \ + WL(X1); \ + WU(X1); \ + RU(Z1); + 
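+/*
+ * (Informal sketch of why this is safe: the cycle X1 -> Y1 -> Z1 -> X1
+ * above passes through Z1 as read -> read, and recursive readers never
+ * block one another, so no strong dependency circle exists; lockdep is
+ * expected to stay silent and the testcase to SUCCEED.)
+ */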
+#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(W1R2_R2R3_W3W1) + +#undef E1 +#undef E2 +#undef E3 + +/* + * write-read / read-read / write-write is not deadlock when read is recursive + */ + +#define E1() \ + \ + WL(X1); \ + RL(Y1); \ + RU(Y1); \ + WU(X1); + +#define E2() \ + \ + RL(Y1); \ + RL(Z1); \ + RU(Z1); \ + RU(Y1); + +#define E3() \ + \ + WL(Z1); \ + WL(X1); \ + WU(X1); \ + WU(Z1); + +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_R3W1) + +#undef E1 +#undef E2 +#undef E3 +/* * read-lock / write-lock recursion that is actually safe. */ @@ -1009,20 +1181,28 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) #define E3() \ \ IRQ_ENTER(); \ - RL(A); \ + LOCK(A); \ L(B); \ U(B); \ - RU(A); \ + UNLOCK(A); \ IRQ_EXIT(); /* - * Generate 12 testcases: + * Generate 24 testcases: */ #include "locking-selftest-hardirq.h" -GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard) +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock) #include "locking-selftest-softirq.h" -GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock) #undef E1 #undef E2 @@ -1036,8 +1216,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) \ IRQ_DISABLE(); \ L(B); \ - WL(A); \ - WU(A); \ + LOCK(A); \ + UNLOCK(A); \ U(B); \ IRQ_ENABLE(); @@ -1054,13 +1234,75 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) IRQ_EXIT(); /* - * Generate 12 testcases: + * Generate 24 testcases: + */ +#include "locking-selftest-hardirq.h" +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock) + +#include "locking-selftest-softirq.h" +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 +/* + * read-lock / write-lock recursion that is unsafe. 
+ * + * A is a ENABLED_*_READ lock + * B is a USED_IN_*_READ lock + * + * read_lock(A); + * write_lock(B); + * <interrupt> + * read_lock(B); + * write_lock(A); // if this one is read_lock(), no deadlock + */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + WL(B); \ + LOCK(A); \ + UNLOCK(A); \ + WU(B); \ + IRQ_ENABLE(); + +#define E2() \ + \ + RL(A); \ + RU(A); \ + +#define E3() \ + \ + IRQ_ENTER(); \ + RL(B); \ + RU(B); \ + IRQ_EXIT(); + +/* + * Generate 24 testcases: */ #include "locking-selftest-hardirq.h" -// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard) +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock) #include "locking-selftest-softirq.h" -// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) @@ -1100,7 +1342,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #define I2(x) \ do { \ - raw_spin_lock_init(&lock_##x); \ + spin_lock_init(&lock_##x); \ rwlock_init(&rwlock_##x); \ mutex_init(&mutex_##x); \ init_rwsem(&rwsem_##x); \ @@ -1199,6 +1441,19 @@ static inline void print_testname(const char *testname) dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ pr_cont("\n"); +#define DO_TESTCASE_1RR(desc, name, nr) \ + print_testname(desc"/"#nr); \ + pr_cont(" |"); \ + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + pr_cont("\n"); + +#define DO_TESTCASE_1RRB(desc, name, nr) \ + print_testname(desc"/"#nr); \ + pr_cont(" |"); \ + dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + pr_cont("\n"); + + #define DO_TESTCASE_3(desc, name, nr) \ print_testname(desc"/"#nr); \ dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ @@ -1213,6 +1468,25 @@ static inline void print_testname(const char *testname) dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ pr_cont("\n"); +#define DO_TESTCASE_2RW(desc, name, nr) \ + print_testname(desc"/"#nr); \ + pr_cont(" |"); \ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + pr_cont("\n"); + +#define DO_TESTCASE_2x2RW(desc, name, nr) \ + DO_TESTCASE_2RW("hard-"desc, name##_hard, nr) \ + DO_TESTCASE_2RW("soft-"desc, name##_soft, nr) \ + +#define DO_TESTCASE_6x2x2RW(desc, name) \ + DO_TESTCASE_2x2RW(desc, name, 123); \ + DO_TESTCASE_2x2RW(desc, name, 132); \ + DO_TESTCASE_2x2RW(desc, name, 213); \ + DO_TESTCASE_2x2RW(desc, name, 231); \ + DO_TESTCASE_2x2RW(desc, name, 312); \ + DO_TESTCASE_2x2RW(desc, name, 321); + #define DO_TESTCASE_6(desc, name) \ print_testname(desc); \ dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ @@ -1289,6 +1563,22 @@ static inline void print_testname(const char *testname) DO_TESTCASE_2IB(desc, name, 312); \ DO_TESTCASE_2IB(desc, name, 321); +#define DO_TESTCASE_6x1RR(desc, name) \ + DO_TESTCASE_1RR(desc, name, 123); \ + DO_TESTCASE_1RR(desc, name, 132); \ + DO_TESTCASE_1RR(desc, name, 213); \ + DO_TESTCASE_1RR(desc, name, 231); \ + DO_TESTCASE_1RR(desc, name, 312); \ + DO_TESTCASE_1RR(desc, name, 321); + +#define DO_TESTCASE_6x1RRB(desc, name) \ + DO_TESTCASE_1RRB(desc, name, 123); \ + DO_TESTCASE_1RRB(desc, name, 132); \ + DO_TESTCASE_1RRB(desc, name, 213); \ + DO_TESTCASE_1RRB(desc, name, 231); \ + 
DO_TESTCASE_1RRB(desc, name, 312); \ + DO_TESTCASE_1RRB(desc, name, 321); + #define DO_TESTCASE_6x6(desc, name) \ DO_TESTCASE_6I(desc, name, 123); \ DO_TESTCASE_6I(desc, name, 132); \ @@ -1716,10 +2006,23 @@ static void ww_test_edeadlk_acquire_wrong_slow(void) static void ww_test_spin_nest_unlocked(void) { - raw_spin_lock_nest_lock(&lock_A, &o.base); + spin_lock_nest_lock(&lock_A, &o.base); U(A); } +/* This is not a deadlock, because we have X1 to serialize Y1 and Y2 */ +static void ww_test_spin_nest_lock(void) +{ + spin_lock(&lock_X1); + spin_lock_nest_lock(&lock_Y1, &lock_X1); + spin_lock(&lock_A); + spin_lock_nest_lock(&lock_Y2, &lock_X1); + spin_unlock(&lock_A); + spin_unlock(&lock_Y2); + spin_unlock(&lock_Y1); + spin_unlock(&lock_X1); +} + static void ww_test_unneeded_slow(void) { WWAI(&t); @@ -1937,6 +2240,10 @@ static void ww_tests(void) dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW); pr_cont("\n"); + print_testname("spinlock nest test"); + dotest(ww_test_spin_nest_lock, SUCCESS, LOCKTYPE_WW); + pr_cont("\n"); + printk(" -----------------------------------------------------\n"); printk(" |block | try |context|\n"); printk(" -----------------------------------------------------\n"); @@ -1966,6 +2273,152 @@ static void ww_tests(void) pr_cont("\n"); } + +/* + * <in hardirq handler> + * read_lock(&A); + * <hardirq disable> + * spin_lock(&B); + * spin_lock(&B); + * read_lock(&A); + * + * is a deadlock. + */ +static void queued_read_lock_hardirq_RE_Er(void) +{ + HARDIRQ_ENTER(); + read_lock(&rwlock_A); + LOCK(B); + UNLOCK(B); + read_unlock(&rwlock_A); + HARDIRQ_EXIT(); + + HARDIRQ_DISABLE(); + LOCK(B); + read_lock(&rwlock_A); + read_unlock(&rwlock_A); + UNLOCK(B); + HARDIRQ_ENABLE(); +} + +/* + * <in hardirq handler> + * spin_lock(&B); + * <hardirq disable> + * read_lock(&A); + * read_lock(&A); + * spin_lock(&B); + * + * is not a deadlock. + */ +static void queued_read_lock_hardirq_ER_rE(void) +{ + HARDIRQ_ENTER(); + LOCK(B); + read_lock(&rwlock_A); + read_unlock(&rwlock_A); + UNLOCK(B); + HARDIRQ_EXIT(); + + HARDIRQ_DISABLE(); + read_lock(&rwlock_A); + LOCK(B); + UNLOCK(B); + read_unlock(&rwlock_A); + HARDIRQ_ENABLE(); +} + +/* + * <hardirq disable> + * spin_lock(&B); + * read_lock(&A); + * <in hardirq handler> + * spin_lock(&B); + * read_lock(&A); + * + * is a deadlock. Because the two read_lock()s are both non-recursive readers. 
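+ * (Process-context readers on a queued rwlock wait behind a pending
+ * writer; only readers in interrupt context take the unfair path, so
+ * these read_lock()s block just like write_lock()s would.)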
+ */ +static void queued_read_lock_hardirq_inversion(void) +{ + + HARDIRQ_ENTER(); + LOCK(B); + UNLOCK(B); + HARDIRQ_EXIT(); + + HARDIRQ_DISABLE(); + LOCK(B); + read_lock(&rwlock_A); + read_unlock(&rwlock_A); + UNLOCK(B); + HARDIRQ_ENABLE(); + + read_lock(&rwlock_A); + read_unlock(&rwlock_A); +} + +static void queued_read_lock_tests(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | queued read lock tests |\n"); + printk(" ---------------------------\n"); + print_testname("hardirq read-lock/lock-read"); + dotest(queued_read_lock_hardirq_RE_Er, FAILURE, LOCKTYPE_RWLOCK); + pr_cont("\n"); + + print_testname("hardirq lock-read/read-lock"); + dotest(queued_read_lock_hardirq_ER_rE, SUCCESS, LOCKTYPE_RWLOCK); + pr_cont("\n"); + + print_testname("hardirq inversion"); + dotest(queued_read_lock_hardirq_inversion, FAILURE, LOCKTYPE_RWLOCK); + pr_cont("\n"); +} + +static void fs_reclaim_correct_nesting(void) +{ + fs_reclaim_acquire(GFP_KERNEL); + might_alloc(GFP_NOFS); + fs_reclaim_release(GFP_KERNEL); +} + +static void fs_reclaim_wrong_nesting(void) +{ + fs_reclaim_acquire(GFP_KERNEL); + might_alloc(GFP_KERNEL); + fs_reclaim_release(GFP_KERNEL); +} + +static void fs_reclaim_protected_nesting(void) +{ + unsigned int flags; + + fs_reclaim_acquire(GFP_KERNEL); + flags = memalloc_nofs_save(); + might_alloc(GFP_KERNEL); + memalloc_nofs_restore(flags); + fs_reclaim_release(GFP_KERNEL); +} + +static void fs_reclaim_tests(void) +{ + printk(" --------------------\n"); + printk(" | fs_reclaim tests |\n"); + printk(" --------------------\n"); + + print_testname("correct nesting"); + dotest(fs_reclaim_correct_nesting, SUCCESS, 0); + pr_cont("\n"); + + print_testname("wrong nesting"); + dotest(fs_reclaim_wrong_nesting, FAILURE, 0); + pr_cont("\n"); + + print_testname("protected nesting"); + dotest(fs_reclaim_protected_nesting, SUCCESS, 0); + pr_cont("\n"); +} + void locking_selftest(void) { /* @@ -1979,6 +2432,11 @@ void locking_selftest(void) } /* + * treats read_lock() as recursive read locks for testing purpose + */ + force_read_lock_recursive = 1; + + /* * Run the testsuite: */ printk("------------------------\n"); @@ -2033,14 +2491,6 @@ void locking_selftest(void) print_testname("mixed read-lock/lock-write ABBA"); pr_cont(" |"); dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK); -#ifdef CONFIG_PROVE_LOCKING - /* - * Lockdep does indeed fail here, but there's nothing we can do about - * that now. Don't kill lockdep for it. 
- */ - unexpected_testcase_failures--; -#endif - pr_cont(" |"); dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM); @@ -2056,6 +2506,15 @@ void locking_selftest(void) pr_cont(" |"); dotest(rwsem_ABBA3, FAILURE, LOCKTYPE_RWSEM); + print_testname("chain cached mixed R-L/L-W ABBA"); + pr_cont(" |"); + dotest(rlock_chaincache_ABBA1, FAILURE, LOCKTYPE_RWLOCK); + + DO_TESTCASE_6x1RRB("rlock W1R2/W2R3/W3R1", W1R2_W2R3_W3R1); + DO_TESTCASE_6x1RRB("rlock W1W2/R2R3/W3R1", W1W2_R2R3_W3R1); + DO_TESTCASE_6x1RR("rlock W1W2/R2R3/R3W1", W1W2_R2R3_R3W1); + DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1); + printk(" --------------------------------------------------------------------------\n"); /* @@ -2068,11 +2527,21 @@ void locking_selftest(void) DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4); DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion); - DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); -// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); + DO_TESTCASE_6x2x2RW("irq read-recursion", irq_read_recursion); + DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2); + DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3); ww_tests(); + force_read_lock_recursive = 0; + /* + * queued_read_lock() specific test cases can be put here + */ + if (IS_ENABLED(CONFIG_QUEUED_RWLOCKS)) + queued_read_lock_tests(); + + fs_reclaim_tests(); + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index cc7b6d4cc7c7..90bb67994688 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -446,7 +446,7 @@ _last_literals: *op++ = (BYTE)(lastRun << ML_BITS); } - memcpy(op, anchor, lastRun); + LZ4_memcpy(op, anchor, lastRun); op += lastRun; } @@ -708,7 +708,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRunSize<<ML_BITS); } - memcpy(op, anchor, lastRunSize); + LZ4_memcpy(op, anchor, lastRunSize); op += lastRunSize; } diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 5371dab6b481..8a7724a6ce2f 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -153,7 +153,7 @@ static FORCE_INLINE int LZ4_decompress_generic( && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend))) { /* Copy the literals */ - memcpy(op, ip, endOnInput ? 16 : 8); + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); op += length; ip += length; /* @@ -172,9 +172,9 @@ static FORCE_INLINE int LZ4_decompress_generic( (offset >= 8) && (dict == withPrefix64k || match >= lowPrefix)) { /* Copy the match. */ - memcpy(op + 0, match + 0, 8); - memcpy(op + 8, match + 8, 8); - memcpy(op + 16, match + 16, 2); + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. 
 */
			continue;
@@ -263,7 +263,11 @@ static FORCE_INLINE int LZ4_decompress_generic(
 			}
 		}

-		memcpy(op, ip, length);
+		/*
+		 * supports overlapping memory regions; only matters
+		 * for in-place decompression scenarios
+		 */
+		LZ4_memmove(op, ip, length);
 		ip += length;
 		op += length;

@@ -350,7 +354,7 @@ _copy_match:
 			size_t const copySize = (size_t)(lowPrefix - match);
 			size_t const restSize = length - copySize;

-			memcpy(op, dictEnd - copySize, copySize);
+			LZ4_memcpy(op, dictEnd - copySize, copySize);
 			op += copySize;
 			if (restSize > (size_t)(op - lowPrefix)) {
 				/* overlap copy */
@@ -360,7 +364,7 @@ _copy_match:
 				while (op < endOfMatch)
 					*op++ = *copyFrom++;
 			} else {
-				memcpy(op, lowPrefix, restSize);
+				LZ4_memcpy(op, lowPrefix, restSize);
 				op += restSize;
 			}
 		}
@@ -386,7 +390,7 @@ _copy_match:
 				while (op < copyEnd)
 					*op++ = *match++;
 			} else {
-				memcpy(op, match, mlen);
+				LZ4_memcpy(op, match, mlen);
 			}
 			op = copyEnd;
 			if (op == oend)
@@ -400,7 +404,7 @@ _copy_match:
 			op[2] = match[2];
 			op[3] = match[3];
 			match += inc32table[offset];
-			memcpy(op + 4, match, 4);
+			LZ4_memcpy(op + 4, match, 4);
 			match -= dec64table[offset];
 		} else {
 			LZ4_copy8(op, match);
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 1a7fa9d9170f..673bd206aa98 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -137,6 +137,17 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
 	return put_unaligned_le16(value, memPtr);
 }

+/*
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to
+ * analyze memcpy() as-if it were standard compliant, so it can inline it
+ * in freestanding environments. This is needed when decompressing the
+ * Linux Kernel, for example.
+ */
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)
+
 static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
 {
 #if LZ4_ARCH64
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index 1b61d874e337..e7ac8694b797 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -570,7 +570,7 @@ _Search3:
 		*op++ = (BYTE) lastRun;
 	} else
 		*op++ = (BYTE)(lastRun<<ML_BITS);
-	memcpy(op, anchor, iend - anchor);
+	LZ4_memcpy(op, anchor, iend - anchor);
 	op += iend - anchor;
 }

diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 8ad5ba2b86e2..76758e9296ba 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -301,7 +301,7 @@ finished_writing_instruction:
 	return in_end - (ii - ti);
 }

-int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
+static int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
 		unsigned char *out, size_t *out_len,
 		void *wrkmem, const unsigned char bitstream_version)
 {
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 2717c7963acd..7892a40cf765 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -32,7 +32,7 @@
 * depending on the base count. Since the base count is taken from a u8
 * and a few bits, it is safe to assume that it will always be lower than
 * or equal to 2*255, thus we can always prevent any overflow by accepting
- * two less 255 steps. See Documentation/lzo.txt for more information.
+ * two less 255 steps. See Documentation/staging/lzo.rst for more information.
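+ * (For example, with a 32-bit size_t this bound evaluates to
+ * 0xffffffff / 255 - 2 = 16843007 steps.)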
*/ #define MAX_255_COUNT ((((size_t)~0) / 255) - 2) diff --git a/lib/math/div64.c b/lib/math/div64.c index 368ca7fd0d82..064d68a5391a 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -18,9 +18,11 @@ * or by defining a preprocessor macro in arch/include/asm/div64.h. */ +#include <linux/bitops.h> #include <linux/export.h> -#include <linux/kernel.h> +#include <linux/math.h> #include <linux/math64.h> +#include <linux/log2.h> /* Not needed on 64bit architectures */ #if BITS_PER_LONG == 32 @@ -190,3 +192,44 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return __iter_div_u64_rem(dividend, divisor, remainder); } EXPORT_SYMBOL(iter_div_u64_rem); + +#ifndef mul_u64_u64_div_u64 +u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) +{ + u64 res = 0, div, rem; + int shift; + + /* can a * b overflow ? */ + if (ilog2(a) + ilog2(b) > 62) { + /* + * (b * a) / c is equal to + * + * (b / c) * a + + * (b % c) * a / c + * + * if nothing overflows. Can the 1st multiplication + * overflow? Yes, but we do not care: this can only + * happen if the end result can't fit in u64 anyway. + * + * So the code below does + * + * res = (b / c) * a; + * b = b % c; + */ + div = div64_u64_rem(b, c, &rem); + res = div * a; + b = rem; + + shift = ilog2(a) + ilog2(b) - 62; + if (shift > 0) { + /* drop precision */ + b >>= shift; + c >>= shift; + if (!c) + return res; + } + } + + return res + div64_u64(a * b, c); +} +#endif diff --git a/lib/math/int_pow.c b/lib/math/int_pow.c index 622fc1ab3c74..0cf426e69bda 100644 --- a/lib/math/int_pow.c +++ b/lib/math/int_pow.c @@ -6,7 +6,7 @@ */ #include <linux/export.h> -#include <linux/kernel.h> +#include <linux/math.h> #include <linux/types.h> /** diff --git a/lib/math/int_sqrt.c b/lib/math/int_sqrt.c index 30e0f9770f88..a8170bb9142f 100644 --- a/lib/math/int_sqrt.c +++ b/lib/math/int_sqrt.c @@ -6,9 +6,10 @@ * square root from Guy L. Steele. */ -#include <linux/kernel.h> #include <linux/export.h> #include <linux/bitops.h> +#include <linux/limits.h> +#include <linux/math.h> /** * int_sqrt - computes the integer square root diff --git a/lib/math/rational.c b/lib/math/rational.c index 31fb27db2deb..9781d521963d 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -11,7 +11,7 @@ #include <linux/rational.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/kernel.h> +#include <linux/minmax.h> /* * calculate best rational approximation for a given fraction @@ -27,7 +27,7 @@ * with the fractional part size described in given_denominator. 
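 *
 * e.g. feeding 31416/10000 with bounds 500/120 should yield 355/113,
 * the well-known continued-fraction convergent of pi.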
* * for theoretical background, see: - * http://en.wikipedia.org/wiki/Continued_fraction + * https://en.wikipedia.org/wiki/Continued_fraction */ void rational_best_approximation( diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c index bf043258fa00..6cb4adbb81d2 100644 --- a/lib/math/reciprocal_div.c +++ b/lib/math/reciprocal_div.c @@ -1,9 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/bitops.h> #include <linux/bug.h> -#include <linux/kernel.h> -#include <asm/div64.h> -#include <linux/reciprocal_div.h> #include <linux/export.h> +#include <linux/limits.h> +#include <linux/math.h> +#include <linux/minmax.h> +#include <linux/types.h> + +#include <linux/reciprocal_div.h> /* * For a description of the algorithm please have a look at diff --git a/lib/memregion.c b/lib/memregion.c index 77c85b5251da..be5cfa5a3b57 100644 --- a/lib/memregion.c +++ b/lib/memregion.c @@ -2,6 +2,7 @@ /* identifiers for device / performance-differentiated memory regions */ #include <linux/idr.h> #include <linux/types.h> +#include <linux/memregion.h> static DEFINE_IDA(memregion_ids); diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile index d5874a7f5ff9..6e6ef9a34fe1 100644 --- a/lib/mpi/Makefile +++ b/lib/mpi/Makefile @@ -13,9 +13,16 @@ mpi-y = \ generic_mpih-rshift.o \ generic_mpih-sub1.o \ generic_mpih-add1.o \ + ec.o \ mpicoder.o \ + mpi-add.o \ mpi-bit.o \ mpi-cmp.o \ + mpi-sub-ui.o \ + mpi-div.o \ + mpi-inv.o \ + mpi-mod.o \ + mpi-mul.o \ mpih-cmp.o \ mpih-div.o \ mpih-mul.o \ diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c new file mode 100644 index 000000000000..40f5908e57a4 --- /dev/null +++ b/lib/mpi/ec.c @@ -0,0 +1,1506 @@ +/* ec.c - Elliptic Curve functions + * Copyright (C) 2007 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#define point_init(a) mpi_point_init((a)) +#define point_free(a) mpi_point_free_parts((a)) + +#define log_error(fmt, ...) pr_err(fmt, ##__VA_ARGS__) +#define log_fatal(fmt, ...) pr_err(fmt, ##__VA_ARGS__) + +#define DIM(v) (sizeof(v)/sizeof((v)[0])) + + +/* Create a new point option. NBITS gives the size in bits of one + * coordinate; it is only used to pre-allocate some resources and + * might also be passed as 0 to use a default value. + */ +MPI_POINT mpi_point_new(unsigned int nbits) +{ + MPI_POINT p; + + (void)nbits; /* Currently not used. */ + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) + mpi_point_init(p); + return p; +} +EXPORT_SYMBOL_GPL(mpi_point_new); + +/* Release the point object P. P may be NULL. */ +void mpi_point_release(MPI_POINT p) +{ + if (p) { + mpi_point_free_parts(p); + kfree(p); + } +} +EXPORT_SYMBOL_GPL(mpi_point_release); + +/* Initialize the fields of a point object. gcry_mpi_point_free_parts + * may be used to release the fields. 
+ */ +void mpi_point_init(MPI_POINT p) +{ + p->x = mpi_new(0); + p->y = mpi_new(0); + p->z = mpi_new(0); +} +EXPORT_SYMBOL_GPL(mpi_point_init); + +/* Release the parts of a point object. */ +void mpi_point_free_parts(MPI_POINT p) +{ + mpi_free(p->x); p->x = NULL; + mpi_free(p->y); p->y = NULL; + mpi_free(p->z); p->z = NULL; +} +EXPORT_SYMBOL_GPL(mpi_point_free_parts); + +/* Set the value from S into D. */ +static void point_set(MPI_POINT d, MPI_POINT s) +{ + mpi_set(d->x, s->x); + mpi_set(d->y, s->y); + mpi_set(d->z, s->z); +} + +static void point_resize(MPI_POINT p, struct mpi_ec_ctx *ctx) +{ + size_t nlimbs = ctx->p->nlimbs; + + mpi_resize(p->x, nlimbs); + p->x->nlimbs = nlimbs; + mpi_resize(p->z, nlimbs); + p->z->nlimbs = nlimbs; + + if (ctx->model != MPI_EC_MONTGOMERY) { + mpi_resize(p->y, nlimbs); + p->y->nlimbs = nlimbs; + } +} + +static void point_swap_cond(MPI_POINT d, MPI_POINT s, unsigned long swap, + struct mpi_ec_ctx *ctx) +{ + mpi_swap_cond(d->x, s->x, swap); + if (ctx->model != MPI_EC_MONTGOMERY) + mpi_swap_cond(d->y, s->y, swap); + mpi_swap_cond(d->z, s->z, swap); +} + + +/* W = W mod P. */ +static void ec_mod(MPI w, struct mpi_ec_ctx *ec) +{ + if (ec->t.p_barrett) + mpi_mod_barrett(w, w, ec->t.p_barrett); + else + mpi_mod(w, w, ec->p); +} + +static void ec_addm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_add(w, u, v); + ec_mod(w, ctx); +} + +static void ec_subm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec) +{ + mpi_sub(w, u, v); + while (w->sign) + mpi_add(w, w, ec->p); + /*ec_mod(w, ec);*/ +} + +static void ec_mulm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_mul(w, u, v); + ec_mod(w, ctx); +} + +/* W = 2 * U mod P. */ +static void ec_mul2(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + mpi_lshift(w, u, 1); + ec_mod(w, ctx); +} + +static void ec_powm(MPI w, const MPI b, const MPI e, + struct mpi_ec_ctx *ctx) +{ + mpi_powm(w, b, e, ctx->p); + /* mpi_abs(w); */ +} + +/* Shortcut for + * ec_powm(B, B, mpi_const(MPI_C_TWO), ctx); + * for easier optimization. + */ +static void ec_pow2(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + /* Using mpi_mul is slightly faster (at least on amd64). */ + /* mpi_powm(w, b, mpi_const(MPI_C_TWO), ctx->p); */ + ec_mulm(w, b, b, ctx); +} + +/* Shortcut for + * ec_powm(B, B, mpi_const(MPI_C_THREE), ctx); + * for easier optimization. + */ +static void ec_pow3(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + mpi_powm(w, b, mpi_const(MPI_C_THREE), ctx->p); +} + +static void ec_invm(MPI x, MPI a, struct mpi_ec_ctx *ctx) +{ + if (!mpi_invm(x, a, ctx->p)) + log_error("ec_invm: inverse does not exist:\n"); +} + +static void mpih_set_cond(mpi_ptr_t wp, mpi_ptr_t up, + mpi_size_t usize, unsigned long set) +{ + mpi_size_t i; + mpi_limb_t mask = ((mpi_limb_t)0) - set; + mpi_limb_t x; + + for (i = 0; i < usize; i++) { + x = mask & (wp[i] ^ up[i]); + wp[i] = wp[i] ^ x; + } +} + +/* Routines for 2^255 - 19. 
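+ *
+ * Reduction relies on the identity 2^255 == 19 (mod p): the limbs
+ * shifted out above bit 255 are folded back into the low limbs
+ * multiplied by 19 (computed below as m*16 + 3*m, plus the final
+ * "* 19" on the carry).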
*/ + +#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) + +static void ec_addm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("addm_25519: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_add_n(wp, up, vp, wsize); + borrow = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); +} + +static void ec_subm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("subm_25519: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + borrow = mpihelp_sub_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); +} + +static void ec_mulm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519*2]; + mpi_limb_t m[LIMB_SIZE_25519+1]; + mpi_limb_t cy; + int msb; + + (void)ctx; + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("mulm_25519: different sizes\n"); + + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_mul_n(n, up, vp, wsize); + memcpy(wp, n, wsize * BYTES_PER_MPI_LIMB); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); + + memcpy(m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB); + mpihelp_rshift(m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB)); + + memcpy(n, m, wsize * BYTES_PER_MPI_LIMB); + cy = mpihelp_lshift(m, m, LIMB_SIZE_25519, 4); + m[LIMB_SIZE_25519] = cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + + cy = mpihelp_add_n(wp, wp, m, wsize); + m[LIMB_SIZE_25519] += cy; + + memset(m, 0, wsize * BYTES_PER_MPI_LIMB); + msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB)); + m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19; + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); + mpihelp_add_n(wp, wp, m, wsize); + + m[0] = 0; + cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(m, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_add_n(wp, wp, m, wsize); +} + +static void ec_mul2_25519(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + ec_addm_25519(w, u, u, ctx); +} + +static void ec_pow2_25519(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + ec_mulm_25519(w, b, b, ctx); +} + +/* Routines for 2^448 - 2^224 - 1. 
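+ *
+ * Here 2^448 == 2^224 + 1 (mod p), so the upper half of a product is
+ * folded back in twice, once shifted by 224 bits and once unshifted,
+ * which is roughly what the limb additions in ec_mulm_448() implement.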
*/ + +#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) +#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2) + +static void ec_addm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448]; + mpi_limb_t cy; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("addm_448: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + cy = mpihelp_add_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_sub_n(wp, wp, n, wsize); +} + +static void ec_subm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("subm_448: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + borrow = mpihelp_sub_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); +} + +static void ec_mulm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448*2]; + mpi_limb_t a2[LIMB_SIZE_HALF_448]; + mpi_limb_t a3[LIMB_SIZE_HALF_448]; + mpi_limb_t b0[LIMB_SIZE_HALF_448]; + mpi_limb_t b1[LIMB_SIZE_HALF_448]; + mpi_limb_t cy; + int i; +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + mpi_limb_t b1_rest, a3_rest; +#endif + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("mulm_448: different sizes\n"); + + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_mul_n(n, up, vp, wsize); + + for (i = 0; i < (wsize + 1) / 2; i++) { + b0[i] = n[i]; + b1[i] = n[i+wsize/2]; + a2[i] = n[i+wsize]; + a3[i] = n[i+wsize+wsize/2]; + } + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; + a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; + + b1_rest = 0; + a3_rest = 0; + + for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { + mpi_limb_t b1v, a3v; + b1v = b1[i]; + a3v = a3[i]; + b1[i] = (b1_rest << 32) | (b1v >> 32); + a3[i] = (a3_rest << 32) | (a3v >> 32); + b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); + a3_rest = a3v & (((mpi_limb_t)1UL << 32)-1); + } +#endif + + cy = mpihelp_add_n(b0, b0, a2, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b0, b0, a3, LIMB_SIZE_HALF_448); + for (i = 0; i < (wsize + 1) / 2; i++) + wp[i] = b0[i]; +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL << 32)-1); +#endif + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + cy = b0[LIMB_SIZE_HALF_448-1] >> 32; +#endif + + cy = mpihelp_add_1(b1, b1, LIMB_SIZE_HALF_448, cy); + cy += mpihelp_add_n(b1, b1, a2, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + b1_rest = 0; + for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { + mpi_limb_t b1v = b1[i]; + b1[i] = (b1_rest << 32) | (b1v >> 32); + b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); + } + wp[LIMB_SIZE_HALF_448-1] |= (b1_rest << 32); +#endif + for (i = 0; i < wsize / 2; i++) + wp[i+(wsize + 1) / 2] = b1[i]; + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + cy = b1[LIMB_SIZE_HALF_448-1]; +#endif + + memset(n, 0, wsize * BYTES_PER_MPI_LIMB); + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + n[LIMB_SIZE_HALF_448-1] = cy << 32; +#else + 
n[LIMB_SIZE_HALF_448] = cy; +#endif + n[0] = cy; + mpihelp_add_n(wp, wp, n, wsize); + + memset(n, 0, wsize * BYTES_PER_MPI_LIMB); + cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); +} + +static void ec_mul2_448(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + ec_addm_448(w, u, u, ctx); +} + +static void ec_pow2_448(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + ec_mulm_448(w, b, b, ctx); +} + +struct field_table { + const char *p; + + /* computation routines for the field. */ + void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); + void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); +}; + +static const struct field_table field_table[] = { + { + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + ec_addm_25519, + ec_subm_25519, + ec_mulm_25519, + ec_mul2_25519, + ec_pow2_25519 + }, + { + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + ec_addm_448, + ec_subm_448, + ec_mulm_448, + ec_mul2_448, + ec_pow2_448 + }, + { NULL, NULL, NULL, NULL, NULL, NULL }, +}; + +/* Force recomputation of all helper variables. */ +static void mpi_ec_get_reset(struct mpi_ec_ctx *ec) +{ + ec->t.valid.a_is_pminus3 = 0; + ec->t.valid.two_inv_p = 0; +} + +/* Accessor for helper variable. */ +static int ec_get_a_is_pminus3(struct mpi_ec_ctx *ec) +{ + MPI tmp; + + if (!ec->t.valid.a_is_pminus3) { + ec->t.valid.a_is_pminus3 = 1; + tmp = mpi_alloc_like(ec->p); + mpi_sub_ui(tmp, ec->p, 3); + ec->t.a_is_pminus3 = !mpi_cmp(ec->a, tmp); + mpi_free(tmp); + } + + return ec->t.a_is_pminus3; +} + +/* Accessor for helper variable. 
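+ * (Computes 2^{-1} mod p with ec_invm() on first use and caches it in
+ * ctx->t.two_inv_p.)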
 */
+static MPI ec_get_two_inv_p(struct mpi_ec_ctx *ec)
+{
+	if (!ec->t.valid.two_inv_p) {
+		ec->t.valid.two_inv_p = 1;
+		if (!ec->t.two_inv_p)
+			ec->t.two_inv_p = mpi_alloc(0);
+		ec_invm(ec->t.two_inv_p, mpi_const(MPI_C_TWO), ec);
+	}
+	return ec->t.two_inv_p;
+}
+
+static const char *const curve25519_bad_points[] = {
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed",
+	"0x0000000000000000000000000000000000000000000000000000000000000000",
+	"0x0000000000000000000000000000000000000000000000000000000000000001",
+	"0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0",
+	"0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f",
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec",
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee",
+	NULL
+};
+
+static const char *const curve448_bad_points[] = {
+	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+	"ffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+	"0x00000000000000000000000000000000000000000000000000000000"
+	"00000000000000000000000000000000000000000000000000000000",
+	"0x00000000000000000000000000000000000000000000000000000000"
+	"00000000000000000000000000000000000000000000000000000001",
+	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+	"fffffffffffffffffffffffffffffffffffffffffffffffffffffffe",
+	"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+	"00000000000000000000000000000000000000000000000000000000",
+	NULL
+};
+
+static const char *const *bad_points_table[] = {
+	curve25519_bad_points,
+	curve448_bad_points,
+};
+
+static void mpi_ec_coefficient_normalize(MPI a, MPI p)
+{
+	if (a->sign) {
+		mpi_resize(a, p->nlimbs);
+		mpihelp_sub_n(a->d, p->d, a->d, p->nlimbs);
+		a->nlimbs = p->nlimbs;
+		a->sign = 0;
+	}
+}
+
+/* This function initializes a context for an elliptic curve based on
+ * the field GF(p). P is the prime specifying this field, A is the
+ * first coefficient. CTX is expected to be zeroized.
+ */
+void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model,
+			enum ecc_dialects dialect,
+			int flags, MPI p, MPI a, MPI b)
+{
+	int i;
+	static int use_barrett = -1 /* TODO: 1 or -1 */;
+
+	mpi_ec_coefficient_normalize(a, p);
+	mpi_ec_coefficient_normalize(b, p);
+
+	/* Fixme: Do we want to check some constraints? e.g. a < p */
+
+	ctx->model = model;
+	ctx->dialect = dialect;
+	ctx->flags = flags;
+	if (dialect == ECC_DIALECT_ED25519)
+		ctx->nbits = 256;
+	else
+		ctx->nbits = mpi_get_nbits(p);
+	ctx->p = mpi_copy(p);
+	ctx->a = mpi_copy(a);
+	ctx->b = mpi_copy(b);
+
+	ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL;
+
+	mpi_ec_get_reset(ctx);
+
+	if (model == MPI_EC_MONTGOMERY) {
+		for (i = 0; i < DIM(bad_points_table); i++) {
+			MPI p_candidate = mpi_scanval(bad_points_table[i][0]);
+			int match_p = !mpi_cmp(ctx->p, p_candidate);
+			int j;
+
+			mpi_free(p_candidate);
+			if (!match_p)
+				continue;
+
+			for (j = 0; j < DIM(ctx->t.scratch) && bad_points_table[i][j]; j++)
+				ctx->t.scratch[j] = mpi_scanval(bad_points_table[i][j]);
+		}
+	} else {
+		/* Allocate scratch variables.
*/ + for (i = 0; i < DIM(ctx->t.scratch); i++) + ctx->t.scratch[i] = mpi_alloc_like(ctx->p); + } + + ctx->addm = ec_addm; + ctx->subm = ec_subm; + ctx->mulm = ec_mulm; + ctx->mul2 = ec_mul2; + ctx->pow2 = ec_pow2; + + for (i = 0; field_table[i].p; i++) { + MPI f_p; + + f_p = mpi_scanval(field_table[i].p); + if (!f_p) + break; + + if (!mpi_cmp(p, f_p)) { + ctx->addm = field_table[i].addm; + ctx->subm = field_table[i].subm; + ctx->mulm = field_table[i].mulm; + ctx->mul2 = field_table[i].mul2; + ctx->pow2 = field_table[i].pow2; + mpi_free(f_p); + + mpi_resize(ctx->a, ctx->p->nlimbs); + ctx->a->nlimbs = ctx->p->nlimbs; + + mpi_resize(ctx->b, ctx->p->nlimbs); + ctx->b->nlimbs = ctx->p->nlimbs; + + for (i = 0; i < DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++) + ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs; + + break; + } + + mpi_free(f_p); + } +} +EXPORT_SYMBOL_GPL(mpi_ec_init); + +void mpi_ec_deinit(struct mpi_ec_ctx *ctx) +{ + int i; + + mpi_barrett_free(ctx->t.p_barrett); + + /* Domain parameter. */ + mpi_free(ctx->p); + mpi_free(ctx->a); + mpi_free(ctx->b); + mpi_point_release(ctx->G); + mpi_free(ctx->n); + + /* The key. */ + mpi_point_release(ctx->Q); + mpi_free(ctx->d); + + /* Private data of ec.c. */ + mpi_free(ctx->t.two_inv_p); + + for (i = 0; i < DIM(ctx->t.scratch); i++) + mpi_free(ctx->t.scratch[i]); +} +EXPORT_SYMBOL_GPL(mpi_ec_deinit); + +/* Compute the affine coordinates from the projective coordinates in + * POINT. Set them into X and Y. If one coordinate is not required, + * X or Y may be passed as NULL. CTX is the usual context. Returns: 0 + * on success or !0 if POINT is at infinity. + */ +int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + if (!mpi_cmp_ui(point->z, 0)) + return -1; + + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates. */ + { + MPI z1, z2, z3; + + z1 = mpi_new(0); + z2 = mpi_new(0); + ec_invm(z1, point->z, ctx); /* z1 = z^(-1) mod p */ + ec_mulm(z2, z1, z1, ctx); /* z2 = z^(-2) mod p */ + + if (x) + ec_mulm(x, point->x, z2, ctx); + + if (y) { + z3 = mpi_new(0); + ec_mulm(z3, z2, z1, ctx); /* z3 = z^(-3) mod p */ + ec_mulm(y, point->y, z3, ctx); + mpi_free(z3); + } + + mpi_free(z2); + mpi_free(z1); + } + return 0; + + case MPI_EC_MONTGOMERY: + { + if (x) + mpi_set(x, point->x); + + if (y) { + log_fatal("%s: Getting Y-coordinate on %s is not supported\n", + "mpi_ec_get_affine", "Montgomery"); + return -1; + } + } + return 0; + + case MPI_EC_EDWARDS: + { + MPI z; + + z = mpi_new(0); + ec_invm(z, point->z, ctx); + + mpi_resize(z, ctx->p->nlimbs); + z->nlimbs = ctx->p->nlimbs; + + if (x) { + mpi_resize(x, ctx->p->nlimbs); + x->nlimbs = ctx->p->nlimbs; + ctx->mulm(x, point->x, z, ctx); + } + if (y) { + mpi_resize(y, ctx->p->nlimbs); + y->nlimbs = ctx->p->nlimbs; + ctx->mulm(y, point->y, z, ctx); + } + + mpi_free(z); + } + return 0; + + default: + return -1; + } +} +EXPORT_SYMBOL_GPL(mpi_ec_get_affine); + +/* RESULT = 2 * POINT (Weierstrass version). 
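+ *
+ * Jacobian coordinates: (X : Y : Z) stands for the affine point
+ * (X/Z^2, Y/Z^3), so doubling needs no field inversion.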
*/ +static void dup_point_weierstrass(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define t1 (ctx->t.scratch[0]) +#define t2 (ctx->t.scratch[1]) +#define t3 (ctx->t.scratch[2]) +#define l1 (ctx->t.scratch[3]) +#define l2 (ctx->t.scratch[4]) +#define l3 (ctx->t.scratch[5]) + + if (!mpi_cmp_ui(point->y, 0) || !mpi_cmp_ui(point->z, 0)) { + /* P_y == 0 || P_z == 0 => [1:1:0] */ + mpi_set_ui(x3, 1); + mpi_set_ui(y3, 1); + mpi_set_ui(z3, 0); + } else { + if (ec_get_a_is_pminus3(ctx)) { + /* Use the faster case. */ + /* L1 = 3(X - Z^2)(X + Z^2) */ + /* T1: used for Z^2. */ + /* T2: used for the right term. */ + ec_pow2(t1, point->z, ctx); + ec_subm(l1, point->x, t1, ctx); + ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); + ec_addm(t2, point->x, t1, ctx); + ec_mulm(l1, l1, t2, ctx); + } else { + /* Standard case. */ + /* L1 = 3X^2 + aZ^4 */ + /* T1: used for aZ^4. */ + ec_pow2(l1, point->x, ctx); + ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); + ec_powm(t1, point->z, mpi_const(MPI_C_FOUR), ctx); + ec_mulm(t1, t1, ctx->a, ctx); + ec_addm(l1, l1, t1, ctx); + } + /* Z3 = 2YZ */ + ec_mulm(z3, point->y, point->z, ctx); + ec_mul2(z3, z3, ctx); + + /* L2 = 4XY^2 */ + /* T2: used for Y2; required later. */ + ec_pow2(t2, point->y, ctx); + ec_mulm(l2, t2, point->x, ctx); + ec_mulm(l2, l2, mpi_const(MPI_C_FOUR), ctx); + + /* X3 = L1^2 - 2L2 */ + /* T1: used for L2^2. */ + ec_pow2(x3, l1, ctx); + ec_mul2(t1, l2, ctx); + ec_subm(x3, x3, t1, ctx); + + /* L3 = 8Y^4 */ + /* T2: taken from above. */ + ec_pow2(t2, t2, ctx); + ec_mulm(l3, t2, mpi_const(MPI_C_EIGHT), ctx); + + /* Y3 = L1(L2 - X3) - L3 */ + ec_subm(y3, l2, x3, ctx); + ec_mulm(y3, y3, l1, ctx); + ec_subm(y3, y3, l3, ctx); + } + +#undef x3 +#undef y3 +#undef z3 +#undef t1 +#undef t2 +#undef t3 +#undef l1 +#undef l2 +#undef l3 +} + +/* RESULT = 2 * POINT (Montgomery version). */ +static void dup_point_montgomery(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + (void)result; + (void)point; + (void)ctx; + log_fatal("%s: %s not yet supported\n", + "mpi_ec_dup_point", "Montgomery"); +} + +/* RESULT = 2 * POINT (Twisted Edwards version). 
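+ *
+ * Projective coordinates: (X : Y : Z) stands for the affine point
+ * (X/Z, Y/Z).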
*/ +static void dup_point_edwards(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ +#define X1 (point->x) +#define Y1 (point->y) +#define Z1 (point->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define B (ctx->t.scratch[0]) +#define C (ctx->t.scratch[1]) +#define D (ctx->t.scratch[2]) +#define E (ctx->t.scratch[3]) +#define F (ctx->t.scratch[4]) +#define H (ctx->t.scratch[5]) +#define J (ctx->t.scratch[6]) + + /* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */ + + /* B = (X_1 + Y_1)^2 */ + ctx->addm(B, X1, Y1, ctx); + ctx->pow2(B, B, ctx); + + /* C = X_1^2 */ + /* D = Y_1^2 */ + ctx->pow2(C, X1, ctx); + ctx->pow2(D, Y1, ctx); + + /* E = aC */ + if (ctx->dialect == ECC_DIALECT_ED25519) + ctx->subm(E, ctx->p, C, ctx); + else + ctx->mulm(E, ctx->a, C, ctx); + + /* F = E + D */ + ctx->addm(F, E, D, ctx); + + /* H = Z_1^2 */ + ctx->pow2(H, Z1, ctx); + + /* J = F - 2H */ + ctx->mul2(J, H, ctx); + ctx->subm(J, F, J, ctx); + + /* X_3 = (B - C - D) · J */ + ctx->subm(X3, B, C, ctx); + ctx->subm(X3, X3, D, ctx); + ctx->mulm(X3, X3, J, ctx); + + /* Y_3 = F · (E - D) */ + ctx->subm(Y3, E, D, ctx); + ctx->mulm(Y3, Y3, F, ctx); + + /* Z_3 = F · J */ + ctx->mulm(Z3, F, J, ctx); + +#undef X1 +#undef Y1 +#undef Z1 +#undef X3 +#undef Y3 +#undef Z3 +#undef B +#undef C +#undef D +#undef E +#undef F +#undef H +#undef J +} + +/* RESULT = 2 * POINT */ +static void +mpi_ec_dup_point(MPI_POINT result, MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: + dup_point_weierstrass(result, point, ctx); + break; + case MPI_EC_MONTGOMERY: + dup_point_montgomery(result, point, ctx); + break; + case MPI_EC_EDWARDS: + dup_point_edwards(result, point, ctx); + break; + } +} + +/* RESULT = P1 + P2 (Weierstrass version).*/ +static void add_points_weierstrass(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ +#define x1 (p1->x) +#define y1 (p1->y) +#define z1 (p1->z) +#define x2 (p2->x) +#define y2 (p2->y) +#define z2 (p2->z) +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define l1 (ctx->t.scratch[0]) +#define l2 (ctx->t.scratch[1]) +#define l3 (ctx->t.scratch[2]) +#define l4 (ctx->t.scratch[3]) +#define l5 (ctx->t.scratch[4]) +#define l6 (ctx->t.scratch[5]) +#define l7 (ctx->t.scratch[6]) +#define l8 (ctx->t.scratch[7]) +#define l9 (ctx->t.scratch[8]) +#define t1 (ctx->t.scratch[9]) +#define t2 (ctx->t.scratch[10]) + + if ((!mpi_cmp(x1, x2)) && (!mpi_cmp(y1, y2)) && (!mpi_cmp(z1, z2))) { + /* Same point; need to call the duplicate function. */ + mpi_ec_dup_point(result, p1, ctx); + } else if (!mpi_cmp_ui(z1, 0)) { + /* P1 is at infinity. */ + mpi_set(x3, p2->x); + mpi_set(y3, p2->y); + mpi_set(z3, p2->z); + } else if (!mpi_cmp_ui(z2, 0)) { + /* P2 is at infinity. 
*/ + mpi_set(x3, p1->x); + mpi_set(y3, p1->y); + mpi_set(z3, p1->z); + } else { + int z1_is_one = !mpi_cmp_ui(z1, 1); + int z2_is_one = !mpi_cmp_ui(z2, 1); + + /* l1 = x1 z2^2 */ + /* l2 = x2 z1^2 */ + if (z2_is_one) + mpi_set(l1, x1); + else { + ec_pow2(l1, z2, ctx); + ec_mulm(l1, l1, x1, ctx); + } + if (z1_is_one) + mpi_set(l2, x2); + else { + ec_pow2(l2, z1, ctx); + ec_mulm(l2, l2, x2, ctx); + } + /* l3 = l1 - l2 */ + ec_subm(l3, l1, l2, ctx); + /* l4 = y1 z2^3 */ + ec_powm(l4, z2, mpi_const(MPI_C_THREE), ctx); + ec_mulm(l4, l4, y1, ctx); + /* l5 = y2 z1^3 */ + ec_powm(l5, z1, mpi_const(MPI_C_THREE), ctx); + ec_mulm(l5, l5, y2, ctx); + /* l6 = l4 - l5 */ + ec_subm(l6, l4, l5, ctx); + + if (!mpi_cmp_ui(l3, 0)) { + if (!mpi_cmp_ui(l6, 0)) { + /* P1 and P2 are the same - use duplicate function. */ + mpi_ec_dup_point(result, p1, ctx); + } else { + /* P1 is the inverse of P2. */ + mpi_set_ui(x3, 1); + mpi_set_ui(y3, 1); + mpi_set_ui(z3, 0); + } + } else { + /* l7 = l1 + l2 */ + ec_addm(l7, l1, l2, ctx); + /* l8 = l4 + l5 */ + ec_addm(l8, l4, l5, ctx); + /* z3 = z1 z2 l3 */ + ec_mulm(z3, z1, z2, ctx); + ec_mulm(z3, z3, l3, ctx); + /* x3 = l6^2 - l7 l3^2 */ + ec_pow2(t1, l6, ctx); + ec_pow2(t2, l3, ctx); + ec_mulm(t2, t2, l7, ctx); + ec_subm(x3, t1, t2, ctx); + /* l9 = l7 l3^2 - 2 x3 */ + ec_mul2(t1, x3, ctx); + ec_subm(l9, t2, t1, ctx); + /* y3 = (l9 l6 - l8 l3^3)/2 */ + ec_mulm(l9, l9, l6, ctx); + ec_powm(t1, l3, mpi_const(MPI_C_THREE), ctx); /* fixme: Use saved value*/ + ec_mulm(t1, t1, l8, ctx); + ec_subm(y3, l9, t1, ctx); + ec_mulm(y3, y3, ec_get_two_inv_p(ctx), ctx); + } + } + +#undef x1 +#undef y1 +#undef z1 +#undef x2 +#undef y2 +#undef z2 +#undef x3 +#undef y3 +#undef z3 +#undef l1 +#undef l2 +#undef l3 +#undef l4 +#undef l5 +#undef l6 +#undef l7 +#undef l8 +#undef l9 +#undef t1 +#undef t2 +} + +/* RESULT = P1 + P2 (Montgomery version).*/ +static void add_points_montgomery(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ + (void)result; + (void)p1; + (void)p2; + (void)ctx; + log_fatal("%s: %s not yet supported\n", + "mpi_ec_add_points", "Montgomery"); +} + +/* RESULT = P1 + P2 (Twisted Edwards version).*/ +static void add_points_edwards(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ +#define X1 (p1->x) +#define Y1 (p1->y) +#define Z1 (p1->z) +#define X2 (p2->x) +#define Y2 (p2->y) +#define Z2 (p2->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define A (ctx->t.scratch[0]) +#define B (ctx->t.scratch[1]) +#define C (ctx->t.scratch[2]) +#define D (ctx->t.scratch[3]) +#define E (ctx->t.scratch[4]) +#define F (ctx->t.scratch[5]) +#define G (ctx->t.scratch[6]) +#define tmp (ctx->t.scratch[7]) + + point_resize(result, ctx); + + /* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_3) */ + + /* A = Z1 · Z2 */ + ctx->mulm(A, Z1, Z2, ctx); + + /* B = A^2 */ + ctx->pow2(B, A, ctx); + + /* C = X1 · X2 */ + ctx->mulm(C, X1, X2, ctx); + + /* D = Y1 · Y2 */ + ctx->mulm(D, Y1, Y2, ctx); + + /* E = d · C · D */ + ctx->mulm(E, ctx->b, C, ctx); + ctx->mulm(E, E, D, ctx); + + /* F = B - E */ + ctx->subm(F, B, E, ctx); + + /* G = B + E */ + ctx->addm(G, B, E, ctx); + + /* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */ + ctx->addm(tmp, X1, Y1, ctx); + ctx->addm(X3, X2, Y2, ctx); + ctx->mulm(X3, X3, tmp, ctx); + ctx->subm(X3, X3, C, ctx); + ctx->subm(X3, X3, D, ctx); + ctx->mulm(X3, X3, F, ctx); + ctx->mulm(X3, X3, A, ctx); + + /* Y_3 = A · G · (D - aC) */ + if (ctx->dialect == ECC_DIALECT_ED25519) { + 
ctx->addm(Y3, D, C, ctx); + } else { + ctx->mulm(Y3, ctx->a, C, ctx); + ctx->subm(Y3, D, Y3, ctx); + } + ctx->mulm(Y3, Y3, G, ctx); + ctx->mulm(Y3, Y3, A, ctx); + + /* Z_3 = F · G */ + ctx->mulm(Z3, F, G, ctx); + + +#undef X1 +#undef Y1 +#undef Z1 +#undef X2 +#undef Y2 +#undef Z2 +#undef X3 +#undef Y3 +#undef Z3 +#undef A +#undef B +#undef C +#undef D +#undef E +#undef F +#undef G +#undef tmp +} + +/* Compute a step of the Montgomery Ladder (only uses X and Z in the point). + * Inputs: P1, P2, and x-coordinate of DIF = P1 - P2. + * Outputs: PRD = 2 * P1 and SUM = P1 + P2. + */ +static void montgomery_ladder(MPI_POINT prd, MPI_POINT sum, + MPI_POINT p1, MPI_POINT p2, MPI dif_x, + struct mpi_ec_ctx *ctx) +{ + ctx->addm(sum->x, p2->x, p2->z, ctx); + ctx->subm(p2->z, p2->x, p2->z, ctx); + ctx->addm(prd->x, p1->x, p1->z, ctx); + ctx->subm(p1->z, p1->x, p1->z, ctx); + ctx->mulm(p2->x, p1->z, sum->x, ctx); + ctx->mulm(p2->z, prd->x, p2->z, ctx); + ctx->pow2(p1->x, prd->x, ctx); + ctx->pow2(p1->z, p1->z, ctx); + ctx->addm(sum->x, p2->x, p2->z, ctx); + ctx->subm(p2->z, p2->x, p2->z, ctx); + ctx->mulm(prd->x, p1->x, p1->z, ctx); + ctx->subm(p1->z, p1->x, p1->z, ctx); + ctx->pow2(sum->x, sum->x, ctx); + ctx->pow2(sum->z, p2->z, ctx); + ctx->mulm(prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */ + ctx->mulm(sum->z, sum->z, dif_x, ctx); + ctx->addm(prd->z, p1->x, prd->z, ctx); + ctx->mulm(prd->z, prd->z, p1->z, ctx); +} + +/* RESULT = P1 + P2 */ +void mpi_ec_add_points(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: + add_points_weierstrass(result, p1, p2, ctx); + break; + case MPI_EC_MONTGOMERY: + add_points_montgomery(result, p1, p2, ctx); + break; + case MPI_EC_EDWARDS: + add_points_edwards(result, p1, p2, ctx); + break; + } +} +EXPORT_SYMBOL_GPL(mpi_ec_add_points); + +/* Scalar point multiplication - the main function for ECC. It takes + * an integer SCALAR and a POINT as well as the usual context CTX. + * RESULT will be set to the resulting point. + */ +void mpi_ec_mul_point(MPI_POINT result, + MPI scalar, MPI_POINT point, + struct mpi_ec_ctx *ctx) +{ + MPI x1, y1, z1, k, h, yy; + unsigned int i, loops; + struct gcry_mpi_point p1, p2, p1inv; + + if (ctx->model == MPI_EC_EDWARDS) { + /* Simple left to right binary method. Algorithm 3.27 from + * {author={Hankerson, Darrel and Menezes, Alfred J. and Vanstone, Scott}, + * title = {Guide to Elliptic Curve Cryptography}, + * year = {2003}, isbn = {038795273X}, + * url = {http://www.cacr.math.uwaterloo.ca/ecc/}, + * publisher = {Springer-Verlag New York, Inc.}} + */ + unsigned int nbits; + int j; + + if (mpi_cmp(scalar, ctx->p) >= 0) + nbits = mpi_get_nbits(scalar); + else + nbits = mpi_get_nbits(ctx->p); + + mpi_set_ui(result->x, 0); + mpi_set_ui(result->y, 1); + mpi_set_ui(result->z, 1); + point_resize(point, ctx); + + point_resize(result, ctx); + point_resize(point, ctx); + + for (j = nbits-1; j >= 0; j--) { + mpi_ec_dup_point(result, result, ctx); + if (mpi_test_bit(scalar, j)) + mpi_ec_add_points(result, result, point, ctx); + } + return; + } else if (ctx->model == MPI_EC_MONTGOMERY) { + unsigned int nbits; + int j; + struct gcry_mpi_point p1_, p2_; + MPI_POINT q1, q2, prd, sum; + unsigned long sw; + mpi_size_t rsize; + + /* Compute scalar point multiplication with Montgomery Ladder. + * Note that we don't use Y-coordinate in the points at all. + * RESULT->Y will be filled by zero.
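/* Illustrative sketch added in editing, not part of the patch: the
 * Edwards branch of mpi_ec_mul_point() above is plain left-to-right
 * double-and-add.  The same control flow on ordinary integers, where
 * "double" is *2 and "add" is +x, computes k*x and makes the bit scan
 * easy to verify in isolation (assumes a 64-bit unsigned long;
 * hypothetical helper name):
 */
static unsigned long double_and_add(unsigned long k, unsigned long x)
{
	unsigned long acc = 0;		/* the neutral element, like (0:1:1) */
	int j;

	for (j = 63; j >= 0; j--) {
		acc += acc;		/* mpi_ec_dup_point() step  */
		if ((k >> j) & 1)
			acc += x;	/* mpi_ec_add_points() step */
	}
	return acc;			/* == k * x (mod 2^64)      */
}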
+ */ + + nbits = mpi_get_nbits(scalar); + point_init(&p1); + point_init(&p2); + point_init(&p1_); + point_init(&p2_); + mpi_set_ui(p1.x, 1); + mpi_free(p2.x); + p2.x = mpi_copy(point->x); + mpi_set_ui(p2.z, 1); + + point_resize(&p1, ctx); + point_resize(&p2, ctx); + point_resize(&p1_, ctx); + point_resize(&p2_, ctx); + + mpi_resize(point->x, ctx->p->nlimbs); + point->x->nlimbs = ctx->p->nlimbs; + + q1 = &p1; + q2 = &p2; + prd = &p1_; + sum = &p2_; + + for (j = nbits-1; j >= 0; j--) { + MPI_POINT t; + + sw = mpi_test_bit(scalar, j); + point_swap_cond(q1, q2, sw, ctx); + montgomery_ladder(prd, sum, q1, q2, point->x, ctx); + point_swap_cond(prd, sum, sw, ctx); + t = q1; q1 = prd; prd = t; + t = q2; q2 = sum; sum = t; + } + + mpi_clear(result->y); + sw = (nbits & 1); + point_swap_cond(&p1, &p1_, sw, ctx); + + rsize = p1.z->nlimbs; + MPN_NORMALIZE(p1.z->d, rsize); + if (rsize == 0) { + mpi_set_ui(result->x, 1); + mpi_set_ui(result->z, 0); + } else { + z1 = mpi_new(0); + ec_invm(z1, p1.z, ctx); + ec_mulm(result->x, p1.x, z1, ctx); + mpi_set_ui(result->z, 1); + mpi_free(z1); + } + + point_free(&p1); + point_free(&p2); + point_free(&p1_); + point_free(&p2_); + return; + } + + x1 = mpi_alloc_like(ctx->p); + y1 = mpi_alloc_like(ctx->p); + h = mpi_alloc_like(ctx->p); + k = mpi_copy(scalar); + yy = mpi_copy(point->y); + + if (mpi_has_sign(k)) { + k->sign = 0; + ec_invm(yy, yy, ctx); + } + + if (!mpi_cmp_ui(point->z, 1)) { + mpi_set(x1, point->x); + mpi_set(y1, yy); + } else { + MPI z2, z3; + + z2 = mpi_alloc_like(ctx->p); + z3 = mpi_alloc_like(ctx->p); + ec_mulm(z2, point->z, point->z, ctx); + ec_mulm(z3, point->z, z2, ctx); + ec_invm(z2, z2, ctx); + ec_mulm(x1, point->x, z2, ctx); + ec_invm(z3, z3, ctx); + ec_mulm(y1, yy, z3, ctx); + mpi_free(z2); + mpi_free(z3); + } + z1 = mpi_copy(mpi_const(MPI_C_ONE)); + + mpi_mul(h, k, mpi_const(MPI_C_THREE)); /* h = 3k */ + loops = mpi_get_nbits(h); + if (loops < 2) { + /* If SCALAR is zero, the above mpi_mul sets H to zero and thus + * LOOPs will be zero. To avoid an underflow of I in the main + * loop we set LOOP to 2 and the result to (0,0,0). + */ + loops = 2; + mpi_clear(result->x); + mpi_clear(result->y); + mpi_clear(result->z); + } else { + mpi_set(result->x, point->x); + mpi_set(result->y, yy); + mpi_set(result->z, point->z); + } + mpi_free(yy); yy = NULL; + + p1.x = x1; x1 = NULL; + p1.y = y1; y1 = NULL; + p1.z = z1; z1 = NULL; + point_init(&p2); + point_init(&p1inv); + + /* Invert point: y = p - y mod p */ + point_set(&p1inv, &p1); + ec_subm(p1inv.y, ctx->p, p1inv.y, ctx); + + for (i = loops-2; i > 0; i--) { + mpi_ec_dup_point(result, result, ctx); + if (mpi_test_bit(h, i) == 1 && mpi_test_bit(k, i) == 0) { + point_set(&p2, result); + mpi_ec_add_points(result, &p2, &p1, ctx); + } + if (mpi_test_bit(h, i) == 0 && mpi_test_bit(k, i) == 1) { + point_set(&p2, result); + mpi_ec_add_points(result, &p2, &p1inv, ctx); + } + } + + point_free(&p1); + point_free(&p2); + point_free(&p1inv); + mpi_free(h); + mpi_free(k); +} +EXPORT_SYMBOL_GPL(mpi_ec_mul_point); + +/* Return true if POINT is on the curve described by CTX. */ +int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + int res = 0; + MPI x, y, w; + + x = mpi_new(0); + y = mpi_new(0); + w = mpi_new(0); + + /* Check that the point is in range. This needs to be done here and + * not after conversion to affine coordinates. 
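/* Illustrative sketch added in editing, not part of the patch: the
 * Weierstrass loop above is the classic "h = 3k" signed-digit recoding:
 * scan bits loops-2..1 of h and k, double each step, add P where
 * (h_i, k_i) = (1, 0) and add -P where (h_i, k_i) = (0, 1).  Replaying
 * the recoding on a plain integer coefficient shows it reproduces k
 * (assumes 3*k does not overflow; fls64() as in <linux/bitops.h>):
 */
static unsigned long recode_3k(unsigned long k)
{
	unsigned long h = 3 * k, v = 1;
	int i, loops = fls64(h);		/* == mpi_get_nbits(h) */

	if (loops < 2)
		return 0;			/* k == 0 */
	for (i = loops - 2; i > 0; i--) {
		v += v;				/* double */
		if (((h >> i) & 1) && !((k >> i) & 1))
			v += 1;			/* add  P */
		if (!((h >> i) & 1) && ((k >> i) & 1))
			v -= 1;			/* add -P */
	}
	return v;				/* == k for every k >= 1 */
}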
+ */ + if (mpi_cmpabs(point->x, ctx->p) >= 0) + goto leave; + if (mpi_cmpabs(point->y, ctx->p) >= 0) + goto leave; + if (mpi_cmpabs(point->z, ctx->p) >= 0) + goto leave; + + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: + { + MPI xxx; + + if (mpi_ec_get_affine(x, y, point, ctx)) + goto leave; + + xxx = mpi_new(0); + + /* y^2 == x^3 + a·x + b */ + ec_pow2(y, y, ctx); + + ec_pow3(xxx, x, ctx); + ec_mulm(w, ctx->a, x, ctx); + ec_addm(w, w, ctx->b, ctx); + ec_addm(w, w, xxx, ctx); + + if (!mpi_cmp(y, w)) + res = 1; + + mpi_free(xxx); + } + break; + + case MPI_EC_MONTGOMERY: + { +#define xx y + /* With Montgomery curve, only X-coordinate is valid. */ + if (mpi_ec_get_affine(x, NULL, point, ctx)) + goto leave; + + /* The equation is: b * y^2 == x^3 + a · x^2 + x */ + /* We check if right hand is quadratic residue or not by + * Euler's criterion. + */ + /* CTX->A has (a-2)/4 and CTX->B has b^-1 */ + ec_mulm(w, ctx->a, mpi_const(MPI_C_FOUR), ctx); + ec_addm(w, w, mpi_const(MPI_C_TWO), ctx); + ec_mulm(w, w, x, ctx); + ec_pow2(xx, x, ctx); + ec_addm(w, w, xx, ctx); + ec_addm(w, w, mpi_const(MPI_C_ONE), ctx); + ec_mulm(w, w, x, ctx); + ec_mulm(w, w, ctx->b, ctx); +#undef xx + /* Compute Euler's criterion: w^(p-1)/2 */ +#define p_minus1 y + ec_subm(p_minus1, ctx->p, mpi_const(MPI_C_ONE), ctx); + mpi_rshift(p_minus1, p_minus1, 1); + ec_powm(w, w, p_minus1, ctx); + + res = !mpi_cmp_ui(w, 1); +#undef p_minus1 + } + break; + + case MPI_EC_EDWARDS: + { + if (mpi_ec_get_affine(x, y, point, ctx)) + goto leave; + + mpi_resize(w, ctx->p->nlimbs); + w->nlimbs = ctx->p->nlimbs; + + /* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */ + ctx->pow2(x, x, ctx); + ctx->pow2(y, y, ctx); + if (ctx->dialect == ECC_DIALECT_ED25519) + ctx->subm(w, ctx->p, x, ctx); + else + ctx->mulm(w, ctx->a, x, ctx); + ctx->addm(w, w, y, ctx); + ctx->mulm(x, x, y, ctx); + ctx->mulm(x, x, ctx->b, ctx); + ctx->subm(w, w, x, ctx); + if (!mpi_cmp_ui(w, 1)) + res = 1; + } + break; + } + +leave: + mpi_free(w); + mpi_free(x); + mpi_free(y); + + return res; +} +EXPORT_SYMBOL_GPL(mpi_ec_curve_point); diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c new file mode 100644 index 000000000000..2cdae54c1bd0 --- /dev/null +++ b/lib/mpi/mpi-add.c @@ -0,0 +1,155 @@ +/* mpi-add.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "mpi-internal.h" + +/**************** + * Add the unsigned integer V to the mpi-integer U and store the + * result in W. U and V may be the same. + */ +void mpi_add_ui(MPI w, MPI u, unsigned long v) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if (w->alloced < wsize) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if (!usize) { /* simple */ + wp[0] = v; + wsize = v ? 1:0; + } else if (!usign) { /* mpi is not negative */ + mpi_limb_t cy; + cy = mpihelp_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } else { + /* The signs are different. 
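/* Illustrative sketch added in editing, not part of the patch: the
 * Montgomery branch of mpi_ec_curve_point() above decides quadratic
 * residuosity with Euler's criterion, w^((p-1)/2) == 1 (mod p).  The
 * same test on machine words, square-and-multiply with a 128-bit
 * intermediate (p an odd prime, w not a multiple of p):
 */
static int is_quadratic_residue(u64 w, u64 p)
{
	u64 e = (p - 1) >> 1, r = 1;

	w %= p;
	while (e) {
		if (e & 1)
			r = (u64)(((unsigned __int128)r * w) % p);
		w = (u64)(((unsigned __int128)w * w) % p);
		e >>= 1;
	}
	return r == 1;	/* otherwise r == p - 1, i.e. -1: non-residue */
}
/* is_quadratic_residue(2, 7) == 1 (3*3 == 2 mod 7); (3, 7) gives 0. */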
Need exact comparison to determine + * which operand to subtract from which. + */ + if (usize == 1 && up[0] < v) { + wp[0] = v - up[0]; + wsize = 1; + } else { + mpihelp_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. */ + wsize = usize - (wp[usize-1] == 0); + wsign = 1; + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +void mpi_add(MPI w, MPI u, MPI v) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t usize, vsize, wsize; + int usign, vsign, wsign; + + if (u->nlimbs < v->nlimbs) { /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + vsize = u->nlimbs; + vsign = u->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). */ + up = v->d; + vp = u->d; + } else { + usize = u->nlimbs; + usign = u->sign; + vsize = v->nlimbs; + vsign = v->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). */ + up = u->d; + vp = v->d; + } + wp = w->d; + wsign = 0; + + if (!vsize) { /* simple */ + MPN_COPY(wp, up, usize); + wsize = usize; + wsign = usign; + } else if (usign != vsign) { /* different sign */ + /* This test is right since USIZE >= VSIZE */ + if (usize != vsize) { + mpihelp_sub(wp, up, usize, vp, vsize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + wsign = usign; + } else if (mpihelp_cmp(up, vp, usize) < 0) { + mpihelp_sub_n(wp, vp, up, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if (!usign) + wsign = 1; + } else { + mpihelp_sub_n(wp, up, vp, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if (usign) + wsign = 1; + } + } else { /* U and V have same sign. Add them. */ + mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize); + wp[usize] = cy; + wsize = usize + cy; + if (usign) + wsign = 1; + } + + w->nlimbs = wsize; + w->sign = wsign; +} +EXPORT_SYMBOL_GPL(mpi_add); + +void mpi_sub(MPI w, MPI u, MPI v) +{ + MPI vv = mpi_copy(v); + vv->sign = !vv->sign; + mpi_add(w, u, vv); + mpi_free(vv); +} + + +void mpi_addm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_add(w, u, v); + mpi_mod(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_addm); + +void mpi_subm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_sub(w, u, v); + mpi_mod(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_subm); diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c index 503537e08436..142b680835df 100644 --- a/lib/mpi/mpi-bit.c +++ b/lib/mpi/mpi-bit.c @@ -1,4 +1,4 @@ -/* mpi-bit.c - MPI bit level fucntions +/* mpi-bit.c - MPI bit level functions * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. @@ -32,6 +32,7 @@ void mpi_normalize(MPI a) for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--) ; } +EXPORT_SYMBOL_GPL(mpi_normalize); /**************** * Return the number of bits in A. @@ -54,3 +55,253 @@ unsigned mpi_get_nbits(MPI a) return n; } EXPORT_SYMBOL_GPL(mpi_get_nbits); + +/**************** + * Test whether bit N is set. + */ +int mpi_test_bit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + mpi_limb_t limb; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return 0; /* too far left: this is a 0 */ + limb = a->d[limbno]; + return (limb & (A_LIMB_1 << bitno)) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(mpi_test_bit); + +/**************** + * Set bit N of A. 
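/* Illustrative sketch added in editing, not part of the patch: mpi_add()
 * above is sign-magnitude arithmetic.  Same sign: add the magnitudes and
 * keep the sign.  Different sign: subtract the smaller magnitude from
 * the larger one and take the larger's sign.  In miniature, with a
 * one-word magnitude (hypothetical helper):
 */
static void sm_add(int us, unsigned long u, int vs, unsigned long v,
		   int *ws, unsigned long *w)
{
	if (us == vs) {			/* same sign: magnitudes add */
		*w = u + v;
		*ws = us;
	} else if (u >= v) {		/* |u| >= |v|: u's sign wins */
		*w = u - v;
		*ws = us;
	} else {			/* |v| > |u|: v's sign wins  */
		*w = v - u;
		*ws = vs;
	}
	if (!*w)
		*ws = 0;		/* normalize: zero is not negative */
}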
+ */ +void mpi_set_bit(MPI a, unsigned int n) +{ + unsigned int i, limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) { + for (i = a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize(a, limbno+1); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); +} + +/**************** + * Set bit N of A. and clear all bits above + */ +void mpi_set_highbit(MPI a, unsigned int n) +{ + unsigned int i, limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) { + for (i = a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize(a, limbno+1); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); + for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} +EXPORT_SYMBOL_GPL(mpi_set_highbit); + +/**************** + * clear bit N of A and all bits above + */ +void mpi_clear_highbit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return; /* not allocated, therefore no need to clear bits :-) */ + + for ( ; bitno < BITS_PER_MPI_LIMB; bitno++) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} + +/**************** + * Clear bit N of A. + */ +void mpi_clear_bit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return; /* Don't need to clear this bit, it's far too left. */ + a->d[limbno] &= ~(A_LIMB_1 << bitno); +} +EXPORT_SYMBOL_GPL(mpi_clear_bit); + + +/**************** + * Shift A by COUNT limbs to the right + * This is used only within the MPI library + */ +void mpi_rshift_limbs(MPI a, unsigned int count) +{ + mpi_ptr_t ap = a->d; + mpi_size_t n = a->nlimbs; + unsigned int i; + + if (count >= n) { + a->nlimbs = 0; + return; + } + + for (i = 0; i < n - count; i++) + ap[i] = ap[i+count]; + ap[i] = 0; + a->nlimbs -= count; +} + +/* + * Shift A by N bits to the right. + */ +void mpi_rshift(MPI x, MPI a, unsigned int n) +{ + mpi_size_t xsize; + unsigned int i; + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (x == a) { + /* In-place operation. */ + if (nlimbs >= x->nlimbs) { + x->nlimbs = 0; + return; + } + + if (nlimbs) { + for (i = 0; i < x->nlimbs - nlimbs; i++) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + } + if (x->nlimbs && nbits) + mpihelp_rshift(x->d, x->d, x->nlimbs, nbits); + } else if (nlimbs) { + /* Copy and shift by more or equal bits than in a limb. */ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED(x, xsize); + x->nlimbs = xsize; + for (i = 0; i < a->nlimbs; i++) + x->d[i] = a->d[i]; + x->nlimbs = i; + + if (nlimbs >= x->nlimbs) { + x->nlimbs = 0; + return; + } + + if (nlimbs) { + for (i = 0; i < x->nlimbs - nlimbs; i++) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + } + + if (x->nlimbs && nbits) + mpihelp_rshift(x->d, x->d, x->nlimbs, nbits); + } else { + /* Copy and shift by less than bits in a limb. */ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED(x, xsize); + x->nlimbs = xsize; + + if (xsize) { + if (nbits) + mpihelp_rshift(x->d, a->d, x->nlimbs, nbits); + else { + /* The rshift helper function is not specified for + * NBITS==0, thus we do a plain copy here. 
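/* Illustrative sketch added in editing, not part of the patch: every bit
 * helper above first splits the bit index N into a limb number and a bit
 * position inside that limb.  On a toy two-limb number with 64-bit limbs:
 */
static int toy_test_bit(const u64 d[2], unsigned int n)
{
	unsigned int limbno = n / 64;	/* n / BITS_PER_MPI_LIMB */
	unsigned int bitno  = n % 64;	/* n % BITS_PER_MPI_LIMB */

	if (limbno >= 2)
		return 0;		/* beyond the number: reads as 0 */
	return (d[limbno] >> bitno) & 1;
}
/* Bit 64 is bit 0 of limb 1: toy_test_bit((u64[]){0, 1}, 64) == 1. */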
+ */ + for (i = 0; i < x->nlimbs; i++) + x->d[i] = a->d[i]; + } + } + } + MPN_NORMALIZE(x->d, x->nlimbs); +} + +/**************** + * Shift A by COUNT limbs to the left + * This is used only within the MPI library + */ +void mpi_lshift_limbs(MPI a, unsigned int count) +{ + mpi_ptr_t ap; + int n = a->nlimbs; + int i; + + if (!count || !n) + return; + + RESIZE_IF_NEEDED(a, n+count); + + ap = a->d; + for (i = n-1; i >= 0; i--) + ap[i+count] = ap[i]; + for (i = 0; i < count; i++) + ap[i] = 0; + a->nlimbs += count; +} + +/* + * Shift A by N bits to the left. + */ +void mpi_lshift(MPI x, MPI a, unsigned int n) +{ + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (x == a && !n) + return; /* In-place shift with an amount of zero. */ + + if (x != a) { + /* Copy A to X. */ + unsigned int alimbs = a->nlimbs; + int asign = a->sign; + mpi_ptr_t xp, ap; + + RESIZE_IF_NEEDED(x, alimbs+nlimbs+1); + xp = x->d; + ap = a->d; + MPN_COPY(xp, ap, alimbs); + x->nlimbs = alimbs; + x->flags = a->flags; + x->sign = asign; + } + + if (nlimbs && !nbits) { + /* Shift a full number of limbs. */ + mpi_lshift_limbs(x, nlimbs); + } else if (n) { + /* We use a very dumb approach: Shift left by the number of + * limbs plus one and then fix it up with an rshift. + */ + mpi_lshift_limbs(x, nlimbs+1); + mpi_rshift(x, x, BITS_PER_MPI_LIMB - nbits); + } + + MPN_NORMALIZE(x->d, x->nlimbs); +} diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c index d25e9e96c310..c4cfa3ff0581 100644 --- a/lib/mpi/mpi-cmp.c +++ b/lib/mpi/mpi-cmp.c @@ -41,28 +41,54 @@ int mpi_cmp_ui(MPI u, unsigned long v) } EXPORT_SYMBOL_GPL(mpi_cmp_ui); -int mpi_cmp(MPI u, MPI v) +static int do_mpi_cmp(MPI u, MPI v, int absmode) { - mpi_size_t usize, vsize; + mpi_size_t usize; + mpi_size_t vsize; + int usign; + int vsign; int cmp; mpi_normalize(u); mpi_normalize(v); + usize = u->nlimbs; vsize = v->nlimbs; - if (!u->sign && v->sign) + usign = absmode ? 0 : u->sign; + vsign = absmode ? 0 : v->sign; + + /* Compare sign bits. */ + + if (!usign && vsign) return 1; - if (u->sign && !v->sign) + if (usign && !vsign) return -1; - if (usize != vsize && !u->sign && !v->sign) + + /* U and V are either both positive or both negative. */ + + if (usize != vsize && !usign && !vsign) return usize - vsize; - if (usize != vsize && u->sign && v->sign) - return vsize - usize; + if (usize != vsize && usign && vsign) + return vsize - usize; if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); - if (u->sign) - return -cmp; - return cmp; + if (!cmp) + return 0; + if ((cmp < 0?1:0) == (usign?1:0)) + return 1; + + return -1; +} + +int mpi_cmp(MPI u, MPI v) +{ + return do_mpi_cmp(u, v, 0); } EXPORT_SYMBOL_GPL(mpi_cmp); + +int mpi_cmpabs(MPI u, MPI v) +{ + return do_mpi_cmp(u, v, 1); +} +EXPORT_SYMBOL_GPL(mpi_cmpabs); diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c new file mode 100644 index 000000000000..45beab8b9e9e --- /dev/null +++ b/lib/mpi/mpi-div.c @@ -0,0 +1,234 @@ +/* mpi-div.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc.
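/* Illustrative sketch added in editing, not part of the patch:
 * do_mpi_cmp() above orders values by sign first, then by limb count,
 * then by limb content.  The same rules for one-word sign-magnitude
 * values (note the inversion for negatives: a larger magnitude means a
 * smaller number):
 */
static int sm_cmp(int us, unsigned long u, int vs, unsigned long v)
{
	if (!us && vs)
		return 1;			/* positive > negative */
	if (us && !vs)
		return -1;
	if (u == v)
		return 0;
	return ((u > v) == !us) ? 1 : -1;	/* signs equal here */
}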
+ */ + +#include "mpi-internal.h" +#include "longlong.h" + +void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den); +void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor); + +void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + /* We need the original value of the divisor after the remainder has been + * preliminary calculated. We have to copy it to temporary space if it's + * the same variable as REM. + */ + if (rem == divisor) { + temp_divisor = mpi_copy(divisor); + divisor = temp_divisor; + } + + mpi_tdiv_r(rem, dividend, divisor); + + if (((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs) + mpi_add(rem, rem, divisor); + + if (temp_divisor) + mpi_free(temp_divisor); +} + +void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor) +{ + MPI tmp = mpi_alloc(mpi_get_nlimbs(quot)); + mpi_fdiv_qr(quot, tmp, dividend, divisor); + mpi_free(tmp); +} + +void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + if (quot == divisor || rem == divisor) { + temp_divisor = mpi_copy(divisor); + divisor = temp_divisor; + } + + mpi_tdiv_qr(quot, rem, dividend, divisor); + + if ((divisor_sign ^ dividend->sign) && rem->nlimbs) { + mpi_sub_ui(quot, quot, 1); + mpi_add(rem, rem, divisor); + } + + if (temp_divisor) + mpi_free(temp_divisor); +} + +/* If den == quot, den needs temporary storage. + * If den == rem, den needs temporary storage. + * If num == quot, num needs temporary storage. + * If den has temporary storage, it can be normalized while being copied, + * i.e no extra storage should be allocated. + */ + +void mpi_tdiv_r(MPI rem, MPI num, MPI den) +{ + mpi_tdiv_qr(NULL, rem, num, den); +} + +void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) +{ + mpi_ptr_t np, dp; + mpi_ptr_t qp, rp; + mpi_size_t nsize = num->nlimbs; + mpi_size_t dsize = den->nlimbs; + mpi_size_t qsize, rsize; + mpi_size_t sign_remainder = num->sign; + mpi_size_t sign_quotient = num->sign ^ den->sign; + unsigned int normalization_steps; + mpi_limb_t q_limb; + mpi_ptr_t marker[5]; + int markidx = 0; + + /* Ensure space is enough for quotient and remainder. + * We need space for an extra limb in the remainder, because it's + * up-shifted (normalized) below. + */ + rsize = nsize + 1; + mpi_resize(rem, rsize); + + qsize = rsize - dsize; /* qsize cannot be bigger than this. */ + if (qsize <= 0) { + if (num != rem) { + rem->nlimbs = num->nlimbs; + rem->sign = num->sign; + MPN_COPY(rem->d, num->d, nsize); + } + if (quot) { + /* This needs to follow the assignment to rem, in case the + * numerator and quotient are the same. + */ + quot->nlimbs = 0; + quot->sign = 0; + } + return; + } + + if (quot) + mpi_resize(quot, qsize); + + /* Read pointers here, when reallocation is finished. */ + np = num->d; + dp = den->d; + rp = rem->d; + + /* Optimize division by a single-limb divisor. */ + if (dsize == 1) { + mpi_limb_t rlimb; + if (quot) { + qp = quot->d; + rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]); + qsize -= qp[qsize - 1] == 0; + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } else + rlimb = mpihelp_mod_1(np, nsize, dp[0]); + rp[0] = rlimb; + rsize = rlimb != 0?1:0; + rem->nlimbs = rsize; + rem->sign = sign_remainder; + return; + } + + + if (quot) { + qp = quot->d; + /* Make sure QP and NP point to different objects. Otherwise the + * numerator would be gradually overwritten by the quotient limbs. + */ + if (qp == np) { /* Copy NP object to temporary space. 
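/* Illustrative sketch added in editing, not part of the patch: the fdiv
 * functions above turn C's truncating division into floor division.
 * With small integers: -7 tdiv 3 gives q = -2, r = -1 (truncation toward
 * zero), while -7 fdiv 3 gives q = -3, r = 2.  That is exactly the
 * "q -= 1, r += divisor" adjustment mpi_fdiv_qr() applies when the signs
 * differ and the remainder is non-zero:
 */
static long floor_rem(long a, long b)		/* b > 0 for simplicity */
{
	long r = a % b;				/* truncating remainder */

	return (r && a < 0) ? r + b : r;	/* floor remainder      */
}
/* floor_rem(-7, 3) == 2, floor_rem(7, 3) == 1. */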
*/ + np = marker[markidx++] = mpi_alloc_limb_space(nsize); + MPN_COPY(np, qp, nsize); + } + } else /* Put quotient at top of remainder. */ + qp = rp + dsize; + + normalization_steps = count_leading_zeros(dp[dsize - 1]); + + /* Normalize the denominator, i.e. make its most significant bit set by + * shifting it NORMALIZATION_STEPS bits to the left. Also shift the + * numerator the same number of steps (to keep the quotient the same!). + */ + if (normalization_steps) { + mpi_ptr_t tp; + mpi_limb_t nlimb; + + /* Shift up the denominator setting the most significant bit of + * the most significant word. Use temporary storage not to clobber + * the original contents of the denominator. + */ + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + mpihelp_lshift(tp, dp, dsize, normalization_steps); + dp = tp; + + /* Shift up the numerator, possibly introducing a new most + * significant word. Move the shifted numerator in the remainder + * meanwhile. + */ + nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps); + if (nlimb) { + rp[nsize] = nlimb; + rsize = nsize + 1; + } else + rsize = nsize; + } else { + /* The denominator is already normalized, as required. Copy it to + * temporary space if it overlaps with the quotient or remainder. + */ + if (dp == rp || (quot && (dp == qp))) { + mpi_ptr_t tp; + + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + MPN_COPY(tp, dp, dsize); + dp = tp; + } + + /* Move the numerator to the remainder. */ + if (rp != np) + MPN_COPY(rp, np, nsize); + + rsize = nsize; + } + + q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize); + + if (quot) { + qsize = rsize - dsize; + if (q_limb) { + qp[qsize] = q_limb; + qsize += 1; + } + + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + + rsize = dsize; + MPN_NORMALIZE(rp, rsize); + + if (normalization_steps && rsize) { + mpihelp_rshift(rp, rp, rsize, normalization_steps); + rsize -= rp[rsize - 1] == 0?1:0; + } + + rem->nlimbs = rsize; + rem->sign = sign_remainder; + while (markidx) { + markidx--; + mpi_free_limb_space(marker[markidx]); + } +} diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 91df5f0b70f2..554002182db1 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -52,6 +52,12 @@ typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ +#define RESIZE_IF_NEEDED(a, b) \ + do { \ + if ((a)->alloced < (b)) \ + mpi_resize((a), (b)); \ + } while (0) + /* Copy N limbs from S to D. */ #define MPN_COPY(d, s, n) \ do { \ @@ -60,6 +66,14 @@ typedef int mpi_size_t; /* (must be a signed type) */ (d)[_i] = (s)[_i]; \ } while (0) +#define MPN_COPY_INCR(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = (s)[_i]; \ + } while (0) + + #define MPN_COPY_DECR(d, s, n) \ do { \ mpi_size_t _i; \ @@ -92,6 +106,38 @@ typedef int mpi_size_t; /* (must be a signed type) */ mul_n(prodp, up, vp, size, tspace); \ } while (0); +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones). For correct operation, the most significant bit of D + * has to be set. Put the quotient in Q and the remainder in R. 
+ */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ + do { \ + mpi_limb_t _ql __maybe_unused; \ + mpi_limb_t _q, _r; \ + mpi_limb_t _xh, _xl; \ + umul_ppmm(_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ + umul_ppmm(_xh, _xl, _q, (d)); \ + sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + } \ + } \ + if (_r >= (d)) { \ + _r -= (d); \ + _q++; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) + + /*-- mpiutil.c --*/ mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); void mpi_free_limb_space(mpi_ptr_t a); @@ -135,6 +181,8 @@ int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace); +void mpihelp_mul_n(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, @@ -146,9 +194,14 @@ mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); /*-- mpih-div.c --*/ +mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); /*-- generic_mpih-[lr]shift.c --*/ mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c new file mode 100644 index 000000000000..61e37d18f793 --- /dev/null +++ b/lib/mpi/mpi-inv.c @@ -0,0 +1,143 @@ +/* mpi-inv.c - MPI functions + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "mpi-internal.h" + +/**************** + * Calculate the multiplicative inverse X of A mod N + * That is: Find the solution x for + * 1 = (a*x) mod n + */ +int mpi_invm(MPI x, MPI a, MPI n) +{ + /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) + * modified according to Michael Penk's solution for Exercise 35 + * with further enhancement + */ + MPI u, v, u1, u2 = NULL, u3, v1, v2 = NULL, v3, t1, t2 = NULL, t3; + unsigned int k; + int sign; + int odd; + + if (!mpi_cmp_ui(a, 0)) + return 0; /* Inverse does not exists. */ + if (!mpi_cmp_ui(n, 1)) + return 0; /* Inverse does not exists. 
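/* Illustrative sketch added in editing, not part of the patch: what
 * UDIV_QRNND_PREINV above exploits, scaled to 32-bit limbs with 64-bit
 * intermediates.  For a normalized divisor d (top bit set) the inverse
 * di = floor((2^64 - 1) / d) - 2^32 is computed once; the estimate
 * q = (nh * di >> 32) + nh never exceeds the true quotient and is only
 * a handful short, so a few subtractions replace the hardware division:
 */
static u32 preinv_div(u32 nh, u32 nl, u32 d)	/* d >= 2^31, nh < d */
{
	u32 di = (u32)(~(u64)0 / d - ((u64)1 << 32));
	u64 n = ((u64)nh << 32) | nl;
	u32 q = (u32)(((u64)nh * di) >> 32) + nh;
	u64 r = n - (u64)q * d;

	while (r >= d) {	/* bounded: at most a few rounds */
		r -= d;
		q++;
	}
	return q;		/* == n / d, and r == n % d */
}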
*/ + + u = mpi_copy(a); + v = mpi_copy(n); + + for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) { + mpi_rshift(u, u, 1); + mpi_rshift(v, v, 1); + } + odd = mpi_test_bit(v, 0); + + u1 = mpi_alloc_set_ui(1); + if (!odd) + u2 = mpi_alloc_set_ui(0); + u3 = mpi_copy(u); + v1 = mpi_copy(v); + if (!odd) { + v2 = mpi_alloc(mpi_get_nlimbs(u)); + mpi_sub(v2, u1, u); /* U is used as const 1 */ + } + v3 = mpi_copy(v); + if (mpi_test_bit(u, 0)) { /* u is odd */ + t1 = mpi_alloc_set_ui(0); + if (!odd) { + t2 = mpi_alloc_set_ui(1); + t2->sign = 1; + } + t3 = mpi_copy(v); + t3->sign = !t3->sign; + goto Y4; + } else { + t1 = mpi_alloc_set_ui(1); + if (!odd) + t2 = mpi_alloc_set_ui(0); + t3 = mpi_copy(u); + } + + do { + do { + if (!odd) { + if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) { + /* one is odd */ + mpi_add(t1, t1, v); + mpi_sub(t2, t2, u); + } + mpi_rshift(t1, t1, 1); + mpi_rshift(t2, t2, 1); + mpi_rshift(t3, t3, 1); + } else { + if (mpi_test_bit(t1, 0)) + mpi_add(t1, t1, v); + mpi_rshift(t1, t1, 1); + mpi_rshift(t3, t3, 1); + } +Y4: + ; + } while (!mpi_test_bit(t3, 0)); /* while t3 is even */ + + if (!t3->sign) { + mpi_set(u1, t1); + if (!odd) + mpi_set(u2, t2); + mpi_set(u3, t3); + } else { + mpi_sub(v1, v, t1); + sign = u->sign; u->sign = !u->sign; + if (!odd) + mpi_sub(v2, u, t2); + u->sign = sign; + sign = t3->sign; t3->sign = !t3->sign; + mpi_set(v3, t3); + t3->sign = sign; + } + mpi_sub(t1, u1, v1); + if (!odd) + mpi_sub(t2, u2, v2); + mpi_sub(t3, u3, v3); + if (t1->sign) { + mpi_add(t1, t1, v); + if (!odd) + mpi_sub(t2, t2, u); + } + } while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */ + /* mpi_lshift( u3, k ); */ + mpi_set(x, u1); + + mpi_free(u1); + mpi_free(v1); + mpi_free(t1); + if (!odd) { + mpi_free(u2); + mpi_free(v2); + mpi_free(t2); + } + mpi_free(u3); + mpi_free(v3); + mpi_free(t3); + + mpi_free(u); + mpi_free(v); + return 1; +} +EXPORT_SYMBOL_GPL(mpi_invm); diff --git a/lib/mpi/mpi-mod.c b/lib/mpi/mpi-mod.c new file mode 100644 index 000000000000..47bc59edd4ff --- /dev/null +++ b/lib/mpi/mpi-mod.c @@ -0,0 +1,155 @@ +/* mpi-mod.c - Modular reduction + * Copyright (C) 1998, 1999, 2001, 2002, 2003, + * 2007 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + */ + + +#include "mpi-internal.h" +#include "longlong.h" + +/* Context used with Barrett reduction. */ +struct barrett_ctx_s { + MPI m; /* The modulus - may not be modified. */ + int m_copied; /* If true, M needs to be released. */ + int k; + MPI y; + MPI r1; /* Helper MPI. */ + MPI r2; /* Helper MPI. */ + MPI r3; /* Helper MPI allocated on demand. */ +}; + + + +void mpi_mod(MPI rem, MPI dividend, MPI divisor) +{ + mpi_fdiv_r(rem, dividend, divisor); +} + +/* This function returns a new context for Barrett based operations on + * the modulus M. This context needs to be released using + * _gcry_mpi_barrett_free. If COPY is true M will be transferred to + * the context and the user may change M. If COPY is false, M may not + * be changed until gcry_mpi_barrett_free has been called. + */ +mpi_barrett_t mpi_barrett_init(MPI m, int copy) +{ + mpi_barrett_t ctx; + MPI tmp; + + mpi_normalize(m); + ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL); + + if (copy) { + ctx->m = mpi_copy(m); + ctx->m_copied = 1; + } else + ctx->m = m; + + ctx->k = mpi_get_nlimbs(m); + tmp = mpi_alloc(ctx->k + 1); + + /* Barrett precalculation: y = floor(b^(2k) / m). 
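/* Illustrative sketch added in editing, not part of the patch: mpi_invm()
 * above is the binary (Penk) form of the extended Euclidean algorithm.
 * Its defining property, a * x == 1 (mod n), is easier to check against
 * the plain iterative variant on machine words (assumes 0 < a, 1 < n):
 */
static s64 modinv(s64 a, s64 n)	/* returns 0 when gcd(a, n) != 1 */
{
	s64 r0 = n, r1 = a % n, x0 = 0, x1 = 1;

	while (r1 > 1) {
		s64 q = r0 / r1, t;

		t = r0 - q * r1; r0 = r1; r1 = t;
		t = x0 - q * x1; x0 = x1; x1 = t;
	}
	return r1 == 1 ? (x1 % n + n) % n : 0;
}
/* modinv(3, 7) == 5, and (3 * 5) % 7 == 1. */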
*/ + mpi_set_ui(tmp, 1); + mpi_lshift_limbs(tmp, 2 * ctx->k); + mpi_fdiv_q(tmp, tmp, m); + + ctx->y = tmp; + ctx->r1 = mpi_alloc(2 * ctx->k + 1); + ctx->r2 = mpi_alloc(2 * ctx->k + 1); + + return ctx; +} + +void mpi_barrett_free(mpi_barrett_t ctx) +{ + if (ctx) { + mpi_free(ctx->y); + mpi_free(ctx->r1); + mpi_free(ctx->r2); + if (ctx->r3) + mpi_free(ctx->r3); + if (ctx->m_copied) + mpi_free(ctx->m); + kfree(ctx); + } +} + + +/* R = X mod M + * + * Using Barrett reduction. Before using this function + * _gcry_mpi_barrett_init must have been called to do the + * precalculations. CTX is the context created by this precalculation + * and also conveys M. If the Barrett reduction could not be done, a + * straightforward reduction method is used. + * + * We assume that these conditions are met: + * Input: x =(x_2k-1 ...x_0)_b + * m =(m_k-1 ....m_0)_b with m_k-1 != 0 + * Output: r = x mod m + */ +void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx) +{ + MPI m = ctx->m; + int k = ctx->k; + MPI y = ctx->y; + MPI r1 = ctx->r1; + MPI r2 = ctx->r2; + int sign; + + mpi_normalize(x); + if (mpi_get_nlimbs(x) > 2*k) { + mpi_mod(r, x, m); + return; + } + + sign = x->sign; + x->sign = 0; + + /* 1. q1 = floor( x / b^k-1) + * q2 = q1 * y + * q3 = floor( q2 / b^k+1 ) + * Actually, we don't need qx, we can work directly on r2 + */ + mpi_set(r2, x); + mpi_rshift_limbs(r2, k-1); + mpi_mul(r2, r2, y); + mpi_rshift_limbs(r2, k+1); + + /* 2. r1 = x mod b^k+1 + * r2 = q3 * m mod b^k+1 + * r = r1 - r2 + * 3. if r < 0 then r = r + b^k+1 + */ + mpi_set(r1, x); + if (r1->nlimbs > k+1) /* Quick modulo operation. */ + r1->nlimbs = k+1; + mpi_mul(r2, r2, m); + if (r2->nlimbs > k+1) /* Quick modulo operation. */ + r2->nlimbs = k+1; + mpi_sub(r, r1, r2); + + if (mpi_has_sign(r)) { + if (!ctx->r3) { + ctx->r3 = mpi_alloc(k + 2); + mpi_set_ui(ctx->r3, 1); + mpi_lshift_limbs(ctx->r3, k + 1); + } + mpi_add(r, r, ctx->r3); + } + + /* 4. while r >= m do r = r - m */ + while (mpi_cmp(r, m) >= 0) + mpi_sub(r, r, m); + + x->sign = sign; +} + + +void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx) +{ + mpi_mul(w, u, v); + mpi_mod_barrett(w, w, ctx); +} diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c new file mode 100644 index 000000000000..8f5fa200f297 --- /dev/null +++ b/lib/mpi/mpi-mul.c @@ -0,0 +1,91 @@ +/* mpi-mul.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "mpi-internal.h" + +void mpi_mul(MPI w, MPI u, MPI v) +{ + mpi_size_t usize, vsize, wsize; + mpi_ptr_t up, vp, wp; + mpi_limb_t cy; + int usign, vsign, sign_product; + int assign_wp = 0; + mpi_ptr_t tmp_limb = NULL; + + if (u->nlimbs < v->nlimbs) { + /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + up = v->d; + vsize = u->nlimbs; + vsign = u->sign; + vp = u->d; + } else { + usize = u->nlimbs; + usign = u->sign; + up = u->d; + vsize = v->nlimbs; + vsign = v->sign; + vp = v->d; + } + sign_product = usign ^ vsign; + wp = w->d; + + /* Ensure W has space enough to store the result.
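/* Illustrative sketch added in editing, not part of the patch: the same
 * Barrett estimate as mpi_mod_barrett() above, in bit rather than limb
 * granularity.  With k = bits in m and y = floor(4^k / m) precomputed,
 * q = ((x >> (k-1)) * y) >> (k+1) undershoots the true quotient by at
 * most 2, so the trailing correction loop runs at most twice.  Assumes
 * m < 2^16 and x < 4^k (larger x falls back to plain reduction, as the
 * code above does):
 */
static u32 barrett_mod(u32 x, u32 m)
{
	u32 k = fls(m);				/* bits in m */
	u64 y = ((u64)1 << (2 * k)) / m;	/* floor(4^k / m), done once */
	u64 q = ((u64)(x >> (k - 1)) * y) >> (k + 1);
	u64 r = x - q * m;

	while (r >= m)				/* at most 2 iterations */
		r -= m;
	return (u32)r;
}
/* barrett_mod(9999, 100) == 99 using only shifts, multiplies, subtracts. */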
*/ + wsize = usize + vsize; + if (w->alloced < wsize) { + if (wp == up || wp == vp) { + wp = mpi_alloc_limb_space(wsize); + assign_wp = 1; + } else { + mpi_resize(w, wsize); + wp = w->d; + } + } else { /* Make U and V not overlap with W. */ + if (wp == up) { + /* W and U are identical. Allocate temporary space for U. */ + up = tmp_limb = mpi_alloc_limb_space(usize); + /* Is V identical too? Keep it identical with U. */ + if (wp == vp) + vp = up; + /* Copy to the temporary space. */ + MPN_COPY(up, wp, usize); + } else if (wp == vp) { + /* W and V are identical. Allocate temporary space for V. */ + vp = tmp_limb = mpi_alloc_limb_space(vsize); + /* Copy to the temporary space. */ + MPN_COPY(vp, wp, vsize); + } + } + + if (!vsize) + wsize = 0; + else { + mpihelp_mul(wp, up, usize, vp, vsize, &cy); + wsize -= cy ? 0:1; + } + + if (assign_wp) + mpi_assign_limb_space(w, wp, wsize); + w->nlimbs = wsize; + w->sign = sign_product; + if (tmp_limb) + mpi_free_limb_space(tmp_limb); +} + +void mpi_mulm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_mul(w, u, v); + mpi_tdiv_r(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_mulm); diff --git a/lib/mpi/mpi-sub-ui.c b/lib/mpi/mpi-sub-ui.c new file mode 100644 index 000000000000..b41b082b5f3e --- /dev/null +++ b/lib/mpi/mpi-sub-ui.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mpi-sub-ui.c - Subtract an unsigned integer from an MPI. + * + * Copyright 1991, 1993, 1994, 1996, 1999-2002, 2004, 2012, 2013, 2015 + * Free Software Foundation, Inc. + * + * This file was based on the GNU MP Library source file: + * https://gmplib.org/repo/gmp-6.2/file/510b83519d1c/mpz/aors_ui.h + * + * The GNU MP Library is free software; you can redistribute it and/or modify + * it under the terms of either: + * + * * the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your + * option) any later version. + * + * or + * + * * the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * or both in parallel, as here. + * + * The GNU MP Library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received copies of the GNU General Public License and the + * GNU Lesser General Public License along with the GNU MP Library. If not, + * see https://www.gnu.org/licenses/. + */ + +#include "mpi-internal.h" + +int mpi_sub_ui(MPI w, MPI u, unsigned long vval) +{ + if (u->nlimbs == 0) { + if (mpi_resize(w, 1) < 0) + return -ENOMEM; + w->d[0] = vval; + w->nlimbs = (vval != 0); + w->sign = (vval != 0); + return 0; + } + + /* If not space for W (and possible carry), increase space. */ + if (mpi_resize(w, u->nlimbs + 1)) + return -ENOMEM; + + if (u->sign) { + mpi_limb_t cy; + + cy = mpihelp_add_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); + w->d[u->nlimbs] = cy; + w->nlimbs = u->nlimbs + cy; + w->sign = 1; + } else { + /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. + */ + if (u->nlimbs == 1 && u->d[0] < vval) { + w->d[0] = vval - u->d[0]; + w->nlimbs = 1; + w->sign = 1; + } else { + mpihelp_sub_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); + /* Size can decrease with at most one limb. 
*/ + w->nlimbs = (u->nlimbs - (w->d[u->nlimbs - 1] == 0)); + w->sign = 0; + } + } + + mpi_normalize(w); + return 0; +} +EXPORT_SYMBOL_GPL(mpi_sub_ui); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index eead4b339466..7ea225b2204f 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -25,6 +25,7 @@ #include <linux/string.h> #include "mpi-internal.h" +#define MAX_EXTERN_SCAN_BYTES (16*1024*1024) #define MAX_EXTERN_MPI_BITS 16384 /** @@ -109,6 +110,112 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); +/**************** + * Fill the mpi VAL from the hex string in STR. + */ +int mpi_fromstr(MPI val, const char *str) +{ + int sign = 0; + int prepend_zero = 0; + int i, j, c, c1, c2; + unsigned int nbits, nbytes, nlimbs; + mpi_limb_t a; + + if (*str == '-') { + sign = 1; + str++; + } + + /* Skip optional hex prefix. */ + if (*str == '0' && str[1] == 'x') + str += 2; + + nbits = strlen(str); + if (nbits > MAX_EXTERN_SCAN_BYTES) { + mpi_clear(val); + return -EINVAL; + } + nbits *= 4; + if ((nbits % 8)) + prepend_zero = 1; + + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + + if (val->alloced < nlimbs) + mpi_resize(val, nlimbs); + + i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); + i %= BYTES_PER_MPI_LIMB; + j = val->nlimbs = nlimbs; + val->sign = sign; + for (; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + if (prepend_zero) { + c1 = '0'; + prepend_zero = 0; + } else + c1 = *str++; + + if (!c1) { + mpi_clear(val); + return -EINVAL; + } + c2 = *str++; + if (!c2) { + mpi_clear(val); + return -EINVAL; + } + if (c1 >= '0' && c1 <= '9') + c = c1 - '0'; + else if (c1 >= 'a' && c1 <= 'f') + c = c1 - 'a' + 10; + else if (c1 >= 'A' && c1 <= 'F') + c = c1 - 'A' + 10; + else { + mpi_clear(val); + return -EINVAL; + } + c <<= 4; + if (c2 >= '0' && c2 <= '9') + c |= c2 - '0'; + else if (c2 >= 'a' && c2 <= 'f') + c |= c2 - 'a' + 10; + else if (c2 >= 'A' && c2 <= 'F') + c |= c2 - 'A' + 10; + else { + mpi_clear(val); + return -EINVAL; + } + a <<= 8; + a |= c; + } + i = 0; + val->d[j-1] = a; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mpi_fromstr); + +MPI mpi_scanval(const char *string) +{ + MPI a; + + a = mpi_alloc(0); + if (!a) + return NULL; + + if (mpi_fromstr(a, string)) { + mpi_free(a); + return NULL; + } + mpi_normalize(a); + return a; +} +EXPORT_SYMBOL_GPL(mpi_scanval); + static int count_lzeros(MPI a) { mpi_limb_t alimb; @@ -413,3 +520,232 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); + +/* Perform a two's complement operation on buffer P of size N bytes. 
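/* Illustrative sketch added in editing, not part of the patch: the core
 * of mpi_fromstr() above pairs hex digits into bytes, inserting one
 * virtual leading '0' when the digit count is odd.  Stand-alone, using
 * the kernel's hex_to_bin() (hypothetical helper name):
 */
static int hexpairs_to_bytes(const char *s, u8 *out, int outlen)
{
	int hi = (strlen(s) & 1) ? 0 : -1;	/* odd length: prepend '0' */
	int o = 0;

	for (; *s; s++) {
		int d = hex_to_bin(*s);

		if (d < 0)
			return -EINVAL;		/* not a hex digit */
		if (hi < 0) {
			hi = d;			/* first nibble of a pair */
		} else {
			if (o == outlen)
				return -E2BIG;
			out[o++] = hi << 4 | d;
			hi = -1;
		}
	}
	return o;				/* number of bytes written */
}
/* "f00d" -> f0 0d; "abc" -> 0a bc. */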
*/ +static void twocompl(unsigned char *p, unsigned int n) +{ + int i; + + for (i = n-1; i >= 0 && !p[i]; i--) + ; + if (i >= 0) { + if ((p[i] & 0x01)) + p[i] = (((p[i] ^ 0xfe) | 0x01) & 0xff); + else if ((p[i] & 0x02)) + p[i] = (((p[i] ^ 0xfc) | 0x02) & 0xfe); + else if ((p[i] & 0x04)) + p[i] = (((p[i] ^ 0xf8) | 0x04) & 0xfc); + else if ((p[i] & 0x08)) + p[i] = (((p[i] ^ 0xf0) | 0x08) & 0xf8); + else if ((p[i] & 0x10)) + p[i] = (((p[i] ^ 0xe0) | 0x10) & 0xf0); + else if ((p[i] & 0x20)) + p[i] = (((p[i] ^ 0xc0) | 0x20) & 0xe0); + else if ((p[i] & 0x40)) + p[i] = (((p[i] ^ 0x80) | 0x40) & 0xc0); + else + p[i] = 0x80; + + for (i--; i >= 0; i--) + p[i] ^= 0xff; + } +} + +int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, + size_t buflen, size_t *nwritten, MPI a) +{ + unsigned int nbits = mpi_get_nbits(a); + size_t len; + size_t dummy_nwritten; + int negative; + + if (!nwritten) + nwritten = &dummy_nwritten; + + /* Libgcrypt does not always clear the sign if the value + * is 0. For printing this is a bit of a surprise, in particular + * because some of the formats don't support negative numbers but + * should still be able to print a zero. Thus we need this extra test + * for a negative number. + */ + if (a->sign && mpi_cmp_ui(a, 0)) + negative = 1; + else + negative = 0; + + len = buflen; + *nwritten = 0; + if (format == GCRYMPI_FMT_STD) { + unsigned char *tmp; + int extra = 0; + unsigned int n; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + + if (negative) { + twocompl(tmp, n); + if (!(*tmp & 0x80)) { + /* Need to extend the sign. */ + n++; + extra = 2; + } + } else if (n && (*tmp & 0x80)) { + /* Positive but the high bit of the returned buffer is set. + * Thus we need to print an extra leading 0x00 so that the + * output is interpreted as a positive number. + */ + n++; + extra = 1; + } + + if (buffer && n > len) { + /* The provided buffer is too short. */ + kfree(tmp); + return -E2BIG; + } + if (buffer) { + unsigned char *s = buffer; + + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy(s, tmp, n-!!extra); + } + kfree(tmp); + *nwritten = n; + return 0; + } else if (format == GCRYMPI_FMT_USG) { + unsigned int n = (nbits + 7)/8; + + /* Note: We ignore the sign for this format. */ + /* FIXME: for performance reasons we should put this into + * mpi_aprint because we can then use the buffer directly. + */ + + if (buffer && n > len) + return -E2BIG; + if (buffer) { + unsigned char *tmp; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + memcpy(buffer, tmp, n); + kfree(tmp); + } + *nwritten = n; + return 0; + } else if (format == GCRYMPI_FMT_PGP) { + unsigned int n = (nbits + 7)/8; + + /* The PGP format can only handle unsigned integers. */ + if (negative) + return -EINVAL; + + if (buffer && n+2 > len) + return -E2BIG; + + if (buffer) { + unsigned char *tmp; + unsigned char *s = buffer; + + s[0] = nbits >> 8; + s[1] = nbits; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + memcpy(s+2, tmp, n); + kfree(tmp); + } + *nwritten = n+2; + return 0; + } else if (format == GCRYMPI_FMT_SSH) { + unsigned char *tmp; + int extra = 0; + unsigned int n; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + + if (negative) { + twocompl(tmp, n); + if (!(*tmp & 0x80)) { + /* Need to extend the sign.
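/* Worked example added in editing, not part of the patch: the sign rules
 * that twocompl() and the GCRYMPI_FMT_STD branch above implement, on
 * concrete values:
 *
 *	 127  ->  7f	(top bit clear, nothing to do)
 *	 128  ->  00 80	(top bit set: prepend 0x00 or it reads as -128)
 *	-127  ->  81	(two's complement of 7f)
 *	-128  ->  80	(negative with top bit set: no pad byte needed)
 *	-129  ->  ff 7f	(complementing magnitude 81 clears the top bit,
 *			 so the sign must be extended with a leading 0xff)
 */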
*/ + n++; + extra = 2; + } + } else if (n && (*tmp & 0x80)) { + n++; + extra = 1; + } + + if (buffer && n+4 > len) { + kfree(tmp); + return -E2BIG; + } + + if (buffer) { + unsigned char *s = buffer; + + *s++ = n >> 24; + *s++ = n >> 16; + *s++ = n >> 8; + *s++ = n; + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy(s, tmp, n-!!extra); + } + kfree(tmp); + *nwritten = 4+n; + return 0; + } else if (format == GCRYMPI_FMT_HEX) { + unsigned char *tmp; + int i; + int extra = 0; + unsigned int n = 0; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + if (!n || (*tmp & 0x80)) + extra = 2; + + if (buffer && 2*n + extra + negative + 1 > len) { + kfree(tmp); + return -E2BIG; + } + if (buffer) { + unsigned char *s = buffer; + + if (negative) + *s++ = '-'; + if (extra) { + *s++ = '0'; + *s++ = '0'; + } + + for (i = 0; i < n; i++) { + unsigned int c = tmp[i]; + + *s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10; + c &= 15; + *s++ = c < 10 ? '0'+c : 'A'+c-10; + } + *s++ = 0; + *nwritten = s - buffer; + } else { + *nwritten = 2*n + extra + negative + 1; + } + kfree(tmp); + return 0; + } else + return -EINVAL; +} +EXPORT_SYMBOL_GPL(mpi_print); diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c index 913a519eb005..be70ee2e42d3 100644 --- a/lib/mpi/mpih-div.c +++ b/lib/mpi/mpih-div.c @@ -24,6 +24,150 @@ #define UDIV_TIME UMUL_TIME #endif + +mpi_limb_t +mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb) +{ + mpi_size_t i; + mpi_limb_t n1, n0, r; + mpi_limb_t dummy __maybe_unused; + + /* Botch: Should this be handled at all? Rely on callers? */ + if (!dividend_size) + return 0; + + /* If multiplication is much faster than division, and the + * dividend is large, pre-invert the divisor, and use + * only multiplications in the inner loop. + * + * This test should be read: + * Does it ever help to use udiv_qrnnd_preinv? + * && Does what we save compensate for the inversion overhead? + */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + mpi_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + * + * Special case for DIVISOR_LIMB == 100...000. + */ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + UDIV_QRNND_PREINV(dummy, r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } else { + mpi_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. 
The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + * + * Special case for DIVISOR_LIMB == 100...000. + */ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for ( ; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(dummy, r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } else { + if (UDIV_NEEDS_NORMALIZATION) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd(dummy, r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + * it, or because DIVISOR_LIMB is already normalized. + */ + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(dummy, r, r, n0, divisor_limb); + } + return r; + } +} + /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write * the NSIZE-DSIZE least significant quotient limbs at QP * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is @@ -221,3 +365,153 @@ q_test: return most_significant_q_limb; } + +/**************** + * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + * Return the single-limb remainder. + * There are no constraints on the value of the divisor. + * + * QUOT_PTR and DIVIDEND_PTR might point to the same limb. + */ + +mpi_limb_t +mpihelp_divmod_1(mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb) +{ + mpi_size_t i; + mpi_limb_t n1, n0, r; + mpi_limb_t dummy __maybe_unused; + + if (!dividend_size) + return 0; + + /* If multiplication is much faster than division, and the + * dividend is large, pre-invert the divisor, and use + * only multiplications in the inner loop. + * + * This test should be read: + * Does it ever help to use udiv_qrnnd_preinv? + * && Does what we save compensate for the inversion overhead? + */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + mpi_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. 
*/ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + UDIV_QRNND_PREINV(quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } else { + mpi_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. */ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t) 0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + quot_ptr[i--] = 0; + + for ( ; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(quot_ptr[i], r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } else { + if (UDIV_NEEDS_NORMALIZATION) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd(quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + * it, or because DIVISOR_LIMB is already normalized. 
+ */ + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + quot_ptr[i--] = 0; + + for (; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c index a93647564054..e5f1c84e3c48 100644 --- a/lib/mpi/mpih-mul.c +++ b/lib/mpi/mpih-mul.c @@ -317,6 +317,31 @@ mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) } } + +void mpihelp_mul_n(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + if (up == vp) { + if (size < KARATSUBA_THRESHOLD) + mpih_sqr_n_basecase(prodp, up, size); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space(2 * size); + mpih_sqr_n(prodp, up, size, tspace); + mpi_free_limb_space(tspace); + } + } else { + if (size < KARATSUBA_THRESHOLD) + mul_n_basecase(prodp, up, vp, size); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space(2 * size); + mul_n(prodp, up, vp, size, tspace); + mpi_free_limb_space(tspace); + } + } +} + int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 20ed0f766787..3c63710c20c6 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -20,6 +20,63 @@ #include "mpi-internal.h" +/* Constants allocated right away at startup. */ +static MPI constants[MPI_NUMBER_OF_CONSTANTS]; + +/* Initialize the MPI subsystem. This is called early and allows some + * initialization to be done without worrying about threading issues. + */ +static int __init mpi_init(void) +{ + int idx; + unsigned long value; + + for (idx = 0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) { + switch (idx) { + case MPI_C_ZERO: + value = 0; + break; + case MPI_C_ONE: + value = 1; + break; + case MPI_C_TWO: + value = 2; + break; + case MPI_C_THREE: + value = 3; + break; + case MPI_C_FOUR: + value = 4; + break; + case MPI_C_EIGHT: + value = 8; + break; + default: + pr_err("MPI: invalid mpi_const selector %d\n", idx); + return -EFAULT; + } + constants[idx] = mpi_alloc_set_ui(value); + constants[idx]->flags = (16|32); + } + + return 0; +} +postcore_initcall(mpi_init); + +/* Return a constant MPI described by NO which is one of the + * MPI_C_xxx macros. There is no need to copy this returned value; it + * may be used directly.
+ */ +MPI mpi_const(enum gcry_mpi_constants no) +{ + if ((int)no < 0 || no >= MPI_NUMBER_OF_CONSTANTS) + pr_err("MPI: invalid mpi_const selector %d\n", no); + if (!constants[no]) + pr_err("MPI: MPI subsystem not initialized\n"); + return constants[no]; +} +EXPORT_SYMBOL_GPL(mpi_const); + /**************** * Note: It was a bad idea to use the number of limbs to allocate * because on an alpha the limbs are large but we normally need @@ -69,7 +126,7 @@ void mpi_free_limb_space(mpi_ptr_t a) if (!a) return; - kzfree(a); + kfree_sensitive(a); } void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) @@ -95,7 +152,7 @@ int mpi_resize(MPI a, unsigned nlimbs) if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); - kzfree(a->d); + kfree_sensitive(a->d); a->d = p; } else { a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); @@ -106,13 +163,22 @@ int mpi_resize(MPI a, unsigned nlimbs) return 0; } +void mpi_clear(MPI a) +{ + if (!a) + return; + a->nlimbs = 0; + a->flags = 0; +} +EXPORT_SYMBOL_GPL(mpi_clear); + void mpi_free(MPI a) { if (!a) return; if (a->flags & 4) - kzfree(a->d); + kfree_sensitive(a->d); else mpi_free_limb_space(a->d); @@ -122,5 +188,143 @@ void mpi_free(MPI a) } EXPORT_SYMBOL_GPL(mpi_free); +/**************** + * Note: This copy function should not interpret the MPI + * but copy it transparently. + */ +MPI mpi_copy(MPI a) +{ + int i; + MPI b; + + if (a) { + b = mpi_alloc(a->nlimbs); + b->nlimbs = a->nlimbs; + b->sign = a->sign; + b->flags = a->flags; + b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + for (i = 0; i < b->nlimbs; i++) + b->d[i] = a->d[i]; + } else + b = NULL; + return b; } + +/**************** + * This function allocates an MPI which is optimized to hold + * a value as large as the one given in the argument, using the + * same flags as A. + */ +MPI mpi_alloc_like(MPI a) +{ + MPI b; + + if (a) { + b = mpi_alloc(a->nlimbs); + b->nlimbs = 0; + b->sign = 0; + b->flags = a->flags; + } else + b = NULL; + + return b; +} + + +/* Set U into W and release U. If W is NULL only U will be released. */ +void mpi_snatch(MPI w, MPI u) +{ + if (w) { + mpi_assign_limb_space(w, u->d, u->alloced); + w->nlimbs = u->nlimbs; + w->sign = u->sign; + w->flags = u->flags; + u->alloced = 0; + u->nlimbs = 0; + u->d = NULL; + } + mpi_free(u); +} + + +MPI mpi_set(MPI w, MPI u) +{ + mpi_ptr_t wp, up; + mpi_size_t usize = u->nlimbs; + int usign = u->sign; + + if (!w) + w = mpi_alloc(mpi_get_nlimbs(u)); + RESIZE_IF_NEEDED(w, usize); + wp = w->d; + up = u->d; + MPN_COPY(wp, up, usize); + w->nlimbs = usize; + w->flags = u->flags; + w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + w->sign = usign; + return w; +} +EXPORT_SYMBOL_GPL(mpi_set); + +MPI mpi_set_ui(MPI w, unsigned long u) +{ + if (!w) + w = mpi_alloc(1); + /* FIXME: If U is 0 we have no need to resize and thus possibly + * allocate the limbs. + */ + RESIZE_IF_NEEDED(w, 1); + w->d[0] = u; + w->nlimbs = u ? 1 : 0; + w->sign = 0; + w->flags = 0; + return w; +} +EXPORT_SYMBOL_GPL(mpi_set_ui); + +MPI mpi_alloc_set_ui(unsigned long u) +{ + MPI w = mpi_alloc(1); + w->d[0] = u; + w->nlimbs = u ? 1 : 0; + w->sign = 0; + return w; +} + +/**************** + * Swap the values of A and B when SWAP is 1; + * leave the values untouched when SWAP is 0. + * This implementation should be constant-time regardless of SWAP.
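+ * + * The mask trick below: mask = 0 - swap is all-ones when SWAP is 1 and + * all-zeros when SWAP is 0, so x = mask & (a ^ b) is either the XOR + * difference or 0, and the paired XOR assignments either swap the two + * limbs or rewrite them unchanged, executing the same instruction + * sequence in both cases.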
+ */ +void mpi_swap_cond(MPI a, MPI b, unsigned long swap) +{ + mpi_size_t i; + mpi_size_t nlimbs; + mpi_limb_t mask = ((mpi_limb_t)0) - swap; + mpi_limb_t x; + + if (a->alloced > b->alloced) + nlimbs = b->alloced; + else + nlimbs = a->alloced; + if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) + return; + + for (i = 0; i < nlimbs; i++) { + x = mask & (a->d[i] ^ b->d[i]); + a->d[i] = a->d[i] ^ x; + b->d[i] = b->d[i] ^ x; + } + + x = mask & (a->nlimbs ^ b->nlimbs); + a->nlimbs = a->nlimbs ^ x; + b->nlimbs = b->nlimbs ^ x; + + x = mask & (a->sign ^ b->sign); + a->sign = a->sign ^ x; + b->sign = b->sign ^ x; +} + MODULE_DESCRIPTION("Multiprecision maths library"); MODULE_LICENSE("GPL"); diff --git a/lib/nlattr.c b/lib/nlattr.c index bc5b5cf608c4..5b6116e81f9f 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -96,8 +96,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, continue; if (nla_len(entry) < NLA_HDRLEN) { - NL_SET_ERR_MSG_ATTR(extack, entry, - "Array element too short"); + NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy, + "Array element too short"); return -ERANGE; } @@ -124,6 +124,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U8_MAX; break; case NLA_U16: + case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: @@ -140,6 +141,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt, switch (pt->validation_type) { case NLA_VALIDATE_RANGE: + case NLA_VALIDATE_RANGE_WARN_TOO_LONG: range->min = pt->min; range->max = pt->max; break; @@ -157,9 +159,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt, } } -static int nla_validate_int_range_unsigned(const struct nla_policy *pt, - const struct nlattr *nla, - struct netlink_ext_ack *extack) +static int nla_validate_range_unsigned(const struct nla_policy *pt, + const struct nlattr *nla, + struct netlink_ext_ack *extack, + unsigned int validate) { struct netlink_range_validation range; u64 value; @@ -178,15 +181,39 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt, case NLA_MSECS: value = nla_get_u64(nla); break; + case NLA_BINARY: + value = nla_len(nla); + break; default: return -EINVAL; } nla_get_range_unsigned(pt, &range); + if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG && + pt->type == NLA_BINARY && value > range.max) { + pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", + current->comm, pt->type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "invalid attribute length"); + return -EINVAL; + } + + /* this assumes min <= max (don't validate against min) */ + return 0; + } + if (value < range.min || value > range.max) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "integer out of range"); + bool binary = pt->type == NLA_BINARY; + + if (binary) + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "binary attribute size out of range"); + else + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "integer out of range"); + return -ERANGE; } @@ -264,8 +291,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt, nla_get_range_signed(pt, &range); if (value < range.min || value > range.max) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "integer out of range"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "integer out of range"); return -ERANGE; } @@ -274,7 +301,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt, static int nla_validate_int_range(const struct nla_policy *pt, const struct nlattr *nla, - struct netlink_ext_ack *extack) + struct netlink_ext_ack 
*extack, + unsigned int validate) { switch (pt->type) { case NLA_U8: @@ -282,7 +310,8 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U32: case NLA_U64: case NLA_MSECS: - return nla_validate_int_range_unsigned(pt, nla, extack); + case NLA_BINARY: + return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: case NLA_S32: @@ -294,6 +323,37 @@ static int nla_validate_int_range(const struct nla_policy *pt, } } +static int nla_validate_mask(const struct nla_policy *pt, + const struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + u64 value; + + switch (pt->type) { + case NLA_U8: + value = nla_get_u8(nla); + break; + case NLA_U16: + value = nla_get_u16(nla); + break; + case NLA_U32: + value = nla_get_u32(nla); + break; + case NLA_U64: + value = nla_get_u64(nla); + break; + default: + return -EINVAL; + } + + if (value & ~(u64)pt->mask) { + NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set"); + return -EINVAL; + } + + return 0; +} + static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, unsigned int depth) @@ -313,15 +373,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) || - (pt->type == NLA_EXACT_LEN && - pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG && - attrlen != pt->len)) { + if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "invalid attribute length"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "invalid attribute length"); return -EINVAL; } } @@ -329,14 +386,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (validate & NL_VALIDATE_NESTED) { if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && !(nla->nla_type & NLA_F_NESTED)) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "NLA_F_NESTED is missing"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "NLA_F_NESTED is missing"); return -EINVAL; } if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "NLA_F_NESTED not expected"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "NLA_F_NESTED not expected"); return -EINVAL; } } @@ -375,7 +432,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, err = -EINVAL; goto out_err; } - /* fall through */ + fallthrough; case NLA_STRING: if (attrlen < 1) @@ -449,19 +506,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype, "Unsupported attribute"); return -EINVAL; } - /* fall through */ - case NLA_MIN_LEN: if (attrlen < pt->len) goto out_err; break; - case NLA_EXACT_LEN: - if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) { - if (attrlen != pt->len) - goto out_err; - break; - } - /* fall through */ default: if (pt->len) minlen = pt->len; @@ -479,9 +527,15 @@ static int validate_nla(const struct nlattr *nla, int maxtype, break; case NLA_VALIDATE_RANGE_PTR: case NLA_VALIDATE_RANGE: + case NLA_VALIDATE_RANGE_WARN_TOO_LONG: case NLA_VALIDATE_MIN: case NLA_VALIDATE_MAX: - err = nla_validate_int_range(pt, nla, extack); + err = nla_validate_int_range(pt, nla, extack, validate); + if (err) + return err; + break; + case NLA_VALIDATE_MASK: + err = nla_validate_mask(pt, nla, extack); if 
(err) return err; break; @@ -496,7 +550,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype, return 0; out_err: - NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "Attribute failed policy validation"); return err; } @@ -654,35 +709,47 @@ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) EXPORT_SYMBOL(nla_find); /** - * nla_strlcpy - Copy string attribute payload into a sized buffer - * @dst: where to copy the string to - * @nla: attribute to copy the string from - * @dstsize: size of destination buffer + * nla_strscpy - Copy string attribute payload into a sized buffer + * @dst: Where to copy the string to. + * @nla: Attribute to copy the string from. + * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. - * The result is always a valid NUL-terminated string. Unlike - * strlcpy the destination buffer is always padded out. + * Unlike strlcpy the destination buffer is always padded out. * - * Returns the length of the source buffer. + * Return: + * * srclen - Returns @nla length (not including the trailing %NUL). + * * -E2BIG - If @dstsize is 0 or greater than U16_MAX or @nla length greater + * than @dstsize. */ -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize) { size_t srclen = nla_len(nla); char *src = nla_data(nla); + ssize_t ret; + size_t len; + + if (dstsize == 0 || WARN_ON_ONCE(dstsize > U16_MAX)) + return -E2BIG; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; - if (dstsize > 0) { - size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; - - memset(dst, 0, dstsize); - memcpy(dst, src, len); + if (srclen >= dstsize) { + len = dstsize - 1; + ret = -E2BIG; + } else { + len = srclen; + ret = len; } - return srclen; + memcpy(dst, src, len); + /* Zero pad end of dst. */ + memset(dst + len, 0, dstsize - len); + + return ret; } -EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(nla_strscpy); /** * nla_strdup - Copy string attribute payload into a newly allocated buffer @@ -816,8 +883,7 @@ EXPORT_SYMBOL(__nla_reserve); struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { - if (nla_need_padding_for_64bit(skb)) - nla_align_64bit(skb, padattr); + nla_align_64bit(skb, padattr); return __nla_reserve(skb, attrtype, attrlen); } diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 15ca78e1c7d4..8abe1870dba4 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -85,12 +85,16 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, put_cpu(); } +// Dump stacks even for idle CPUs. +static bool backtrace_idle; +module_param(backtrace_idle, bool, 0644); + bool nmi_cpu_backtrace(struct pt_regs *regs) { int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - if (regs && cpu_in_idle(instruction_pointer(regs))) { + if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", cpu, (void *)instruction_pointer(regs)); } else { diff --git a/lib/packing.c b/lib/packing.c index 50d1e9f2f5a7..6ed72dccfdb5 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -73,6 +73,7 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * @endbit: The index (in logical notation, compensated for quirks) where * the packed value ends within pbuf. 
Must be smaller than, or equal * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. * @op: If PACK, then uval will be treated as const pointer and copied (packed) * into pbuf, between startbit and endbit. * If UNPACK, then pbuf will be treated as const pointer and the logical diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 0ba686b8fe57..e59eda07305e 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/wait.h> +#include <linux/slab.h> #include <linux/percpu-refcount.h> /* @@ -64,18 +65,25 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS, __alignof__(unsigned long)); unsigned long start_count = 0; + struct percpu_ref_data *data; ref->percpu_count_ptr = (unsigned long) __alloc_percpu_gfp(sizeof(unsigned long), align, gfp); if (!ref->percpu_count_ptr) return -ENOMEM; - ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; - ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT; + data = kzalloc(sizeof(*ref->data), gfp); + if (!data) { + free_percpu((void __percpu *)ref->percpu_count_ptr); + return -ENOMEM; + } + + data->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; + data->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT; if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) { ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; - ref->allow_reinit = true; + data->allow_reinit = true; } else { start_count += PERCPU_COUNT_BIAS; } @@ -85,14 +93,28 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, else start_count++; - atomic_long_set(&ref->count, start_count); + atomic_long_set(&data->count, start_count); - ref->release = release; - ref->confirm_switch = NULL; + data->release = release; + data->confirm_switch = NULL; + data->ref = ref; + ref->data = data; return 0; } EXPORT_SYMBOL_GPL(percpu_ref_init); +static void __percpu_ref_exit(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); + + if (percpu_count) { + /* non-NULL confirm_switch indicates switching in progress */ + WARN_ON_ONCE(ref->data && ref->data->confirm_switch); + free_percpu(percpu_count); + ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; + } +} + /** * percpu_ref_exit - undo percpu_ref_init() * @ref: percpu_ref to exit @@ -105,27 +127,36 @@ EXPORT_SYMBOL_GPL(percpu_ref_init); */ void percpu_ref_exit(struct percpu_ref *ref) { - unsigned long __percpu *percpu_count = percpu_count_ptr(ref); + struct percpu_ref_data *data = ref->data; + unsigned long flags; - if (percpu_count) { - /* non-NULL confirm_switch indicates switching in progress */ - WARN_ON_ONCE(ref->confirm_switch); - free_percpu(percpu_count); - ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; - } + __percpu_ref_exit(ref); + + if (!data) + return; + + spin_lock_irqsave(&percpu_ref_switch_lock, flags); + ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) << + __PERCPU_REF_FLAG_BITS; + ref->data = NULL; + spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); + + kfree(data); } EXPORT_SYMBOL_GPL(percpu_ref_exit); static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) { - struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); + struct percpu_ref_data *data = container_of(rcu, + struct percpu_ref_data, rcu); + struct percpu_ref *ref = data->ref; - ref->confirm_switch(ref); - ref->confirm_switch = NULL; + data->confirm_switch(ref); + data->confirm_switch = 
NULL; wake_up_all(&percpu_ref_switch_waitq); - if (!ref->allow_reinit) - percpu_ref_exit(ref); + if (!data->allow_reinit) + __percpu_ref_exit(ref); /* drop ref from percpu_ref_switch_to_atomic() */ percpu_ref_put(ref); @@ -133,7 +164,9 @@ static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) { - struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); + struct percpu_ref_data *data = container_of(rcu, + struct percpu_ref_data, rcu); + struct percpu_ref *ref = data->ref; unsigned long __percpu *percpu_count = percpu_count_ptr(ref); unsigned long count = 0; int cpu; @@ -142,7 +175,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) count += *per_cpu_ptr(percpu_count, cpu); pr_debug("global %lu percpu %lu\n", - atomic_long_read(&ref->count), count); + atomic_long_read(&data->count), count); /* * It's crucial that we sum the percpu counters _before_ adding the sum @@ -156,11 +189,11 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) * reaching 0 before we add the percpu counts. But doing it at the same * time is equivalent and saves us atomic operations: */ - atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); + atomic_long_add((long)count - PERCPU_COUNT_BIAS, &data->count); - WARN_ONCE(atomic_long_read(&ref->count) <= 0, + WARN_ONCE(atomic_long_read(&data->count) <= 0, "percpu ref (%ps) <= 0 (%ld) after switching to atomic", - ref->release, atomic_long_read(&ref->count)); + data->release, atomic_long_read(&data->count)); /* @ref is viewed as dead on all CPUs, send out switch confirmation */ percpu_ref_call_confirm_rcu(rcu); @@ -186,10 +219,11 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, * Non-NULL ->confirm_switch is used to indicate that switching is * in progress. Use noop one if unspecified. */ - ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch; + ref->data->confirm_switch = confirm_switch ?: + percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) @@ -202,10 +236,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) return; - if (WARN_ON_ONCE(!ref->allow_reinit)) + if (WARN_ON_ONCE(!ref->data->allow_reinit)) return; - atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); + atomic_long_add(PERCPU_COUNT_BIAS, &ref->data->count); /* * Restore per-cpu operation. smp_store_release() is paired @@ -223,6 +257,8 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) static void __percpu_ref_switch_mode(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch) { + struct percpu_ref_data *data = ref->data; + lockdep_assert_held(&percpu_ref_switch_lock); /* @@ -230,10 +266,10 @@ static void __percpu_ref_switch_mode(struct percpu_ref *ref, * its completion. If the caller ensures that ATOMIC switching * isn't in progress, this function can be called from any context. 
*/ - wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch, + wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch, percpu_ref_switch_lock); - if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD)) + if (data->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD)) __percpu_ref_switch_to_atomic(ref, confirm_switch); else __percpu_ref_switch_to_percpu(ref); @@ -266,7 +302,7 @@ void percpu_ref_switch_to_atomic(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); - ref->force_atomic = true; + ref->data->force_atomic = true; __percpu_ref_switch_mode(ref, confirm_switch); spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); @@ -284,7 +320,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic); void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref) { percpu_ref_switch_to_atomic(ref, NULL); - wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); + wait_event(percpu_ref_switch_waitq, !ref->data->confirm_switch); } EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync); @@ -312,7 +348,7 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref) spin_lock_irqsave(&percpu_ref_switch_lock, flags); - ref->force_atomic = false; + ref->data->force_atomic = false; __percpu_ref_switch_mode(ref, NULL); spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); @@ -344,7 +380,8 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, - "%s called more than once on %ps!", __func__, ref->release); + "%s called more than once on %ps!", __func__, + ref->data->release); ref->percpu_count_ptr |= __PERCPU_REF_DEAD; __percpu_ref_switch_mode(ref, confirm_kill); @@ -355,6 +392,34 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); /** + * percpu_ref_is_zero - test whether a percpu refcount reached zero + * @ref: percpu_ref to test + * + * Returns %true if @ref reached zero. + * + * This function is safe to call as long as @ref is between init and exit. 
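+ * + * While @ref is still in percpu mode it is known to be live (the + * PERCPU_COUNT_BIAS added at init keeps the count positive), so %false + * is returned immediately; only an atomic-mode ref has its counter + * consulted under the switch lock.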
+ */ +bool percpu_ref_is_zero(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count; + unsigned long count, flags; + + if (__ref_is_percpu(ref, &percpu_count)) + return false; + + /* protect us from being destroyed */ + spin_lock_irqsave(&percpu_ref_switch_lock, flags); + if (ref->data) + count = atomic_long_read(&ref->data->count); + else + count = ref->percpu_count_ptr >> __PERCPU_REF_FLAG_BITS; + spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); + + return count == 0; +} +EXPORT_SYMBOL_GPL(percpu_ref_is_zero); + +/** * percpu_ref_reinit - re-initialize a percpu refcount * @ref: percpu_ref to re-initialize * diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index a66595ba5543..00f666d94486 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -17,7 +17,7 @@ static DEFINE_SPINLOCK(percpu_counters_lock); #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER -static struct debug_obj_descr percpu_counter_debug_descr; +static const struct debug_obj_descr percpu_counter_debug_descr; static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) { @@ -33,7 +33,7 @@ static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) } } -static struct debug_obj_descr percpu_counter_debug_descr = { +static const struct debug_obj_descr percpu_counter_debug_descr = { .name = "percpu_counter", .fixup_free = percpu_counter_fixup_free, }; @@ -85,7 +85,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) preempt_disable(); count = __this_cpu_read(*fbc->counters) + amount; - if (count >= batch || count <= -batch) { + if (abs(count) >= batch) { unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; @@ -99,6 +99,25 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) EXPORT_SYMBOL(percpu_counter_add_batch); /* + * For a percpu_counter with a big batch, the deviation of its count could + * be big, and there may be a requirement to reduce that deviation, e.g. + * when the counter's batch is decreased at runtime to get better accuracy, + * which can be achieved by running this sync function on each CPU. + */ +void percpu_counter_sync(struct percpu_counter *fbc) +{ + unsigned long flags; + s64 count; + + raw_spin_lock_irqsave(&fbc->lock, flags); + count = __this_cpu_read(*fbc->counters); + fbc->count += count; + __this_cpu_sub(*fbc->counters, count); + raw_spin_unlock_irqrestore(&fbc->lock, flags); +} +EXPORT_SYMBOL(percpu_counter_sync); + +/* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ diff --git a/lib/pldmfw/Makefile b/lib/pldmfw/Makefile new file mode 100644 index 000000000000..99ad10711abe --- /dev/null +++ b/lib/pldmfw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PLDMFW) += pldmfw.o diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c new file mode 100644 index 000000000000..e5d4b3b2af81 --- /dev/null +++ b/lib/pldmfw/pldmfw.c @@ -0,0 +1,879 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2019, Intel Corporation. */ + +#include <asm/unaligned.h> +#include <linux/crc32.h> +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pldmfw.h> +#include <linux/slab.h> +#include <linux/uuid.h> + +#include "pldmfw_private.h" + +/* Internal structure used to store details about the PLDM image file as it is + * being validated and processed.
+ */ +struct pldmfw_priv { + struct pldmfw *context; + const struct firmware *fw; + + /* current offset of firmware image */ + size_t offset; + + struct list_head records; + struct list_head components; + + /* PLDM Firmware Package Header */ + const struct __pldm_header *header; + u16 total_header_size; + + /* length of the component bitmap */ + u16 component_bitmap_len; + u16 bitmap_size; + + /* Start of the component image information */ + u16 component_count; + const u8 *component_start; + + /* Start of the firmware device id records */ + const u8 *record_start; + u8 record_count; + + /* The CRC at the end of the package header */ + u32 header_crc; + + struct pldmfw_record *matching_record; +}; + +/** + * pldm_check_fw_space - Verify that the firmware image has space left + * @data: pointer to private data + * @offset: offset to start from + * @length: length to check for + * + * Verify that the firmware data can hold a chunk of bytes with the specified + * offset and length. + * + * Returns: zero on success, or -EFAULT if the image does not have enough + * space left to fit the expected length. + */ +static int +pldm_check_fw_space(struct pldmfw_priv *data, size_t offset, size_t length) +{ + size_t expected_size = offset + length; + struct device *dev = data->context->dev; + + if (data->fw->size < expected_size) { + dev_dbg(dev, "Firmware file size smaller than expected. Got %zu bytes, needed %zu bytes\n", + data->fw->size, expected_size); + return -EFAULT; + } + + return 0; +} + +/** + * pldm_move_fw_offset - Move the current firmware offset forward + * @data: pointer to private data + * @bytes_to_move: number of bytes to move the offset forward by + * + * Check that there is enough space past the current offset, and then move the + * offset forward by this amount. + * + * Returns: zero on success, or -EFAULT if the image is too small to fit the + * expected length. + */ +static int +pldm_move_fw_offset(struct pldmfw_priv *data, size_t bytes_to_move) +{ + int err; + + err = pldm_check_fw_space(data, data->offset, bytes_to_move); + if (err) + return err; + + data->offset += bytes_to_move; + + return 0; +} + +/** + * pldm_parse_header - Validate and extract details about the PLDM header + * @data: pointer to private data + * + * Performs initial basic verification of the PLDM image, up to the first + * firmware record. + * + * This includes the following checks and extractions: + * + * * Verify that the UUID at the start of the header matches the expected + * value as defined in the DSP0267 PLDM specification + * * Check that the revision is 0x01 + * * Extract the total header_size and verify that the image is large enough + * to contain at least the length of this header + * * Extract the size of the component bitmap length + * * Extract a pointer to the start of the record area + * + * Returns: zero on success, or a negative error code on failure. + */ +static int pldm_parse_header(struct pldmfw_priv *data) +{ + const struct __pldmfw_record_area *record_area; + struct device *dev = data->context->dev; + const struct __pldm_header *header; + size_t header_size; + int err; + + err = pldm_move_fw_offset(data, sizeof(*header)); + if (err) + return err; + + header = (const struct __pldm_header *)data->fw->data; + data->header = header; + + if (!uuid_equal(&header->id, &pldm_firmware_header_id)) { + dev_dbg(dev, "Invalid package header identifier.
Expected UUID %pUB, but got %pUB\n", + &pldm_firmware_header_id, &header->id); + return -EINVAL; + } + + if (header->revision != PACKAGE_HEADER_FORMAT_REVISION) { + dev_dbg(dev, "Invalid package header revision. Expected revision %u but got %u\n", + PACKAGE_HEADER_FORMAT_REVISION, header->revision); + return -EOPNOTSUPP; + } + + data->total_header_size = get_unaligned_le16(&header->size); + header_size = data->total_header_size - sizeof(*header); + + err = pldm_check_fw_space(data, data->offset, header_size); + if (err) + return err; + + data->component_bitmap_len = + get_unaligned_le16(&header->component_bitmap_len); + + if (data->component_bitmap_len % 8 != 0) { + dev_dbg(dev, "Invalid component bitmap length. The length is %u, which is not a multiple of 8\n", + data->component_bitmap_len); + return -EINVAL; + } + + data->bitmap_size = data->component_bitmap_len / 8; + + err = pldm_move_fw_offset(data, header->version_len); + if (err) + return err; + + /* extract a pointer to the record area, which just follows the main + * PLDM header data. + */ + record_area = (const struct __pldmfw_record_area *)(data->fw->data + + data->offset); + + err = pldm_move_fw_offset(data, sizeof(*record_area)); + if (err) + return err; + + data->record_count = record_area->record_count; + data->record_start = record_area->records; + + return 0; +} + +/** + * pldm_check_desc_tlv_len - Check that the length matches expectation + * @data: pointer to image details + * @type: the descriptor type + * @size: the length from the descriptor header + * + * If the descriptor type is one of the documented descriptor types according + * to the standard, verify that the provided length matches. + * + * If the type is not recognized or is VENDOR_DEFINED, return zero. + * + * Returns: zero on success, or -EINVAL if the specified size of a standard + * TLV does not match the expected value defined for that TLV. + */ +static int +pldm_check_desc_tlv_len(struct pldmfw_priv *data, u16 type, u16 size) +{ + struct device *dev = data->context->dev; + u16 expected_size; + + switch (type) { + case PLDM_DESC_ID_PCI_VENDOR_ID: + case PLDM_DESC_ID_PCI_DEVICE_ID: + case PLDM_DESC_ID_PCI_SUBVENDOR_ID: + case PLDM_DESC_ID_PCI_SUBDEV_ID: + expected_size = 2; + break; + case PLDM_DESC_ID_PCI_REVISION_ID: + expected_size = 1; + break; + case PLDM_DESC_ID_PNP_VENDOR_ID: + expected_size = 3; + break; + case PLDM_DESC_ID_IANA_ENTERPRISE_ID: + case PLDM_DESC_ID_ACPI_VENDOR_ID: + case PLDM_DESC_ID_PNP_PRODUCT_ID: + case PLDM_DESC_ID_ACPI_PRODUCT_ID: + expected_size = 4; + break; + case PLDM_DESC_ID_UUID: + expected_size = 16; + break; + case PLDM_DESC_ID_VENDOR_DEFINED: + return 0; + default: + /* Do not report an error on an unexpected TLV */ + dev_dbg(dev, "Found unrecognized TLV type 0x%04x\n", type); + return 0; + } + + if (size != expected_size) { + dev_dbg(dev, "Found TLV type 0x%04x with unexpected length. Got %u bytes, but expected %u bytes\n", + type, size, expected_size); + return -EINVAL; + } + + return 0; +} + +/** + * pldm_parse_desc_tlvs - Check and skip past a number of TLVs + * @data: pointer to private data + * @record: pointer to the record this TLV belongs to + * @desc_count: descriptor count + * + * From the current offset, read and extract the descriptor TLVs, updating the + * current offset each time. + * + * Returns: zero on success, or a negative error code on failure.
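+ * + * Each descriptor TLV is laid out as, in little-endian byte order: + * + * 2 bytes: DescriptorType + * 2 bytes: DescriptorSize (covering only the data field) + * DescriptorSize bytes: DescriptorData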
+ */ +static int +pldm_parse_desc_tlvs(struct pldmfw_priv *data, struct pldmfw_record *record, u8 desc_count) +{ + const struct __pldmfw_desc_tlv *__desc; + const u8 *desc_start; + u8 i; + + desc_start = data->fw->data + data->offset; + + pldm_for_each_desc_tlv(i, __desc, desc_start, desc_count) { + struct pldmfw_desc_tlv *desc; + int err; + u16 type, size; + + err = pldm_move_fw_offset(data, sizeof(*__desc)); + if (err) + return err; + + type = get_unaligned_le16(&__desc->type); + + /* According to DSP0267, this only includes the data field */ + size = get_unaligned_le16(&__desc->size); + + err = pldm_check_desc_tlv_len(data, type, size); + if (err) + return err; + + /* check that we have space and move the offset forward */ + err = pldm_move_fw_offset(data, size); + if (err) + return err; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->type = type; + desc->size = size; + desc->data = __desc->data; + + list_add_tail(&desc->entry, &record->descs); + } + + return 0; +} + +/** + * pldm_parse_one_record - Verify size of one PLDM record + * @data: pointer to image details + * @__record: pointer to the record to check + * + * This function checks that the record size does not exceed either the size + * of the firmware file or the total length specified in the header section. + * + * It also verifies that the recorded length of the start of the record + * matches the size calculated by adding the static structure length, the + * component bitmap length, the version string length, the length of all + * descriptor TLVs, and the length of the package data. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int +pldm_parse_one_record(struct pldmfw_priv *data, + const struct __pldmfw_record_info *__record) +{ + struct pldmfw_record *record; + size_t measured_length; + int err; + const u8 *bitmap_ptr; + u16 record_len; + int i; + + /* Make a copy and insert it into the record list */ + record = kzalloc(sizeof(*record), GFP_KERNEL); + if (!record) + return -ENOMEM; + + INIT_LIST_HEAD(&record->descs); + list_add_tail(&record->entry, &data->records); + + /* Then check that we have space and move the offset */ + err = pldm_move_fw_offset(data, sizeof(*__record)); + if (err) + return err; + + record_len = get_unaligned_le16(&__record->record_len); + record->package_data_len = get_unaligned_le16(&__record->package_data_len); + record->version_len = __record->version_len; + record->version_type = __record->version_type; + + bitmap_ptr = data->fw->data + data->offset; + + /* check that we have space for the component bitmap length */ + err = pldm_move_fw_offset(data, data->bitmap_size); + if (err) + return err; + + record->component_bitmap_len = data->component_bitmap_len; + record->component_bitmap = bitmap_zalloc(record->component_bitmap_len, + GFP_KERNEL); + if (!record->component_bitmap) + return -ENOMEM; + + for (i = 0; i < data->bitmap_size; i++) + bitmap_set_value8(record->component_bitmap, bitmap_ptr[i], i * 8); + + record->version_string = data->fw->data + data->offset; + + err = pldm_move_fw_offset(data, record->version_len); + if (err) + return err; + + /* Scan through the descriptor TLVs and find the end */ + err = pldm_parse_desc_tlvs(data, record, __record->descriptor_count); + if (err) + return err; + + record->package_data = data->fw->data + data->offset; + + err = pldm_move_fw_offset(data, record->package_data_len); + if (err) + return err; + + measured_length = data->offset - ((const u8 *)__record - data->fw->data); + 
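/* The offset has now walked the fixed record header, the component + * bitmap, the version string, the descriptor TLVs and the package + * data, so the distance from the record start must equal the + * RecordLength stored in the header. + */ +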
if (measured_length != record_len) { + dev_dbg(data->context->dev, "Unexpected record length. Measured record length is %zu bytes, expected length is %u bytes\n", + measured_length, record_len); + return -EFAULT; + } + + return 0; +} + +/** + * pldm_parse_records - Locate the start of the component area + * @data: pointer to private data + * + * Extract the record count, and loop through each record, searching for the + * component area. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int pldm_parse_records(struct pldmfw_priv *data) +{ + const struct __pldmfw_component_area *component_area; + const struct __pldmfw_record_info *record; + int err; + u8 i; + + pldm_for_each_record(i, record, data->record_start, data->record_count) { + err = pldm_parse_one_record(data, record); + if (err) + return err; + } + + /* Extract a pointer to the component area, which just follows the + * PLDM device record data. + */ + component_area = (const struct __pldmfw_component_area *)(data->fw->data + data->offset); + + err = pldm_move_fw_offset(data, sizeof(*component_area)); + if (err) + return err; + + data->component_count = + get_unaligned_le16(&component_area->component_image_count); + data->component_start = component_area->components; + + return 0; +} + +/** + * pldm_parse_components - Locate the CRC header checksum + * @data: pointer to private data + * + * Extract the component count, and find the pointer to the component area. + * Scan through each component searching for the end, which should point to + * the package header checksum. + * + * Extract the package header CRC and save it for verification. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int pldm_parse_components(struct pldmfw_priv *data) +{ + const struct __pldmfw_component_info *__component; + struct device *dev = data->context->dev; + const u8 *header_crc_ptr; + int err; + u8 i; + + pldm_for_each_component(i, __component, data->component_start, data->component_count) { + struct pldmfw_component *component; + u32 offset, size; + + err = pldm_move_fw_offset(data, sizeof(*__component)); + if (err) + return err; + + err = pldm_move_fw_offset(data, __component->version_len); + if (err) + return err; + + offset = get_unaligned_le32(&__component->location_offset); + size = get_unaligned_le32(&__component->size); + + err = pldm_check_fw_space(data, offset, size); + if (err) + return err; + + component = kzalloc(sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; + + component->index = i; + component->classification = get_unaligned_le16(&__component->classification); + component->identifier = get_unaligned_le16(&__component->identifier); + component->comparison_stamp = get_unaligned_le32(&__component->comparison_stamp); + component->options = get_unaligned_le16(&__component->options); + component->activation_method = get_unaligned_le16(&__component->activation_method); + component->version_type = __component->version_type; + component->version_len = __component->version_len; + component->version_string = __component->version_string; + component->component_data = data->fw->data + offset; + component->component_size = size; + + list_add_tail(&component->entry, &data->components); + } + + header_crc_ptr = data->fw->data + data->offset; + + err = pldm_move_fw_offset(data, sizeof(data->header_crc)); + if (err) + return err; + + /* Make sure that we reached the expected offset */ + if (data->offset != data->total_header_size) { + dev_dbg(dev, "Invalid firmware 
header size. Expected %u but got %zu\n", + data->total_header_size, data->offset); + return -EFAULT; + } + + data->header_crc = get_unaligned_le32(header_crc_ptr); + + return 0; +} + +/** + * pldm_verify_header_crc - Verify that the CRC in the header matches + * @data: pointer to private data + * + * Calculates the 32-bit CRC using the standard IEEE 802.3 CRC polynomial and + * compares it to the value stored in the header. + * + * Returns: zero on success if the CRC matches, or -EBADMSG on an invalid CRC. + */ +static int pldm_verify_header_crc(struct pldmfw_priv *data) +{ + struct device *dev = data->context->dev; + u32 calculated_crc; + size_t length; + + /* Calculate the 32-bit CRC of the header contents up to but + * not including the checksum. Note that the Linux crc32_le function + * does not perform the expected final XOR. + */ + length = data->offset - sizeof(data->header_crc); + calculated_crc = crc32_le(~0, data->fw->data, length) ^ ~0; + + if (calculated_crc != data->header_crc) { + dev_dbg(dev, "Invalid CRC in firmware header. Got 0x%08x but expected 0x%08x\n", + calculated_crc, data->header_crc); + return -EBADMSG; + } + + return 0; +} + +/** + * pldmfw_free_priv - Free memory allocated while parsing the PLDM image + * @data: pointer to the PLDM data structure + * + * Loops through and frees all memory allocated for each descriptor, + * record, and component. + */ +static void pldmfw_free_priv(struct pldmfw_priv *data) +{ + struct pldmfw_component *component, *c_safe; + struct pldmfw_record *record, *r_safe; + struct pldmfw_desc_tlv *desc, *d_safe; + + list_for_each_entry_safe(component, c_safe, &data->components, entry) { + list_del(&component->entry); + kfree(component); + } + + list_for_each_entry_safe(record, r_safe, &data->records, entry) { + list_for_each_entry_safe(desc, d_safe, &record->descs, entry) { + list_del(&desc->entry); + kfree(desc); + } + + if (record->component_bitmap) { + bitmap_free(record->component_bitmap); + record->component_bitmap = NULL; + } + + list_del(&record->entry); + kfree(record); + } +} + +/** + * pldm_parse_image - parse and extract details from PLDM image + * @data: pointer to private data + * + * Verify that the firmware file contains valid data for a PLDM firmware + * file. Extract useful pointers and data from the firmware file and store + * them in the data structure. + * + * The PLDM firmware file format is defined in DMTF DSP0267 1.0.0. Care + * should be taken to use get_unaligned_le* when accessing data from the + * pointers in data. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int pldm_parse_image(struct pldmfw_priv *data) +{ + int err; + + if (WARN_ON(!(data->context->dev && data->fw->data && data->fw->size))) + return -EINVAL; + + err = pldm_parse_header(data); + if (err) + return err; + + err = pldm_parse_records(data); + if (err) + return err; + + err = pldm_parse_components(data); + if (err) + return err; + + return pldm_verify_header_crc(data); +} + +/* these are ints so that we can store PCI_ANY_ID */ +struct pldm_pci_record_id { + int vendor; + int device; + int subsystem_vendor; + int subsystem_device; +}; + +/** + * pldmfw_op_pci_match_record - Check if a PCI device matches the record + * @context: PLDM fw update structure + * @record: list of records extracted from the PLDM image + * + * Determine whether the PCI device associated with this context matches the + * record data provided.
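+ * For example, a record whose descriptors carry only PCI vendor ID + * 0x8086 and leave the device and subsystem IDs at zero would match any + * Intel device, since zero-valued fields are treated as wildcards + * below.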
+ * + * Searches the descriptor TLVs and extracts the relevant descriptor data into + * a pldm_pci_record_id. This is then compared against the PCI device ID + * information. + * + * Returns: true if the device matches the record, false otherwise. + */ +bool pldmfw_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pci_dev *pdev = to_pci_dev(context->dev); + struct pldm_pci_record_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subsystem_vendor = PCI_ANY_ID, + .subsystem_device = PCI_ANY_ID, + }; + struct pldmfw_desc_tlv *desc; + + list_for_each_entry(desc, &record->descs, entry) { + u16 value; + int *ptr; + + switch (desc->type) { + case PLDM_DESC_ID_PCI_VENDOR_ID: + ptr = &id.vendor; + break; + case PLDM_DESC_ID_PCI_DEVICE_ID: + ptr = &id.device; + break; + case PLDM_DESC_ID_PCI_SUBVENDOR_ID: + ptr = &id.subsystem_vendor; + break; + case PLDM_DESC_ID_PCI_SUBDEV_ID: + ptr = &id.subsystem_device; + break; + default: + /* Skip unrelated TLVs */ + continue; + } + + value = get_unaligned_le16(desc->data); + /* A value of zero for one of the descriptors is sometimes + * used when the record should ignore this field when matching + * the device, for example if the record applies to any + * subsystem device or vendor. + */ + if (value) + *ptr = (int)value; + else + *ptr = PCI_ANY_ID; + } + + if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) && + (id.device == PCI_ANY_ID || id.device == pdev->device) && + (id.subsystem_vendor == PCI_ANY_ID || id.subsystem_vendor == pdev->subsystem_vendor) && + (id.subsystem_device == PCI_ANY_ID || id.subsystem_device == pdev->subsystem_device)) + return true; + else + return false; +} +EXPORT_SYMBOL(pldmfw_op_pci_match_record); + +/** + * pldm_find_matching_record - Find the first matching PLDM record + * @data: pointer to private data + * + * Search through PLDM records and find the first matching entry. It is + * expected that only one entry matches. + * + * Store a pointer to the matching record, if found. + * + * Returns: zero on success, or -ENOENT if no matching record is found. + */ +static int pldm_find_matching_record(struct pldmfw_priv *data) +{ + struct pldmfw_record *record; + + list_for_each_entry(record, &data->records, entry) { + if (data->context->ops->match_record(data->context, record)) { + data->matching_record = record; + return 0; + } + } + + return -ENOENT; +} + +/** + * pldm_send_package_data - Send firmware the package data for the record + * @data: pointer to private data + * + * Send the package data associated with the matching record to the firmware, + * using the send_package_data operation. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int +pldm_send_package_data(struct pldmfw_priv *data) +{ + struct pldmfw_record *record = data->matching_record; + const struct pldmfw_ops *ops = data->context->ops; + + return ops->send_package_data(data->context, record->package_data, + record->package_data_len); +} + +/** + * pldm_send_component_tables - Send component table information to firmware + * @data: pointer to private data + * + * Loop over each component, sending the applicable components to the firmware + * via the send_component_table operation. + * + * Returns: zero on success, or a negative error code on failure.
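+ * + * The first applicable component is sent with PLDM_TRANSFER_FLAG_START + * and the last with PLDM_TRANSFER_FLAG_END (a single component gets + * both); everything in between is sent with PLDM_TRANSFER_FLAG_MIDDLE.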
+ */ +static int +pldm_send_component_tables(struct pldmfw_priv *data) +{ + unsigned long *bitmap = data->matching_record->component_bitmap; + struct pldmfw_component *component; + int err; + + list_for_each_entry(component, &data->components, entry) { + u8 index = component->index, transfer_flag = 0; + + /* Skip components which are not intended for this device */ + if (!test_bit(index, bitmap)) + continue; + + /* determine whether this is the start, middle, end, or both + * the start and end of the component tables + */ + if (index == find_first_bit(bitmap, data->component_bitmap_len)) + transfer_flag |= PLDM_TRANSFER_FLAG_START; + if (index == find_last_bit(bitmap, data->component_bitmap_len)) + transfer_flag |= PLDM_TRANSFER_FLAG_END; + if (!transfer_flag) + transfer_flag = PLDM_TRANSFER_FLAG_MIDDLE; + + err = data->context->ops->send_component_table(data->context, + component, + transfer_flag); + if (err) + return err; + } + + return 0; +} + +/** + * pldm_flash_components - Program each component to device flash + * @data: pointer to private data + * + * Loop through each component that is active for the matching device record, + * and send it to the device driver for flashing. + * + * Returns: zero on success, or a negative error code on failure. + */ +static int pldm_flash_components(struct pldmfw_priv *data) +{ + unsigned long *bitmap = data->matching_record->component_bitmap; + struct pldmfw_component *component; + int err; + + list_for_each_entry(component, &data->components, entry) { + u8 index = component->index; + + /* Skip components which are not intended for this device */ + if (!test_bit(index, bitmap)) + continue; + + err = data->context->ops->flash_component(data->context, component); + if (err) + return err; + } + + return 0; +} + +/** + * pldm_finalize_update - Finalize the device flash update + * @data: pointer to private data + * + * Tell the device driver to perform any remaining logic to complete the + * device update. + * + * Returns: zero on success, or a PLDM FWU error indicating the reason for + * failure. + */ +static int pldm_finalize_update(struct pldmfw_priv *data) +{ + if (data->context->ops->finalize_update) + return data->context->ops->finalize_update(data->context); + + return 0; +} + +/** + * pldmfw_flash_image - Write a PLDM-formatted firmware image to the device + * @context: ops and data for firmware update + * @fw: firmware object pointing to the relevant firmware file to program + * + * Parse the data for a given firmware file, verifying that it is a valid PLDM + * formatted image that matches this device. + * + * Extract the device record Package Data and Component Tables and send them + * to the device firmware. Extract and write the flash data for each of the + * components indicated in the firmware file. + * + * Returns: zero on success, or a negative error code on failure.
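+ * + * Typical use, sketched with illustrative names (the surrounding driver + * code and the "my_image.pldm" file are hypothetical): + * + * struct pldmfw context = { .ops = &my_pldmfw_ops, .dev = dev }; + * const struct firmware *fw; + * + * err = request_firmware(&fw, "my_image.pldm", dev); + * if (!err) { + * err = pldmfw_flash_image(&context, fw); + * release_firmware(fw); + * }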
+ */ +int pldmfw_flash_image(struct pldmfw *context, const struct firmware *fw) +{ + struct pldmfw_priv *data; + int err; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + INIT_LIST_HEAD(&data->records); + INIT_LIST_HEAD(&data->components); + + data->fw = fw; + data->context = context; + + err = pldm_parse_image(data); + if (err) + goto out_release_data; + + err = pldm_find_matching_record(data); + if (err) + goto out_release_data; + + err = pldm_send_package_data(data); + if (err) + goto out_release_data; + + err = pldm_send_component_tables(data); + if (err) + goto out_release_data; + + err = pldm_flash_components(data); + if (err) + goto out_release_data; + + err = pldm_finalize_update(data); + +out_release_data: + pldmfw_free_priv(data); + kfree(data); + + return err; +} +EXPORT_SYMBOL(pldmfw_flash_image); + +MODULE_AUTHOR("Jacob Keller <jacob.e.keller@intel.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PLDM firmware flash update library"); diff --git a/lib/pldmfw/pldmfw_private.h b/lib/pldmfw/pldmfw_private.h new file mode 100644 index 000000000000..687ef2200692 --- /dev/null +++ b/lib/pldmfw/pldmfw_private.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2019, Intel Corporation. */ + +#ifndef _PLDMFW_PRIVATE_H_ +#define _PLDMFW_PRIVATE_H_ + +/* The following data structures define the layout of a firmware binary + * following the "PLDM For Firmware Update Specification", DMTF standard + * #DSP0267. + * + * pldmfw.c uses these structures to implement a simple engine that will parse + * a fw binary file in this format and perform a firmware update for a given + * device. + * + * Due to the variable sized data layout, alignment of fields within these + * structures is not guaranteed when reading. For this reason, all multi-byte + * field accesses should be done using the unaligned access macros. + * Additionally, the standard specifies that multi-byte fields are in + * LittleEndian format. + * + * The structure definitions are not made public, in order to keep direct + * accesses within code that is prepared to deal with the limitation of + * unaligned access. + */ + +/* UUID for PLDM firmware packages: f018878c-cb7d-4943-9800-a02f059aca02 */ +static const uuid_t pldm_firmware_header_id = + UUID_INIT(0xf018878c, 0xcb7d, 0x4943, + 0x98, 0x00, 0xa0, 0x2f, 0x05, 0x9a, 0xca, 0x02); + +/* Revision number of the PLDM header format this code supports */ +#define PACKAGE_HEADER_FORMAT_REVISION 0x01 + +/* timestamp104 structure defined in PLDM Base specification */ +#define PLDM_TIMESTAMP_SIZE 13 +struct __pldm_timestamp { + u8 b[PLDM_TIMESTAMP_SIZE]; +} __packed __aligned(1); + +/* Package Header Information */ +struct __pldm_header { + uuid_t id; /* PackageHeaderIdentifier */ + u8 revision; /* PackageHeaderFormatRevision */ + __le16 size; /* PackageHeaderSize */ + struct __pldm_timestamp release_date; /* PackageReleaseDateTime */ + __le16 component_bitmap_len; /* ComponentBitmapBitLength */ + u8 version_type; /* PackageVersionStringType */ + u8 version_len; /* PackageVersionStringLength */ + + /* + * DSP0267 also includes the following variable length fields at the + * end of this structure: + * + * PackageVersionString, length is version_len. 
+ * + * The total size of this section is + * sizeof(pldm_header) + version_len; + */ + u8 version_string[]; /* PackageVersionString */ +} __packed __aligned(1); + +/* Firmware Device ID Record */ +struct __pldmfw_record_info { + __le16 record_len; /* RecordLength */ + u8 descriptor_count; /* DescriptorCount */ + __le32 device_update_flags; /* DeviceUpdateOptionFlags */ + u8 version_type; /* ComponentImageSetVersionType */ + u8 version_len; /* ComponentImageSetVersionLength */ + __le16 package_data_len; /* FirmwareDevicePackageDataLength */ + + /* + * DSP0267 also includes the following variable length fields at the + * end of this structure: + * + * ApplicableComponents, length is component_bitmap_len from header + * ComponentImageSetVersionString, length is version_len + * RecordDescriptors, a series of TLVs with 16bit type and length + * FirmwareDevicePackageData, length is package_data_len + * + * The total size of each record is + * sizeof(pldmfw_record_info) + + * component_bitmap_len (converted to bytes!) + + * version_len + + * <length of RecordDescriptors> + + * package_data_len + */ + u8 variable_record_data[]; +} __packed __aligned(1); + +/* Firmware Descriptor Definition */ +struct __pldmfw_desc_tlv { + __le16 type; /* DescriptorType */ + __le16 size; /* DescriptorSize */ + u8 data[]; /* DescriptorData */ +} __aligned(1); + +/* Firmware Device Identification Area */ +struct __pldmfw_record_area { + u8 record_count; /* DeviceIDRecordCount */ + /* This is not a struct type because the size of each record varies */ + u8 records[]; +} __aligned(1); + +/* Individual Component Image Information */ +struct __pldmfw_component_info { + __le16 classification; /* ComponentClassification */ + __le16 identifier; /* ComponentIdentifier */ + __le32 comparison_stamp; /* ComponentComparisonStamp */ + __le16 options; /* ComponentOptions */ + __le16 activation_method; /* RequestedComponentActivationMethod */ + __le32 location_offset; /* ComponentLocationOffset */ + __le32 size; /* ComponentSize */ + u8 version_type; /* ComponentVersionStringType */ + u8 version_len; /* ComponentVersionStringLength */ + + /* + * DSP0267 also includes the following variable length fields at the + * end of this structure: + * + * ComponentVersionString, length is version_len + * + * The total size of this section is + * sizeof(pldmfw_component_info) + version_len; + */ + u8 version_string[]; /* ComponentVersionString */ +} __packed __aligned(1); + +/* Component Image Information Area */ +struct __pldmfw_component_area { + __le16 component_image_count; + /* This is not a struct type because the component size varies */ + u8 components[]; +} __aligned(1); + +/** + * pldm_first_desc_tlv + * @start: byte offset of the start of the descriptor TLVs + * + * Converts the starting offset of the descriptor TLVs into a pointer to the + * first descriptor. + */ +#define pldm_first_desc_tlv(start) \ + ((const struct __pldmfw_desc_tlv *)(start)) + +/** + * pldm_next_desc_tlv + * @desc: pointer to a descriptor TLV + * + * Finds the pointer to the next descriptor following a given descriptor + */ +#define pldm_next_desc_tlv(desc) \ + ((const struct __pldmfw_desc_tlv *)((desc)->data + \ + get_unaligned_le16(&(desc)->size))) + +/** + * pldm_for_each_desc_tlv + * @i: variable to store descriptor index + * @desc: variable to store descriptor pointer + * @start: byte offset of the start of the descriptors + * @count: the number of descriptors + * + * for loop macro to iterate over all of the descriptors of a given PLDM + * record.
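+ * + * For instance, pldm_parse_desc_tlvs() walks a record's descriptors, + * in simplified form, with: + * + * pldm_for_each_desc_tlv(i, desc, desc_start, desc_count) { + * u16 type = get_unaligned_le16(&desc->type); + * ... + * }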
+ */ +#define pldm_for_each_desc_tlv(i, desc, start, count) \ + for ((i) = 0, (desc) = pldm_first_desc_tlv(start); \ + (i) < (count); \ + (i)++, (desc) = pldm_next_desc_tlv(desc)) + +/** + * pldm_first_record + * @start: byte offset of the start of the PLDM records + * + * Converts a starting offset of the PLDM records into a pointer to the first + * record. + */ +#define pldm_first_record(start) \ + ((const struct __pldmfw_record_info *)(start)) + +/** + * pldm_next_record + * @record: pointer to a PLDM record + * + * Finds a pointer to the next record following a given record + */ +#define pldm_next_record(record) \ + ((const struct __pldmfw_record_info *) \ + ((const u8 *)(record) + get_unaligned_le16(&(record)->record_len))) + +/** + * pldm_for_each_record + * @i: variable to store record index + * @record: variable to store record pointer + * @start: byte offset of the start of the records + * @count: the number of records + * + * for loop macro to iterate over all of the records of a PLDM file. + */ +#define pldm_for_each_record(i, record, start, count) \ + for ((i) = 0, (record) = pldm_first_record(start); \ + (i) < (count); \ + (i)++, (record) = pldm_next_record(record)) + +/** + * pldm_first_component + * @start: byte offset of the start of the PLDM components + * + * Convert a starting offset of the PLDM components into a pointer to the + * first component + */ +#define pldm_first_component(start) \ + ((const struct __pldmfw_component_info *)(start)) + +/** + * pldm_next_component + * @component: pointer to a PLDM component + * + * Finds a pointer to the next component following a given component + */ +#define pldm_next_component(component) \ + ((const struct __pldmfw_component_info *)((component)->version_string + \ + (component)->version_len)) + +/** + * pldm_for_each_component + * @i: variable to store component index + * @component: variable to store component pointer + * @start: byte offset to the start of the first component + * @count: the number of components + * + * for loop macro to iterate over all of the components of a PLDM file. + */ +#define pldm_for_each_component(i, component, start, count) \ + for ((i) = 0, (component) = pldm_first_component(start); \ + (i) < (count); \ + (i)++, (component) = pldm_next_component(component)) + +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 34e406fe561f..3a4da11b804d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/kmemleak.h> #include <linux/percpu.h> -#include <linux/local_lock.h> #include <linux/preempt.h> /* in_interrupt() */ #include <linux/radix-tree.h> #include <linux/rcupdate.h> @@ -325,7 +324,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) int ret = -ENOMEM; /* - * Nodes preloaded by one cgroup can be be used by another cgroup, so + * Nodes preloaded by one cgroup can be used by another cgroup, so * they should never be accounted to any particular memory cgroup. 
*/ gfp_mask &= ~__GFP_ACCOUNT; @@ -1029,7 +1028,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, { struct radix_tree_node *node, *parent; unsigned long maxindex; - int uninitialized_var(offset); + int offset; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) diff --git a/lib/random32.c b/lib/random32.c index 763b920a6206..4d0e05e471d7 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -38,17 +38,9 @@ #include <linux/jiffies.h> #include <linux/random.h> #include <linux/sched.h> +#include <linux/bitops.h> #include <asm/unaligned.h> - -#ifdef CONFIG_RANDOM32_SELFTEST -static void __init prandom_state_selftest(void); -#else -static inline void prandom_state_selftest(void) -{ -} -#endif - -static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; +#include <trace/events/random.h> /** * prandom_u32_state - seeded pseudo-random number generator. @@ -70,25 +62,6 @@ u32 prandom_u32_state(struct rnd_state *state) EXPORT_SYMBOL(prandom_u32_state); /** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - u32 res; - - res = prandom_u32_state(state); - put_cpu_var(net_rand_state); - - return res; -} -EXPORT_SYMBOL(prandom_u32); - -/** * prandom_bytes_state - get the requested number of pseudo-random bytes * * @state: pointer to state structure holding seeded state. @@ -119,20 +92,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) } EXPORT_SYMBOL(prandom_bytes_state); -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - - prandom_bytes_state(state, buf, bytes); - put_cpu_var(net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - static void prandom_warmup(struct rnd_state *state) { /* Calling RNG ten times to satisfy recurrence condition */ @@ -148,96 +107,6 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) -{ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); -} - -/** - * prandom_seed - add entropy to pseudo random number generator - * @entropy: entropy value - * - * Add some additional entropy to the prandom pool. - */ -void prandom_seed(u32 entropy) -{ - int i; - /* - * No locking on the CPUs, but then somewhat random results are, well, - * expected. - */ - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - - state->s1 = __seed(state->s1 ^ entropy, 2U); - prandom_warmup(state); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * to start the prandom_u32() engine. 
- */ -static int __init prandom_init(void) -{ - int i; - - prandom_state_selftest(); - - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - - prandom_seed_early(state, weak_seed, true); - prandom_warmup(state); - } - - return 0; -} -core_initcall(prandom_init); - -static void __prandom_timer(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, __prandom_timer); - -static void __prandom_timer(struct timer_list *unused) -{ - u32 entropy; - unsigned long expires; - - get_random_bytes(&entropy, sizeof(entropy)); - prandom_seed(entropy); - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + prandom_u32_max(40); - seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); - - add_timer(&seed_timer); -} - -static void __init __prandom_start_seed_timer(void) -{ - seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); - add_timer(&seed_timer); -} - void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; @@ -257,51 +126,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) } EXPORT_SYMBOL(prandom_seed_full_state); -/* - * Generate better values after random number generator - * is fully initialized. - */ -static void __prandom_reseed(bool late) -{ - unsigned long flags; - static bool latch = false; - static DEFINE_SPINLOCK(lock); - - /* Asking for random bytes might result in bytes getting - * moved into the nonblocking pool and thus marking it - * as initialized. In this case we would double back into - * this function and attempt to do a late reseed. - * Ignore the pointless attempt to reseed again if we're - * already waiting for bytes when the nonblocking pool - * got initialized. - */ - - /* only allow initial seeding (late == false) once */ - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (latch && !late) - goto out; - - latch = true; - prandom_seed_full_state(&net_rand_state); -out: - spin_unlock_irqrestore(&lock, flags); -} - -void prandom_reseed_late(void) -{ - __prandom_reseed(true); -} - -static int __init prandom_reseed(void) -{ - __prandom_reseed(false); - __prandom_start_seed_timer(); - return 0; -} -late_initcall(prandom_reseed); - #ifdef CONFIG_RANDOM32_SELFTEST static struct prandom_test1 { u32 seed; @@ -421,7 +245,28 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static void __init prandom_state_selftest(void) +static u32 __extract_hwseed(void) +{ + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? 
__extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +} + +static int __init prandom_state_selftest(void) { int i, j, errors = 0, runs = 0; bool error = false; @@ -461,5 +306,327 @@ static void __init prandom_state_selftest(void) pr_warn("prandom: %d/%d self tests failed\n", errors, runs); else pr_info("prandom: %d self tests passed\n", runs); + return 0; +} +core_initcall(prandom_state_selftest); +#endif + +/* + * The prandom_u32() implementation is now completely separate from the + * prandom_state() functions, which are retained (for now) for compatibility. + * + * Because of (ab)use in the networking code for choosing random TCP/UDP port + * numbers, which open DoS possibilities if guessable, we want something + * stronger than a standard PRNG. But the performance requirements of + * the network code do not allow robust crypto for this application. + * + * So this is a homebrew Junior Spaceman implementation, based on the + * lowest-latency trustworthy crypto primitive available, SipHash. + * (The authors of SipHash have not been consulted about this abuse of + * their work.) + * + * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to + * one word of output. This abbreviated version uses 2 rounds per word + * of output. + */ + +struct siprand_state { + unsigned long v0; + unsigned long v1; + unsigned long v2; + unsigned long v3; +}; + +static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; +DEFINE_PER_CPU(unsigned long, net_rand_noise); +EXPORT_PER_CPU_SYMBOL(net_rand_noise); + +/* + * This is the core CPRNG function. As "pseudorandom", this is not used + * for truly valuable things, just intended to be a PITA to guess. + * For maximum speed, we do just two SipHash rounds per word. This is + * the same rate as 4 rounds per 64 bits that SipHash normally uses, + * so hopefully it's reasonably secure. + * + * There are two changes from the official SipHash finalization: + * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; + * they are there only to make the output rounds distinct from the input + * rounds, and this application has no input rounds. + * - Rather than returning v0^v1^v2^v3, return v1+v3. + * If you look at the SipHash round, the last operation on v3 is + * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. + * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but + * it still cancels out half of the bits in v2 for no benefit.) + * Second, since the last combining operation was xor, continue the + * pattern of alternating xor/add for a tiny bit of extra non-linearity. + */ +static inline u32 siprand_u32(struct siprand_state *s) +{ + unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + unsigned long n = raw_cpu_read(net_rand_noise); + + v3 ^= n; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= n; + s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; + return v1 + v3; +} + + +/** + * prandom_u32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. 
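+ *
+ * Callers that need a bounded value should use the prandom_u32_max()
+ * helper rather than taking a plain modulo of prandom_u32(); the
+ * reseed timer below does exactly that with prandom_u32_max(40 * HZ).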
+ */ +u32 prandom_u32(void) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u32 res = siprand_u32(state); + + trace_prandom_u32(res); + put_cpu_ptr(&net_rand_state); + return res; +} +EXPORT_SYMBOL(prandom_u32); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, size_t bytes) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u8 *ptr = buf; + + while (bytes >= sizeof(u32)) { + put_unaligned(siprand_u32(state), (u32 *)ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); + } + + if (bytes > 0) { + u32 rem = siprand_u32(state); + + do { + *ptr++ = (u8)rem; + rem >>= BITS_PER_BYTE; + } while (--bytes > 0); + } + put_cpu_ptr(&net_rand_state); +} +EXPORT_SYMBOL(prandom_bytes); + +/** + * prandom_seed - add entropy to pseudo random number generator + * @entropy: entropy value + * + * Add some additional seed material to the prandom pool. + * The "entropy" is actually our IP address (the only caller is + * the network code), not for unpredictability, but to ensure that + * different machines are initialized differently. + */ +void prandom_seed(u32 entropy) +{ + int i; + + add_device_randomness(&entropy, sizeof(entropy)); + + for_each_possible_cpu(i) { + struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); + unsigned long v0 = state->v0, v1 = state->v1; + unsigned long v2 = state->v2, v3 = state->v3; + + do { + v3 ^= entropy; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= entropy; + } while (unlikely(!v0 || !v1 || !v2 || !v3)); + + WRITE_ONCE(state->v0, v0); + WRITE_ONCE(state->v1, v1); + WRITE_ONCE(state->v2, v2); + WRITE_ONCE(state->v3, v3); + } +} +EXPORT_SYMBOL(prandom_seed); + +/* + * Generate some initially weak seeding values to allow + * the prandom_u32() engine to be started. + */ +static int __init prandom_init_early(void) +{ + int i; + unsigned long v0, v1, v2, v3; + + if (!arch_get_random_long(&v0)) + v0 = jiffies; + if (!arch_get_random_long(&v1)) + v1 = random_get_entropy(); + v2 = v0 ^ PRND_K0; + v3 = v1 ^ PRND_K1; + + for_each_possible_cpu(i) { + struct siprand_state *state; + + v3 ^= i; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= i; + + state = per_cpu_ptr(&net_rand_state, i); + state->v0 = v0; state->v1 = v1; + state->v2 = v2; state->v3 = v3; + } + + return 0; } +core_initcall(prandom_init_early); + + +/* Stronger reseeding when available, and periodically thereafter. */ +static void prandom_reseed(struct timer_list *unused); + +static DEFINE_TIMER(seed_timer, prandom_reseed); + +static void prandom_reseed(struct timer_list *unused) +{ + unsigned long expires; + int i; + + /* + * Reinitialize each CPU's PRNG with 128 bits of key. + * No locking on the CPUs, but then somewhat random results are, + * well, expected. + */ + for_each_possible_cpu(i) { + struct siprand_state *state; + unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; + unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; +#if BITS_PER_LONG == 32 + int j; + + /* + * On 32-bit machines, hash in two extra words to + * approximate 128-bit key length. Not that the hash + * has that much security, but this prevents a trivial + * 64-bit brute force. 
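+		 * (With 32-bit longs, v0..v3 hold 4 x 32 = 128 bits of
+		 * state but the two get_random_long() calls above supplied
+		 * only 64 key bits; the loop below mixes in two more words
+		 * to make up the difference.)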
+ */ + for (j = 0; j < 2; j++) { + unsigned long m = get_random_long(); + + v3 ^= m; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= m; + } #endif + /* + * Probably impossible in practice, but there is a + * theoretical risk that a race between this reseeding + * and the target CPU writing its state back could + * create the all-zero SipHash fixed point. + * + * To ensure that never happens, ensure the state + * we write contains no zero words. + */ + state = per_cpu_ptr(&net_rand_state, i); + WRITE_ONCE(state->v0, v0 ? v0 : -1ul); + WRITE_ONCE(state->v1, v1 ? v1 : -1ul); + WRITE_ONCE(state->v2, v2 ? v2 : -1ul); + WRITE_ONCE(state->v3, v3 ? v3 : -1ul); + } + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); + mod_timer(&seed_timer, expires); +} + +/* + * The random ready callback can be called from almost any interrupt. + * To avoid worrying about whether it's safe to delay that interrupt + * long enough to seed all CPUs, just schedule an immediate timer event. + */ +static void prandom_timer_start(struct random_ready_callback *unused) +{ + mod_timer(&seed_timer, jiffies); +} + +#ifdef CONFIG_RANDOM32_SELFTEST +/* Principle: True 32-bit random numbers will all have 16 differing bits on + * average. For each 32-bit number, there are 601M numbers differing by 16 + * bits, and 89% of the numbers differ by at least 12 bits. Note that more + * than 16 differing bits also implies a correlation with inverted bits. Thus + * we take 1024 random numbers and compare each of them to the other ones, + * counting the deviation of correlated bits to 16. Constants report 32, + * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the + * u32 total, TEST_SIZE may be as large as 4096 samples. + */ +#define TEST_SIZE 1024 +static int __init prandom32_state_selftest(void) +{ + unsigned int x, y, bits, samples; + u32 xor, flip; + u32 total; + u32 *data; + + data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL); + if (!data) + return 0; + + for (samples = 0; samples < TEST_SIZE; samples++) + data[samples] = prandom_u32(); + + flip = total = 0; + for (x = 0; x < samples; x++) { + for (y = 0; y < samples; y++) { + if (x == y) + continue; + xor = data[x] ^ data[y]; + flip |= xor; + bits = hweight32(xor); + total += (bits - 16) * (bits - 16); + } + } + + /* We'll return the average deviation as 2*sqrt(corr/samples), which + * is also sqrt(4*corr/samples) which provides a better resolution. + */ + bits = int_sqrt(total / (samples * (samples - 1)) * 4); + if (bits > 6) + pr_warn("prandom32: self test failed (at least %u bits" + " correlated, fixed_mask=%#x fixed_value=%#x\n", + bits, ~flip, data[0] & ~flip); + else + pr_info("prandom32: self test passed (less than %u bits" + " correlated)\n", + bits+1); + kfree(data); + return 0; +} +core_initcall(prandom32_state_selftest); +#endif /* CONFIG_RANDOM32_SELFTEST */ + +/* + * Start periodic full reseeding as soon as strong + * random numbers are available. 
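+ * add_random_ready_callback() arranges for prandom_timer_start() to run
+ * once the core RNG is initialized; if that has already happened
+ * (-EALREADY), the timer is kicked off immediately instead.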
+ */ +static int __init prandom_init_late(void) +{ + static struct random_ready_callback random_ready = { + .func = prandom_timer_start + }; + int ret = add_random_ready_callback(&random_ready); + + if (ret == -EALREADY) { + prandom_timer_start(&random_ready); + ret = 0; + } + return ret; +} +late_initcall(prandom_init_late); diff --git a/lib/rbtree.c b/lib/rbtree.c index 8545872e61db..c4ac5c2421f2 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -13,7 +13,7 @@ #include <linux/export.h> /* - * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree * * 1) A node is either red or black * 2) The root is black diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9f6890aedd1a..c949c1e3b87c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -31,7 +31,7 @@ union nested_table { union nested_table __rcu *table; - struct rhash_lock_head *bucket; + struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -222,7 +222,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, } static int rhashtable_rehash_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); @@ -275,7 +275,7 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash); + struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); int err; if (!bkt) @@ -485,7 +485,7 @@ fail: } static void *rhashtable_lookup_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -535,12 +535,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, return ERR_PTR(-ENOENT); } -static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, - struct bucket_table *tbl, - unsigned int hash, - struct rhash_head *obj, - void *data) +static struct bucket_table *rhashtable_insert_one( + struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, + struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, + void *data) { struct bucket_table *new_tbl; struct rhash_head *head; @@ -591,7 +589,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; unsigned int hash; void *data; @@ -1173,8 +1171,8 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); @@ -1202,10 +1200,10 @@ struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(__rht_bucket_nested); -struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash) { - static struct rhash_lock_head *rhnull; + static struct 
rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); @@ -1213,9 +1211,8 @@ struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index af88d1346dd7..d693d9213ceb 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -12,33 +12,24 @@ /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) { - unsigned long mask, val; - bool ret = false; - unsigned long flags; + unsigned long mask; - spin_lock_irqsave(&sb->map[index].swap_lock, flags); - - if (!sb->map[index].cleared) - goto out_unlock; + if (!READ_ONCE(map->cleared)) + return false; /* * First get a stable cleared mask, setting the old mask to 0. */ - mask = xchg(&sb->map[index].cleared, 0); + mask = xchg(&map->cleared, 0); /* * Now clear the masked bits in our free word */ - do { - val = sb->map[index].word; - } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); - - ret = true; -out_unlock: - spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); - return ret; + atomic_long_andnot(mask, (atomic_long_t *)&map->word); + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); + return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, @@ -80,7 +71,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; - spin_lock_init(&sb->map[i].swap_lock); } return 0; } @@ -92,7 +82,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(sb, i); + sbitmap_deferred_clear(&sb->map[i]); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); @@ -107,9 +97,11 @@ EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { - unsigned int orig_hint = hint; int nr; + /* don't wrap if starting from 0 */ + wrap = wrap && hint; + while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { @@ -118,8 +110,8 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, * offset to 0 in a failure case, so start from 0 to * exhaust the map. 
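		 * (With the "wrap = wrap && hint" adjustment above, a scan
		 * that already began at bit 0 fails here immediately rather
		 * than rescanning the same word.)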
*/
-			if (orig_hint && hint && wrap) {
-				hint = orig_hint = 0;
+			if (hint && wrap) {
+				hint = 0;
 				continue;
 			}
 			return -1;
@@ -139,15 +131,15 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
 static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
 				     unsigned int alloc_hint, bool round_robin)
 {
+	struct sbitmap_word *map = &sb->map[index];
 	int nr;
 
 	do {
-		nr = __sbitmap_get_word(&sb->map[index].word,
-					sb->map[index].depth, alloc_hint,
+		nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
 					!round_robin);
 		if (nr != -1)
 			break;
-		if (!sbitmap_deferred_clear(sb, index))
+		if (!sbitmap_deferred_clear(map))
 			break;
 	} while (1);
 
@@ -207,7 +199,7 @@ again:
 			break;
 		}
 
-		if (sbitmap_deferred_clear(sb, index))
+		if (sbitmap_deferred_clear(&sb->map[index]))
 			goto again;
 
 		/* Jump to next index. */
@@ -292,8 +284,11 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 
 	for (i = 0; i < sb->map_nr; i++) {
 		unsigned long word = READ_ONCE(sb->map[i].word);
+		unsigned long cleared = READ_ONCE(sb->map[i].cleared);
 		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
 
+		word &= ~cleared;
+
 		while (word_bits > 0) {
 			unsigned int bits = min(8 - byte_bits, word_bits);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..a59778946404 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
+static struct scatterlist *get_next_sg(struct sg_table *table,
+				       struct scatterlist *cur,
+				       unsigned long needed_sges,
+				       gfp_t gfp_mask)
+{
+	struct scatterlist *new_sg, *next_sg;
+	unsigned int alloc_size;
+
+	if (cur) {
+		next_sg = sg_next(cur);
+		/* Check if last entry should be kept for chaining */
+		if (!sg_is_last(next_sg) || needed_sges == 1)
+			return next_sg;
+	}
+
+	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+	new_sg = sg_kmalloc(alloc_size, gfp_mask);
+	if (!new_sg)
+		return ERR_PTR(-ENOMEM);
+	sg_init_table(new_sg, alloc_size);
+	if (cur) {
+		__sg_chain(next_sg, new_sg);
+		table->orig_nents += alloc_size - 1;
+	} else {
+		table->sgl = new_sg;
+		table->orig_nents = alloc_size;
+		table->nents = 0;
+	}
+	return new_sg;
+}
+
 /**
  * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
  *			         an array of pages
@@ -373,30 +404,69 @@ EXPORT_SYMBOL(sg_alloc_table);
 * @n_pages:	 Number of pages in the pages array
 * @offset:      Offset from start of the first page to the start of a buffer
 * @size:        Number of valid bytes in the buffer (after offset)
- * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @max_segment: Maximum size of a scatterlist element in bytes
+ * @prv:	 Last populated sge in sgt
+ * @left_pages:  Pages left for the caller to populate after this call
+ * @gfp_mask:	 GFP allocation mask
 *
- * Description:
- *    Allocate and initialize an sg table from a list of pages. Contiguous
- *    ranges of the pages are squashed into a single scatterlist node up to the
- *    maximum size specified in @max_segment. An user may provide an offset at a
- *    start and a size of valid data in a buffer specified by the page array.
- *    The returned sg table is released by sg_free_table.
+ * Description:
+ *    If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ *    else reuse the scatterlist passed in at @prv.
+ *    Contiguous ranges of the pages are squashed into a single scatterlist
+ *    entry up to the maximum size specified in @max_segment. A user may
+ *    provide an offset at a start and a size of valid data in a buffer
+ *    specified by the page array.
 *
 * Returns:
- *   0 on success, negative error on failure
+ *   Last SGE in sgt on success, PTR_ERR otherwise.
+ *   The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ *   If this function returns an error (e.g. on allocation failure), the
+ *   caller must call sg_free_table() to cleanup any leftover allocations.
 */
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask)
 {
-	unsigned int chunks, cur_page, seg_len, i;
-	int ret;
-	struct scatterlist *s;
+	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+	unsigned int added_nents = 0;
+	struct scatterlist *s = prv;
 
-	if (WARN_ON(!max_segment || offset_in_page(max_segment)))
-		return -EINVAL;
+	/*
+	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE,
+	 * otherwise it can overshoot.
+	 */
+	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
+	if (WARN_ON(max_segment < PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (prv) {
+		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+				       prv->offset + prv->length) /
+				      PAGE_SIZE;
+
+		if (WARN_ON(offset))
+			return ERR_PTR(-EINVAL);
+
+		/* Merge contiguous pages into the last SG */
+		prv_len = prv->length;
+		while (n_pages && page_to_pfn(pages[0]) == paddr) {
+			if (prv->length + PAGE_SIZE > max_segment)
+				break;
+			prv->length += PAGE_SIZE;
+			paddr++;
+			pages++;
+			n_pages--;
+		}
+		if (!n_pages)
+			goto out;
+	}
 
 	/* compute number of contiguous chunks */
 	chunks = 1;
@@ -410,13 +480,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 		}
 	}
 
-	ret = sg_alloc_table(sgt, chunks, gfp_mask);
-	if (unlikely(ret))
-		return ret;
-
 	/* merging chunks and putting them into the scatterlist */
 	cur_page = 0;
-	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+	for (i = 0; i < chunks; i++) {
 		unsigned int j, chunk_size;
 
 		/* look for the end of the current chunk */
@@ -429,15 +495,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 				break;
 		}
 
+		/* Pass how many chunks might be left */
+		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		if (IS_ERR(s)) {
+			/*
+			 * Adjust entry length to be as it was before the
+			 * function was called.
+ */ + if (prv) + prv->length = prv_len; + return s; + } chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; sg_set_page(s, pages[cur_page], min_t(unsigned long, size, chunk_size), offset); + added_nents++; size -= chunk_size; offset = 0; cur_page = j; } - - return 0; + sgt->nents += added_nents; +out: + if (!left_pages) + sg_mark_end(s); + return s; } EXPORT_SYMBOL(__sg_alloc_table_from_pages); @@ -465,8 +546,8 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask) { - return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size, - SCATTERLIST_MAX_SEGMENT, gfp_mask); + return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, + offset, size, UINT_MAX, NULL, 0, gfp_mask)); } EXPORT_SYMBOL(sg_alloc_table_from_pages); @@ -504,7 +585,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, nalloc++; } sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), - (gfp & ~GFP_DMA) | __GFP_ZERO); + gfp & ~GFP_DMA); if (!sgl) return NULL; @@ -514,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; } @@ -852,7 +933,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, sg_miter_start(&miter, sgl, nents, sg_flags); if (!sg_miter_skip(&miter, skip)) - return false; + return 0; while ((offset < buflen) && sg_miter_next(&miter)) { unsigned int len; diff --git a/lib/sha1.c b/lib/sha1.c index 49257a915bb6..9bd1935a1472 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,7 +9,7 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/bitops.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/unaligned.h> /* diff --git a/lib/siphash.c b/lib/siphash.c index c47bb6ff2149..a90112ee72a1 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -68,11 +68,11 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; /* fall through */ - case 6: b |= ((u64)end[5]) << 40; /* fall through */ - case 5: b |= ((u64)end[4]) << 32; /* fall through */ + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; /* fall through */ + case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -101,11 +101,11 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; /* fall through */ - case 6: b |= ((u64)end[5]) << 40; /* fall through */ - case 5: b |= ((u64)end[4]) << 32; /* fall through */ + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; /* fall through */ + case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -268,11 +268,11 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; /* fall 
through */ - case 6: b |= ((u64)end[5]) << 40; /* fall through */ - case 5: b |= ((u64)end[4]) << 32; /* fall through */ + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; /* fall through */ + case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -301,11 +301,11 @@ u32 __hsiphash_unaligned(const void *data, size_t len, bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; /* fall through */ - case 6: b |= ((u64)end[5]) << 40; /* fall through */ - case 5: b |= ((u64)end[4]) << 32; /* fall through */ + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; /* fall through */ + case 3: b |= ((u64)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -431,7 +431,7 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; /* fall through */ + case 3: b |= ((u32)end[2]) << 16; fallthrough; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -454,7 +454,7 @@ u32 __hsiphash_unaligned(const void *data, size_t len, v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; /* fall through */ + case 3: b |= ((u32)end[2]) << 16; fallthrough; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 525222e4f409..1c1dbd300325 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -26,6 +26,11 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) if (current->nr_cpus_allowed == 1) goto out; +#ifdef CONFIG_SMP + if (current->migration_disabled) + goto out; +#endif + /* * It is valid to assume CPU-locality during early bootup: */ diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 2caffc64e4c8..890dcc2e984e 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -62,7 +62,7 @@ struct stack_record { u32 hash; /* Hash in the hastable */ u32 size; /* Number of frames in the stack */ union handle_parts handle; - unsigned long entries[1]; /* Variable-sized array of entries. */ + unsigned long entries[]; /* Variable-sized array of entries. 
*/ }; static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; @@ -104,9 +104,8 @@ static bool init_stack_slab(void **prealloc) static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc, gfp_t alloc_flags) { - int required_size = offsetof(struct stack_record, entries) + - sizeof(unsigned long) * size; struct stack_record *stack; + size_t required_size = struct_size(stack, entries, size); required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); @@ -136,7 +135,7 @@ static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, stack->handle.slabindex = depot_index; stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; stack->handle.valid = 1; - memcpy(stack->entries, entries, size * sizeof(unsigned long)); + memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); depot_offset += required_size; return stack; @@ -155,8 +154,8 @@ static struct stack_record *stack_table[STACK_HASH_SIZE] = { static inline u32 hash_stack(unsigned long *entries, unsigned int size) { return jhash2((u32 *)entries, - size * sizeof(unsigned long) / sizeof(u32), - STACK_HASH_SEED); + array_size(size, sizeof(*entries)) / sizeof(u32), + STACK_HASH_SEED); } /* Use our own, non-instrumented version of memcmp(). diff --git a/lib/string.c b/lib/string.c index 6012c385fb31..7548eb715ddb 100644 --- a/lib/string.c +++ b/lib/string.c @@ -85,7 +85,6 @@ EXPORT_SYMBOL(strcasecmp); * @dest: Where to copy the string to * @src: Where to copy the string from */ -#undef strcpy char *strcpy(char *dest, const char *src) { char *tmp = dest; @@ -272,13 +271,36 @@ ssize_t strscpy_pad(char *dest, const char *src, size_t count) } EXPORT_SYMBOL(strscpy_pad); +/** + * stpcpy - copy a string from src to dest returning a pointer to the new end + * of dest, including src's %NUL-terminator. May overrun dest. + * @dest: pointer to end of string being copied into. Must be large enough + * to receive copy. + * @src: pointer to the beginning of string being copied from. Must not overlap + * dest. + * + * stpcpy differs from strcpy in a key way: the return value is a pointer + * to the new %NUL-terminating character in @dest. (For strcpy, the return + * value is a pointer to the start of @dest). This interface is considered + * unsafe as it doesn't perform bounds checking of the inputs. As such it's + * not recommended for usage. Instead, its definition is provided in case + * the compiler lowers other libcalls to stpcpy. + */ +char *stpcpy(char *__restrict__ dest, const char *__restrict__ src); +char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) +{ + while ((*dest++ = *src++) != '\0') + /* nothing */; + return --dest; +} +EXPORT_SYMBOL(stpcpy); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another * @dest: The string to be appended to * @src: The string to append to it */ -#undef strcat char *strcat(char *dest, const char *src) { char *tmp = dest; @@ -354,7 +376,6 @@ EXPORT_SYMBOL(strlcat); * @cs: One string * @ct: Another string */ -#undef strcmp int strcmp(const char *cs, const char *ct) { unsigned char c1, c2; @@ -934,7 +955,6 @@ EXPORT_SYMBOL(memcmp); * while this particular implementation is a simple (tail) call to memcmp, do * not rely on anything but whether the return value is zero or non-zero. 
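 * In other words, treat the result as a boolean: bcmp(a, b, len) == 0
 * means the buffers match, any non-zero value only means they differ.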
*/ -#undef bcmp int bcmp(const void *a, const void *b, size_t len) { return memcmp(a, b, len); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 963050c0283e..7f2d5fbaf243 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -649,3 +649,26 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp) return pathname; } EXPORT_SYMBOL_GPL(kstrdup_quotable_file); + +/** + * kfree_strarray - free a number of dynamically allocated strings contained + * in an array and the array itself + * + * @array: Dynamically allocated array of strings to free. + * @n: Number of strings (starting from the beginning of the array) to free. + * + * Passing a non-NULL @array and @n == 0 as well as NULL @array are valid + * use-cases. If @array is NULL, the function does nothing. + */ +void kfree_strarray(char **array, size_t n) +{ + unsigned int i; + + if (!array) + return; + + for (i = 0; i < n; i++) + kfree(array[i]); + kfree(array); +} +EXPORT_SYMBOL_GPL(kfree_strarray); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 34696a348864..122d8d0e253c 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/export.h> +#include <linux/fault-inject-usercopy.h> #include <linux/kasan-checks.h> #include <linux/thread_info.h> #include <linux/uaccess.h> @@ -34,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, goto byte_at_a_time; while (max >= sizeof(unsigned long)) { - unsigned long c, data; + unsigned long c, data, mask; /* Fall back to byte-at-a-time if we get a page fault */ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); - *(unsigned long *)(dst+res) = c; + /* + * Note that we mask out the bytes following the NUL. This is + * important to do because string oblivious code may read past + * the NUL. For those routines, we don't want to give them + * potentially random bytes after the NUL in `src`. + * + * One example of such code is BPF map keys. BPF treats map keys + * as an opaque set of bytes. Without the post-NUL mask, any BPF + * maps keyed by strings returned from strncpy_from_user() may + * have multiple entries for semantically identical strings. 
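+	 * Example: on a 64-bit little-endian machine, reading the eight
+	 * source bytes 'a' 'b' '\0' 'x' 'y' 'z' 'w' 'v' produces a mask
+	 * that keeps only the bytes up to the NUL, so dst receives
+	 * "ab\0\0\0\0\0\0" rather than "ab\0xyzwv".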
+ */ if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); + mask = zero_bytemask(data); + *(unsigned long *)(dst+res) = c & mask; return res + find_zero(data); } + + *(unsigned long *)(dst+res) = c; + res += sizeof(unsigned long); max -= sizeof(unsigned long); } @@ -99,6 +115,8 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max_addr, src_addr; might_fault(); + if (should_fail_usercopy()) + return -EFAULT; if (unlikely(count <= 0)) return 0; diff --git a/lib/syscall.c b/lib/syscall.c index fb328e7ccb08..ba13e924c430 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -7,6 +7,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info) { + unsigned long args[6] = { }; struct pt_regs *regs; if (!try_get_task_stack(target)) { @@ -27,8 +28,14 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) - syscall_get_arguments(target, regs, - (unsigned long *)&info->data.args[0]); + syscall_get_arguments(target, regs, args); + + info->data.args[0] = args[0]; + info->data.args[1] = args[1]; + info->data.args[2] = args[2]; + info->data.args[3] = args[3]; + info->data.args[4] = args[4]; + info->data.args[5] = args[5]; put_task_stack(target); return 0; @@ -44,7 +51,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr - * set to the the call's number and @info.data.args filled in with its + * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. 
Note we may get this result if @target
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 25b5cbfb7615..10360d4ea273 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -238,6 +238,28 @@ static const struct test_string_2 escape1[] __initconst = {{
 	/* terminator */
 }};
 
+static const struct test_string strings_upper[] __initconst = {
+	{
+		.in = "abcdefgh1234567890test",
+		.out = "ABCDEFGH1234567890TEST",
+	},
+	{
+		.in = "abCdeFgH1234567890TesT",
+		.out = "ABCDEFGH1234567890TEST",
+	},
+};
+
+static const struct test_string strings_lower[] __initconst = {
+	{
+		.in = "ABCDEFGH1234567890TEST",
+		.out = "abcdefgh1234567890test",
+	},
+	{
+		.in = "abCdeFgH1234567890TesT",
+		.out = "abcdefgh1234567890test",
+	},
+};
+
 static __init const char *test_string_find_match(const struct test_string_2 *s2,
 						 unsigned int flags)
 {
@@ -390,6 +412,48 @@ static __init void test_string_get_size(void)
 	test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
+static void __init test_string_upper_lower(void)
+{
+	char *dst;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(strings_upper); i++) {
+		const char *s = strings_upper[i].in;
+		int len = strlen(strings_upper[i].in) + 1;
+
+		dst = kmalloc(len, GFP_KERNEL);
+		if (!dst)
+			return;
+
+		string_upper(dst, s);
+		if (memcmp(dst, strings_upper[i].out, len)) {
+			pr_warn("Test 'string_upper' failed : expected %s, got %s!\n",
+				strings_upper[i].out, dst);
+			kfree(dst);
+			return;
+		}
+		kfree(dst);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(strings_lower); i++) {
+		const char *s = strings_lower[i].in;
+		int len = strlen(strings_lower[i].in) + 1;
+
+		dst = kmalloc(len, GFP_KERNEL);
+		if (!dst)
+			return;
+
+		string_lower(dst, s);
+		if (memcmp(dst, strings_lower[i].out, len)) {
+			pr_warn("Test 'string_lower' failed : expected %s, got %s!\n",
+				strings_lower[i].out, dst);
+			kfree(dst);
+			return;
+		}
+		kfree(dst);
+	}
+}
+
 static int __init test_string_helpers_init(void)
 {
 	unsigned int i;
@@ -411,6 +475,9 @@ static int __init test_string_helpers_init(void)
 	/* Test string_get_size() */
 	test_string_get_size();
 
+	/* Test string_upper(), string_lower() */
+	test_string_upper_lower();
+
 	return -EINVAL;
 }
 module_init(test_string_helpers_init);
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 6b13150667f5..4425a1dd4ef1 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 
 };
 
-static void __init __test_bitmap_parselist(int is_user)
+static void __init test_bitmap_parselist(void)
 {
 	int i;
 	int err;
 	ktime_t time;
 	DECLARE_BITMAP(bmap, 2048);
-	char *mode = is_user ?
"_user" : ""; for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - if (is_user) { - mm_segment_t orig_fs = get_fs(); - size_t len = strlen(ptest.in); - - set_fs(KERNEL_DS); - time = ktime_get(); - err = bitmap_parselist_user((__force const char __user *)ptest.in, len, - bmap, ptest.nbits); - time = ktime_get() - time; - set_fs(orig_fs); - } else { - time = ktime_get(); - err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - time = ktime_get() - time; - } + time = ktime_get(); + err = bitmap_parselist(ptest.in, bmap, ptest.nbits); + time = ktime_get() - time; if (err != ptest.errno) { - pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n", - mode, i, ptest.in, err, ptest.errno); + pr_err("parselist: %d: input is %s, errno is %d, expected %d\n", + i, ptest.in, err, ptest.errno); continue; } if (!err && ptest.expected && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { - pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", - mode, i, ptest.in, bmap[0], + pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + i, ptest.in, bmap[0], *ptest.expected); continue; } if (ptest.flags & PARSE_TIME) - pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", - mode, i, ptest.in, time); + pr_err("parselist: %d: input is '%s' OK, Time: %llu\n", + i, ptest.in, time); #undef ptest } @@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = { #undef step }; -static void __init __test_bitmap_parse(int is_user) +static void __init test_bitmap_parse(void) { int i; int err; ktime_t time; DECLARE_BITMAP(bmap, 2048); - char *mode = is_user ? "_user" : ""; for (i = 0; i < ARRAY_SIZE(parse_tests); i++) { struct test_bitmap_parselist test = parse_tests[i]; + size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in); - if (is_user) { - size_t len = strlen(test.in); - mm_segment_t orig_fs = get_fs(); - - set_fs(KERNEL_DS); - time = ktime_get(); - err = bitmap_parse_user((__force const char __user *)test.in, len, - bmap, test.nbits); - time = ktime_get() - time; - set_fs(orig_fs); - } else { - size_t len = test.flags & NO_LEN ? 
- UINT_MAX : strlen(test.in); - time = ktime_get(); - err = bitmap_parse(test.in, len, bmap, test.nbits); - time = ktime_get() - time; - } + time = ktime_get(); + err = bitmap_parse(test.in, len, bmap, test.nbits); + time = ktime_get() - time; if (err != test.errno) { - pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n", - mode, i, test.in, err, test.errno); + pr_err("parse: %d: input is %s, errno is %d, expected %d\n", + i, test.in, err, test.errno); continue; } if (!err && test.expected && !__bitmap_equal(bmap, test.expected, test.nbits)) { - pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", - mode, i, test.in, bmap[0], + pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + i, test.in, bmap[0], *test.expected); continue; } if (test.flags & PARSE_TIME) - pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n", - mode, i, test.in, time); + pr_err("parse: %d: input is '%s' OK, Time: %llu\n", + i, test.in, time); } } -static void __init test_bitmap_parselist(void) -{ - __test_bitmap_parselist(0); -} - -static void __init test_bitmap_parselist_user(void) -{ - __test_bitmap_parselist(1); -} - -static void __init test_bitmap_parse(void) -{ - __test_bitmap_parse(0); -} - -static void __init test_bitmap_parse_user(void) -{ - __test_bitmap_parse(1); -} - #define EXP1_IN_BITS (sizeof(exp1) * 8) static void __init test_bitmap_arr32(void) @@ -610,6 +563,63 @@ static void __init test_for_each_set_clump8(void) expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump); } +struct test_bitmap_cut { + unsigned int first; + unsigned int cut; + unsigned int nbits; + unsigned long in[4]; + unsigned long expected[4]; +}; + +static struct test_bitmap_cut test_cut[] = { + { 0, 0, 8, { 0x0000000aUL, }, { 0x0000000aUL, }, }, + { 0, 0, 32, { 0xdadadeadUL, }, { 0xdadadeadUL, }, }, + { 0, 3, 8, { 0x000000aaUL, }, { 0x00000015UL, }, }, + { 3, 3, 8, { 0x000000aaUL, }, { 0x00000012UL, }, }, + { 0, 1, 32, { 0xa5a5a5a5UL, }, { 0x52d2d2d2UL, }, }, + { 0, 8, 32, { 0xdeadc0deUL, }, { 0x00deadc0UL, }, }, + { 1, 1, 32, { 0x5a5a5a5aUL, }, { 0x2d2d2d2cUL, }, }, + { 0, 15, 32, { 0xa5a5a5a5UL, }, { 0x00014b4bUL, }, }, + { 0, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 15, 15, 32, { 0xa5a5a5a5UL, }, { 0x000125a5UL, }, }, + { 15, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 16, 15, 32, { 0xa5a5a5a5UL, }, { 0x0001a5a5UL, }, }, + + { BITS_PER_LONG, BITS_PER_LONG, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + }, + { 1, BITS_PER_LONG - 1, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0x00000001UL, 0x00000001UL, }, + }, + + { 0, BITS_PER_LONG * 2, BITS_PER_LONG * 2 + 1, + { 0xa5a5a5a5UL, 0x00000001UL, 0x00000001UL, 0x00000001UL }, + { 0x00000001UL, }, + }, + { 16, BITS_PER_LONG * 2 + 1, BITS_PER_LONG * 2 + 1 + 16, + { 0x0000ffffUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL }, + { 0x2d2dffffUL, }, + }, +}; + +static void __init test_bitmap_cut(void) +{ + unsigned long b[5], *in = &b[1], *out = &b[0]; /* Partial overlap */ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cut); i++) { + struct test_bitmap_cut *t = &test_cut[i]; + + memcpy(in, t->in, sizeof(t->in)); + + bitmap_cut(out, in, t->first, t->cut, t->nbits); + + expect_eq_bitmap(t->expected, out, t->nbits); + } +} + static void __init selftest(void) { test_zero_clear(); @@ -618,11 +628,10 @@ static void __init selftest(void) test_replace(); test_bitmap_arr32(); test_bitmap_parse(); - test_bitmap_parse_user(); test_bitmap_parselist(); - test_bitmap_parselist_user(); 
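+	/*
+	 * The *_user variants are gone along with their helpers above:
+	 * they depended on the deprecated set_fs(KERNEL_DS) override.
+	 */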
test_mem_optimisations(); test_for_each_set_clump8(); + test_bitmap_cut(); } KSTM_MODULE_LOADERS(test_bitmap); diff --git a/lib/test_bitops.c b/lib/test_bitops.c index ced25e3a779b..471141ddd691 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -52,9 +52,9 @@ static unsigned long order_comb_long[][2] = { static int __init test_bitops_startup(void) { - int i; + int i, bit_set; - pr_warn("Loaded test module\n"); + pr_info("Starting bitops test\n"); set_bit(BITOPS_4, g_bitmap); set_bit(BITOPS_7, g_bitmap); set_bit(BITOPS_11, g_bitmap); @@ -81,12 +81,8 @@ static int __init test_bitops_startup(void) order_comb_long[i][0]); } #endif - return 0; -} -static void __exit test_bitops_unstartup(void) -{ - int bit_set; + barrier(); clear_bit(BITOPS_4, g_bitmap); clear_bit(BITOPS_7, g_bitmap); @@ -98,7 +94,13 @@ static void __exit test_bitops_unstartup(void) if (bit_set != BITOPS_LAST) pr_err("ERROR: FOUND SET BIT %d\n", bit_set); - pr_warn("Unloaded test module\n"); + pr_info("Completed bitops test\n"); + + return 0; +} + +static void __exit test_bitops_unstartup(void) +{ } module_init(test_bitops_startup); diff --git a/lib/test_bits.c b/lib/test_bits.c new file mode 100644 index 000000000000..c9368a2314e7 --- /dev/null +++ b/lib/test_bits.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for functions and macros in bits.h + */ + +#include <kunit/test.h> +#include <linux/bits.h> + + +static void genmask_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ul, GENMASK(0, 0)); + KUNIT_EXPECT_EQ(test, 3ul, GENMASK(1, 0)); + KUNIT_EXPECT_EQ(test, 6ul, GENMASK(2, 1)); + KUNIT_EXPECT_EQ(test, 0xFFFFFFFFul, GENMASK(31, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK(0, 1); + GENMASK(0, 10); + GENMASK(9, 10); +#endif + + +} + +static void genmask_ull_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ull, GENMASK_ULL(0, 0)); + KUNIT_EXPECT_EQ(test, 3ull, GENMASK_ULL(1, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_ULL(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_ULL(63, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK_ULL(0, 1); + GENMASK_ULL(0, 10); + GENMASK_ULL(9, 10); +#endif +} + +static void genmask_input_check_test(struct kunit *test) +{ + unsigned int x, y; + int z, w; + + /* Unknown input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, x)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, y)); + + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, z)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, w)); + + /* Valid input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21)); +} + + +static struct kunit_case bits_test_cases[] = { + KUNIT_CASE(genmask_test), + KUNIT_CASE(genmask_ull_test), + KUNIT_CASE(genmask_input_check_test), + {} +}; + +static struct kunit_suite bits_test_suite = { + .name = "bits-test", + .test_cases = bits_test_cases, +}; +kunit_test_suite(bits_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index a5fddf9ebcb7..ca7d635bccd9 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5275,31 +5275,21 @@ static struct bpf_test tests[] = { { /* Mainly checking JIT here. 
*/ "BPF_MAXINSNS: Ctx heavy transformations", { }, -#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) - CLASSIC | FLAG_EXPECTED_FAIL, -#else CLASSIC, -#endif { }, { { 1, SKB_VLAN_PRESENT }, { 10, SKB_VLAN_PRESENT } }, .fill_helper = bpf_fill_maxinsns6, - .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Call heavy transformations", { }, -#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) - CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, -#else CLASSIC | FLAG_NO_DATA, -#endif { }, { { 1, 0 }, { 10, 0 } }, .fill_helper = bpf_fill_maxinsns7, - .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Jump heavy test", @@ -5350,28 +5340,18 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: exec all MSH", { }, -#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) - CLASSIC | FLAG_EXPECTED_FAIL, -#else CLASSIC, -#endif { 0xfa, 0xfb, 0xfc, 0xfd, }, { { 4, 0xababab83 } }, .fill_helper = bpf_fill_maxinsns13, - .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", { }, -#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) - CLASSIC | FLAG_EXPECTED_FAIL, -#else CLASSIC, -#endif { }, { { 1, 0xbee } }, .fill_helper = bpf_fill_ld_abs_get_processor_id, - .expected_errcode = -ENOTSUPP, }, /* * LD_IND / LD_ABS on fragmented SKBs diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 9fee2b93a8d1..b6fe89add9fe 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -26,6 +26,8 @@ #include <linux/vmalloc.h> #include <linux/efi_embedded_fw.h> +MODULE_IMPORT_NS(TEST_FIRMWARE); + #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 #define TEST_FIRMWARE_BUF_SIZE SZ_1K @@ -50,6 +52,9 @@ struct test_batched_req { * @name: the name of the firmware file to look for * @into_buf: when the into_buf is used if this is true * request_firmware_into_buf() will be used instead. + * @buf_size: size of buf to allocate when into_buf is true + * @file_offset: file offset to request when calling request_firmware_into_buf + * @partial: partial read opt when calling request_firmware_into_buf * @sync_direct: when the sync trigger is used if this is true * request_firmware_direct() will be used instead. 
* @send_uevent: whether or not to send a uevent for async requests @@ -89,6 +94,9 @@ struct test_batched_req { struct test_config { char *name; bool into_buf; + size_t buf_size; + size_t file_offset; + bool partial; bool sync_direct; bool send_uevent; u8 num_requests; @@ -183,6 +191,9 @@ static int __test_firmware_config_init(void) test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS; test_fw_config->send_uevent = true; test_fw_config->into_buf = false; + test_fw_config->buf_size = TEST_FIRMWARE_BUF_SIZE; + test_fw_config->file_offset = 0; + test_fw_config->partial = false; test_fw_config->sync_direct = false; test_fw_config->req_firmware = request_firmware; test_fw_config->test_result = 0; @@ -236,28 +247,35 @@ static ssize_t config_show(struct device *dev, dev_name(dev)); if (test_fw_config->name) - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "name:\t%s\n", test_fw_config->name); else - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "name:\tEMTPY\n"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "num_requests:\t%u\n", test_fw_config->num_requests); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "send_uevent:\t\t%s\n", test_fw_config->send_uevent ? "FW_ACTION_HOTPLUG" : "FW_ACTION_NOHOTPLUG"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "into_buf:\t\t%s\n", test_fw_config->into_buf ? "true" : "false"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, + "buf_size:\t%zu\n", test_fw_config->buf_size); + len += scnprintf(buf + len, PAGE_SIZE - len, + "file_offset:\t%zu\n", test_fw_config->file_offset); + len += scnprintf(buf + len, PAGE_SIZE - len, + "partial:\t\t%s\n", + test_fw_config->partial ? "true" : "false"); + len += scnprintf(buf + len, PAGE_SIZE - len, "sync_direct:\t\t%s\n", test_fw_config->sync_direct ? 
"true" : "false"); - len += scnprintf(buf+len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "read_fw_idx:\t%u\n", test_fw_config->read_fw_idx); mutex_unlock(&test_fw_mutex); @@ -315,6 +333,30 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val) return snprintf(buf, PAGE_SIZE, "%d\n", val); } +static int test_dev_config_update_size_t(const char *buf, + size_t size, + size_t *cfg) +{ + int ret; + long new; + + ret = kstrtol(buf, 10, &new); + if (ret) + return ret; + + mutex_lock(&test_fw_mutex); + *(size_t *)cfg = new; + mutex_unlock(&test_fw_mutex); + + /* Always return full write size even if we didn't consume all */ + return size; +} + +static ssize_t test_dev_config_show_size_t(char *buf, size_t val) +{ + return snprintf(buf, PAGE_SIZE, "%zu\n", val); +} + static ssize_t test_dev_config_show_int(char *buf, int val) { return snprintf(buf, PAGE_SIZE, "%d\n", val); @@ -322,18 +364,15 @@ static ssize_t test_dev_config_show_int(char *buf, int val) static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) { + u8 val; int ret; - long new; - ret = kstrtol(buf, 10, &new); + ret = kstrtou8(buf, 10, &val); if (ret) return ret; - if (new > U8_MAX) - return -EINVAL; - mutex_lock(&test_fw_mutex); - *(u8 *)cfg = new; + *(u8 *)cfg = val; mutex_unlock(&test_fw_mutex); /* Always return full write size even if we didn't consume all */ @@ -400,6 +439,83 @@ static ssize_t config_into_buf_show(struct device *dev, } static DEVICE_ATTR_RW(config_into_buf); +static ssize_t config_buf_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + + mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + pr_err("Must call release_all_firmware prior to changing config\n"); + rc = -EINVAL; + mutex_unlock(&test_fw_mutex); + goto out; + } + mutex_unlock(&test_fw_mutex); + + rc = test_dev_config_update_size_t(buf, count, + &test_fw_config->buf_size); + +out: + return rc; +} + +static ssize_t config_buf_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_size_t(buf, test_fw_config->buf_size); +} +static DEVICE_ATTR_RW(config_buf_size); + +static ssize_t config_file_offset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + + mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + pr_err("Must call release_all_firmware prior to changing config\n"); + rc = -EINVAL; + mutex_unlock(&test_fw_mutex); + goto out; + } + mutex_unlock(&test_fw_mutex); + + rc = test_dev_config_update_size_t(buf, count, + &test_fw_config->file_offset); + +out: + return rc; +} + +static ssize_t config_file_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_size_t(buf, test_fw_config->file_offset); +} +static DEVICE_ATTR_RW(config_file_offset); + +static ssize_t config_partial_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return test_dev_config_update_bool(buf, + count, + &test_fw_config->partial); +} + +static ssize_t config_partial_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return test_dev_config_show_bool(buf, test_fw_config->partial); +} +static DEVICE_ATTR_RW(config_partial); + static ssize_t config_sync_direct_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -489,6 +605,9 @@ out: static DEVICE_ATTR_WO(trigger_request); #ifdef 
CONFIG_EFI_EMBEDDED_FIRMWARE +extern struct list_head efi_embedded_fw_list; +extern bool efi_embedded_fw_checked; + static ssize_t trigger_request_platform_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -501,6 +620,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, }; struct efi_embedded_fw efi_embedded_fw; const struct firmware *firmware = NULL; + bool saved_efi_embedded_fw_checked; char *name; int rc; @@ -513,6 +633,8 @@ static ssize_t trigger_request_platform_store(struct device *dev, efi_embedded_fw.data = (void *)test_data; efi_embedded_fw.length = sizeof(test_data); list_add(&efi_embedded_fw.list, &efi_embedded_fw_list); + saved_efi_embedded_fw_checked = efi_embedded_fw_checked; + efi_embedded_fw_checked = true; pr_info("loading '%s'\n", name); rc = firmware_request_platform(&firmware, name, dev); @@ -530,6 +652,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, rc = count; out: + efi_embedded_fw_checked = saved_efi_embedded_fw_checked; release_firmware(firmware); list_del(&efi_embedded_fw.list); kfree(name); @@ -650,11 +773,21 @@ static int test_fw_run_batch_request(void *data) if (!test_buf) return -ENOSPC; - req->rc = request_firmware_into_buf(&req->fw, - req->name, - req->dev, - test_buf, - TEST_FIRMWARE_BUF_SIZE); + if (test_fw_config->partial) + req->rc = request_partial_firmware_into_buf + (&req->fw, + req->name, + req->dev, + test_buf, + test_fw_config->buf_size, + test_fw_config->file_offset); + else + req->rc = request_firmware_into_buf + (&req->fw, + req->name, + req->dev, + test_buf, + test_fw_config->buf_size); if (!req->fw) kfree(test_buf); } else { @@ -927,6 +1060,9 @@ static struct attribute *test_dev_attrs[] = { TEST_FW_DEV_ATTR(config_name), TEST_FW_DEV_ATTR(config_num_requests), TEST_FW_DEV_ATTR(config_into_buf), + TEST_FW_DEV_ATTR(config_buf_size), + TEST_FW_DEV_ATTR(config_file_offset), + TEST_FW_DEV_ATTR(config_partial), TEST_FW_DEV_ATTR(config_sync_direct), TEST_FW_DEV_ATTR(config_send_uevent), TEST_FW_DEV_ATTR(config_read_fw_idx), diff --git a/lib/test_fpu.c b/lib/test_fpu.c new file mode 100644 index 000000000000..c33764aa3eb8 --- /dev/null +++ b/lib/test_fpu.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for using floating point operations inside a kernel module. + * + * This tests kernel_fpu_begin() and kernel_fpu_end() functions, especially + * when userland has modified the floating point control registers. The kernel + * state might depend on the state set by the userland thread that was active + * before a syscall. + * + * To facilitate the test, this module registers file + * /sys/kernel/debug/selftest_helpers/test_fpu, which when read causes a + * sequence of floating point operations. If the operations fail, either the + * read returns error status or the kernel crashes. + * If the operations succeed, the read returns "1\n". + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <asm/fpu/api.h> + +static int test_fpu(void) +{ + /* + * This sequence of operations tests that rounding mode is + * to nearest and that denormal numbers are supported. + * Volatile variables are used to avoid compiler optimizing + * the calculations away. 
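+ *
+ * As a worked example (an editorial note, assuming IEEE-754 binary64
+ * arithmetic, not part of the patch): the spacing between adjacent
+ * doubles near 4.0 is about 8.9e-16, so under round-to-nearest
+ * d = 4.0 + 1e-15 must come out strictly greater than 4.0, and
+ * f = b / c = 1e-15 / 1e-310 only reaches roughly 1e295 if the
+ * denormal value in c survives the division.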
+ */ + volatile double a, b, c, d, e, f, g; + + a = 4.0; + b = 1e-15; + c = 1e-310; + + /* Sets precision flag */ + d = a + b; + + /* Result depends on rounding mode */ + e = a + b / 2; + + /* Denormal and very large values */ + f = b / c; + + /* Depends on denormal support */ + g = a + c * f; + + if (d > a && e > a && g > a) + return 0; + else + return -EINVAL; +} + +static int test_fpu_get(void *data, u64 *val) +{ + int status = -EINVAL; + + kernel_fpu_begin(); + status = test_fpu(); + kernel_fpu_end(); + + *val = 1; + return status; +} + +DEFINE_SIMPLE_ATTRIBUTE(test_fpu_fops, test_fpu_get, NULL, "%lld\n"); +static struct dentry *selftest_dir; + +static int __init test_fpu_init(void) +{ + selftest_dir = debugfs_create_dir("selftest_helpers", NULL); + if (!selftest_dir) + return -ENOMEM; + + debugfs_create_file("test_fpu", 0444, selftest_dir, NULL, + &test_fpu_fops); + + return 0; +} + +static void __exit test_fpu_exit(void) +{ + debugfs_remove(selftest_dir); +} + +module_init(test_fpu_init); +module_exit(test_fpu_exit); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c new file mode 100644 index 000000000000..25ae1ac2624a --- /dev/null +++ b/lib/test_free_pages.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * test_free_pages.c: Check that free_pages() doesn't leak memory + * Copyright (c) 2020 Oracle + * Author: Matthew Wilcox <willy@infradead.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/module.h> + +static void test_free_pages(gfp_t gfp) +{ + unsigned int i; + + for (i = 0; i < 1000 * 1000; i++) { + unsigned long addr = __get_free_pages(gfp, 3); + struct page *page = virt_to_page(addr); + + /* Simulate page cache getting a speculative reference */ + get_page(page); + free_pages(addr, 3); + put_page(page); + } +} + +static int m_in(void) +{ + pr_info("Testing with GFP_KERNEL\n"); + test_free_pages(GFP_KERNEL); + pr_info("Testing with GFP_KERNEL | __GFP_COMP\n"); + test_free_pages(GFP_KERNEL | __GFP_COMP); + pr_info("Test completed\n"); + + return 0; +} + +static void m_ex(void) +{ +} + +module_init(m_in); +module_exit(m_ex); +MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 28528285942c..80a78877bd93 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -36,7 +36,6 @@ static const struct dev_pagemap_ops dmirror_devmem_ops; static const struct mmu_interval_notifier_ops dmirror_min_ops; static dev_t dmirror_dev; -static struct page *dmirror_zero_page; struct dmirror_device; @@ -214,6 +213,14 @@ static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni, { struct dmirror *dmirror = container_of(mni, struct dmirror, notifier); + /* + * Ignore invalidation callbacks for device private pages since + * the invalidation is handled as part of the migration process. 
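+ *
+ * Only events tagged MMU_NOTIFY_MIGRATE whose migrate_pgmap_owner is
+ * this device are skipped; every other notifier event still takes
+ * dmirror->mutex below and updates the mirrored page table.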
+ */ + if (range->event == MMU_NOTIFY_MIGRATE && + range->migrate_pgmap_owner == dmirror->mdevice) + return true; + if (mmu_notifier_range_blockable(range)) mutex_lock(&dmirror->mutex); else if (!mutex_trylock(&dmirror->mutex)) @@ -452,6 +459,22 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, unsigned long pfn_last; void *ptr; + devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); + if (!devmem) + return false; + + res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, + "hmm_dmirror"); + if (IS_ERR(res)) + goto err_devmem; + + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + devmem->pagemap.range.start = res->start; + devmem->pagemap.range.end = res->end; + devmem->pagemap.nr_range = 1; + devmem->pagemap.ops = &dmirror_devmem_ops; + devmem->pagemap.owner = mdevice; + mutex_lock(&mdevice->devmem_lock); if (mdevice->devmem_count == mdevice->devmem_capacity) { @@ -464,33 +487,18 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, sizeof(new_chunks[0]) * new_capacity, GFP_KERNEL); if (!new_chunks) - goto err; + goto err_release; mdevice->devmem_capacity = new_capacity; mdevice->devmem_chunks = new_chunks; } - res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, - "hmm_dmirror"); - if (IS_ERR(res)) - goto err; - - devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); - if (!devmem) - goto err_release; - - devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.res = *res; - devmem->pagemap.ops = &dmirror_devmem_ops; - devmem->pagemap.owner = mdevice; - ptr = memremap_pages(&devmem->pagemap, numa_node_id()); if (IS_ERR(ptr)) - goto err_free; + goto err_release; devmem->mdevice = mdevice; - pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT; - pfn_last = pfn_first + - (resource_size(&devmem->pagemap.res) >> PAGE_SHIFT); + pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT; + pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT); mdevice->devmem_chunks[mdevice->devmem_count++] = devmem; mutex_unlock(&mdevice->devmem_lock); @@ -517,13 +525,12 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, return true; -err_free: - kfree(devmem); err_release: - release_mem_region(devmem->pagemap.res.start, - resource_size(&devmem->pagemap.res)); -err: mutex_unlock(&mdevice->devmem_lock); + release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range)); +err_devmem: + kfree(devmem); + return false; } @@ -586,15 +593,6 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, */ spage = migrate_pfn_to_page(*src); - /* - * Don't migrate device private pages from our own driver or - * others. For our own we would do a device private memory copy - * not a migration and for others, we would need to fault the - * other device's page into system memory first. 
- */ - if (spage && is_zone_device_page(spage)) - continue; - dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue; @@ -703,7 +701,8 @@ static int dmirror_migrate(struct dmirror *dmirror, args.dst = dst_pfns; args.start = addr; args.end = next; - args.src_owner = NULL; + args.pgmap_owner = dmirror->mdevice; + args.flags = MIGRATE_VMA_SELECT_SYSTEM; ret = migrate_vma_setup(&args); if (ret) goto out; @@ -767,6 +766,10 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, *perm |= HMM_DMIRROR_PROT_WRITE; else *perm |= HMM_DMIRROR_PROT_READ; + if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT) + *perm |= HMM_DMIRROR_PROT_PMD; + else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT) + *perm |= HMM_DMIRROR_PROT_PUD; } static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni, @@ -988,7 +991,7 @@ static void dmirror_devmem_free(struct page *page) } static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, - struct dmirror_device *mdevice) + struct dmirror *dmirror) { const unsigned long *src = args->src; unsigned long *dst = args->dst; @@ -1010,6 +1013,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, continue; lock_page(dpage); + xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); copy_highpage(dpage, spage); *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; if (*src & MIGRATE_PFN_WRITE) @@ -1018,15 +1022,6 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, return 0; } -static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args, - struct dmirror *dmirror) -{ - /* Invalidate the device's page table mapping. */ - mutex_lock(&dmirror->mutex); - dmirror_do_update(dmirror, args->start, args->end); - mutex_unlock(&dmirror->mutex); -} - static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { struct migrate_vma args; @@ -1050,16 +1045,21 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) args.end = args.start + PAGE_SIZE; args.src = &src_pfns; args.dst = &dst_pfns; - args.src_owner = dmirror->mdevice; + args.pgmap_owner = dmirror->mdevice; + args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS; - ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice); + ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror); if (ret) return ret; migrate_vma_pages(&args); - dmirror_devmem_fault_finalize_and_map(&args, dmirror); + /* + * No device finalize step is needed since + * dmirror_devmem_fault_alloc_and_copy() will have already + * invalidated the device page table. + */ migrate_vma_finalize(&args); return 0; } @@ -1100,8 +1100,8 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) mdevice->devmem_chunks[i]; memunmap_pages(&devmem->pagemap); - release_mem_region(devmem->pagemap.res.start, - resource_size(&devmem->pagemap.res)); + release_mem_region(devmem->pagemap.range.start, + range_len(&devmem->pagemap.range)); kfree(devmem); } kfree(mdevice->devmem_chunks); @@ -1126,17 +1126,6 @@ static int __init hmm_dmirror_init(void) goto err_chrdev; } - /* - * Allocate a zero page to simulate a reserved page of device private - * memory which is always zero. The zero_pfn page isn't used just to - * make the code here simpler (i.e., we need a struct page for it). - */ - dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); - if (!dmirror_zero_page) { - ret = -ENOMEM; - goto err_chrdev; - } - pr_info("HMM test module loaded. 
This is only for testing HMM.\n"); return 0; @@ -1152,8 +1141,6 @@ static void __exit hmm_dmirror_exit(void) { int id; - if (dmirror_zero_page) - __free_page(dmirror_zero_page); for (id = 0; id < DMIRROR_NDEVICES; id++) dmirror_device_remove(dmirror_devices + id); unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h index 67b3b2e6ff5d..670b4ef2a5b6 100644 --- a/lib/test_hmm_uapi.h +++ b/lib/test_hmm_uapi.h @@ -40,6 +40,8 @@ struct hmm_dmirror_cmd { * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access * HMM_DMIRROR_PROT_READ: read-only PTE * HMM_DMIRROR_PROT_WRITE: read/write PTE + * HMM_DMIRROR_PROT_PMD: PMD sized page is fully mapped by same permissions + * HMM_DMIRROR_PROT_PUD: PUD sized page is fully mapped by same permissions * HMM_DMIRROR_PROT_ZERO: special read-only zero page * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the * device the ioctl() is made @@ -51,6 +53,8 @@ enum { HMM_DMIRROR_PROT_NONE = 0x00, HMM_DMIRROR_PROT_READ = 0x01, HMM_DMIRROR_PROT_WRITE = 0x02, + HMM_DMIRROR_PROT_PMD = 0x04, + HMM_DMIRROR_PROT_PUD = 0x08, HMM_DMIRROR_PROT_ZERO = 0x10, HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20, HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30, diff --git a/lib/test_kasan.c b/lib/test_kasan.c index dc2c6a51d11a..2947274cc2d3 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -5,8 +5,6 @@ * Author: Andrey Ryabinin <a.ryabinin@samsung.com> */ -#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ - #include <linux/bitops.h> #include <linux/delay.h> #include <linux/kasan.h> @@ -23,412 +21,404 @@ #include <asm/page.h> +#include <kunit/test.h> + +#include "../mm/kasan/kasan.h" + +#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) + /* * We assign some test results to these globals to make sure the tests * are not eliminated as dead code. */ -int kasan_int_result; void *kasan_ptr_result; +int kasan_int_result; -/* - * Note: test functions are marked noinline so that their names appear in - * reports. - */ +static struct kunit_resource resource; +static struct kunit_kasan_expectation fail_data; +static bool multishot; -static noinline void __init kmalloc_oob_right(void) +static int kasan_test_init(struct kunit *test) +{ + /* + * Temporarily enable multi-shot mode and set panic_on_warn=0. + * Otherwise, we'd only get a report for the first case. + */ + multishot = kasan_save_enable_multi_shot(); + + return 0; +} + +static void kasan_test_exit(struct kunit *test) +{ + kasan_restore_multi_shot(multishot); +} + +/** + * KUNIT_EXPECT_KASAN_FAIL() - Causes a test failure when the expression does + * not cause a KASAN error. This uses a KUnit resource named "kasan_data." Do + * not use this name for a KUnit resource outside here.
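+ *
+ * In brief: fail_data.report_expected is armed before @condition is
+ * evaluated, the KASAN report path looks up the "kasan_data" resource
+ * on the running test and sets report_found, and the closing
+ * KUNIT_EXPECT_EQ() checks that the two flags agree.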
+ * + */ +#define KUNIT_EXPECT_KASAN_FAIL(test, condition) do { \ + fail_data.report_expected = true; \ + fail_data.report_found = false; \ + kunit_add_named_resource(test, \ + NULL, \ + NULL, \ + &resource, \ + "kasan_data", &fail_data); \ + condition; \ + KUNIT_EXPECT_EQ(test, \ + fail_data.report_expected, \ + fail_data.report_found); \ +} while (0) + +static void kmalloc_oob_right(struct kunit *test) { char *ptr; size_t size = 123; - pr_info("out-of-bounds to right\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - ptr[size] = 'x'; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x'); kfree(ptr); } -static noinline void __init kmalloc_oob_left(void) +static void kmalloc_oob_left(struct kunit *test) { char *ptr; size_t size = 15; - pr_info("out-of-bounds to left\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - *ptr = *(ptr - 1); + KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1)); kfree(ptr); } -static noinline void __init kmalloc_node_oob_right(void) +static void kmalloc_node_oob_right(struct kunit *test) { char *ptr; size_t size = 4096; - pr_info("kmalloc_node(): out-of-bounds to right\n"); ptr = kmalloc_node(size, GFP_KERNEL, 0); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - ptr[size] = 0; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); kfree(ptr); } -#ifdef CONFIG_SLUB -static noinline void __init kmalloc_pagealloc_oob_right(void) +static void kmalloc_pagealloc_oob_right(struct kunit *test) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + if (!IS_ENABLED(CONFIG_SLUB)) { + kunit_info(test, "CONFIG_SLUB is not enabled."); + return; + } + /* Allocate a chunk that does not fit into a SLUB cache to trigger * the page allocator fallback. 
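 * (KMALLOC_MAX_CACHE_SIZE is the largest size served from a slab
 * cache; anything bigger is handed straight to the page allocator,
 * which carries its own KASAN annotations.)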
*/ - pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - ptr[size] = 0; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0); kfree(ptr); } -static noinline void __init kmalloc_pagealloc_uaf(void) +static void kmalloc_pagealloc_uaf(struct kunit *test) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; - pr_info("kmalloc pagealloc allocation: use-after-free\n"); - ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); + if (!IS_ENABLED(CONFIG_SLUB)) { + kunit_info(test, "CONFIG_SLUB is not enabled."); return; } + ptr = kmalloc(size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + kfree(ptr); - ptr[0] = 0; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0); } -static noinline void __init kmalloc_pagealloc_invalid_free(void) +static void kmalloc_pagealloc_invalid_free(struct kunit *test) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; - pr_info("kmalloc pagealloc allocation: invalid-free\n"); - ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); + if (!IS_ENABLED(CONFIG_SLUB)) { + kunit_info(test, "CONFIG_SLUB is not enabled."); return; } - kfree(ptr + 1); + ptr = kmalloc(size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + KUNIT_EXPECT_KASAN_FAIL(test, kfree(ptr + 1)); } -#endif -static noinline void __init kmalloc_large_oob_right(void) +static void kmalloc_large_oob_right(struct kunit *test) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE - 256; /* Allocate a chunk that is large enough, but still fits into a slab * and does not trigger the page allocator fallback in SLUB. */ - pr_info("kmalloc large allocation: out-of-bounds to right\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - ptr[size] = 0; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); kfree(ptr); } -static noinline void __init kmalloc_oob_krealloc_more(void) +static void kmalloc_oob_krealloc_more(struct kunit *test) { char *ptr1, *ptr2; size_t size1 = 17; size_t size2 = 19; - pr_info("out-of-bounds after krealloc more\n"); ptr1 = kmalloc(size1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); - if (!ptr1 || !ptr2) { - pr_err("Allocation failed\n"); - kfree(ptr1); - kfree(ptr2); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - ptr2[size2] = 'x'; + KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x'); kfree(ptr2); } -static noinline void __init kmalloc_oob_krealloc_less(void) +static void kmalloc_oob_krealloc_less(struct kunit *test) { char *ptr1, *ptr2; size_t size1 = 17; size_t size2 = 15; - pr_info("out-of-bounds after krealloc less\n"); ptr1 = kmalloc(size1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); - if (!ptr1 || !ptr2) { - pr_err("Allocation failed\n"); - kfree(ptr1); - return; - } - ptr2[size2] = 'x'; + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x'); kfree(ptr2); } -static noinline void __init kmalloc_oob_16(void) +static void kmalloc_oob_16(struct kunit *test) { struct { u64 words[2]; } *ptr1, *ptr2; - pr_info("kmalloc out-of-bounds for 16-bytes access\n"); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); - ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); - if (!ptr1 || !ptr2) { - pr_err("Allocation 
failed\n"); - kfree(ptr1); - kfree(ptr2); + /* This test is specifically crafted for the generic mode. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); return; } - *ptr1 = *ptr2; + + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); kfree(ptr1); kfree(ptr2); } -static noinline void __init kmalloc_oob_memset_2(void) +static void kmalloc_uaf_16(struct kunit *test) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + kfree(ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); + kfree(ptr1); +} + +static void kmalloc_oob_memset_2(struct kunit *test) { char *ptr; size_t size = 8; - pr_info("out-of-bounds in memset2\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(ptr+7, 0, 2); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 7 + OOB_TAG_OFF, 0, 2)); kfree(ptr); } -static noinline void __init kmalloc_oob_memset_4(void) +static void kmalloc_oob_memset_4(struct kunit *test) { char *ptr; size_t size = 8; - pr_info("out-of-bounds in memset4\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(ptr+5, 0, 4); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 5 + OOB_TAG_OFF, 0, 4)); kfree(ptr); } -static noinline void __init kmalloc_oob_memset_8(void) +static void kmalloc_oob_memset_8(struct kunit *test) { char *ptr; size_t size = 8; - pr_info("out-of-bounds in memset8\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(ptr+1, 0, 8); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 8)); kfree(ptr); } -static noinline void __init kmalloc_oob_memset_16(void) +static void kmalloc_oob_memset_16(struct kunit *test) { char *ptr; size_t size = 16; - pr_info("out-of-bounds in memset16\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(ptr+1, 0, 16); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 16)); kfree(ptr); } -static noinline void __init kmalloc_oob_in_memset(void) +static void kmalloc_oob_in_memset(struct kunit *test) { char *ptr; size_t size = 666; - pr_info("out-of-bounds in memset\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(ptr, 0, size+5); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + 5 + OOB_TAG_OFF)); kfree(ptr); } -static noinline void __init kmalloc_memmove_invalid_size(void) +static void kmalloc_memmove_invalid_size(struct kunit *test) { char *ptr; size_t size = 64; volatile size_t invalid_size = -2; - pr_info("invalid size in memmove\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); - memmove((char *)ptr, (char *)ptr + 4, invalid_size); + + KUNIT_EXPECT_KASAN_FAIL(test, + memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); } 
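/*
 * Editorial sketch, not part of the patch: why several tests above
 * offset their bad access by OOB_TAG_OFF.  Generic KASAN tracks
 * validity per byte, but the tag-based mode tags whole 16-byte
 * KASAN_GRANULE_SIZE granules, so an access that stays inside the
 * granule holding the end of the object carries a matching tag and is
 * not reported.  The function name and size here are hypothetical;
 * wiring it up would take a KUNIT_CASE(oob_granule_example) entry.
 */
static void __maybe_unused oob_granule_example(struct kunit *test)
{
	char *ptr = kmalloc(123, GFP_KERNEL);

	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
	/*
	 * ptr[123] already trips generic KASAN, but under sw-tags it can
	 * share the object's final granule; stepping one full granule
	 * past guarantees a tag mismatch in both modes.
	 */
	KUNIT_EXPECT_KASAN_FAIL(test, ptr[123 + OOB_TAG_OFF] = 'x');
	kfree(ptr);
}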
-static noinline void __init kmalloc_uaf(void) +static void kmalloc_uaf(struct kunit *test) { char *ptr; size_t size = 10; - pr_info("use-after-free\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); - *(ptr + 8) = 'x'; + KUNIT_EXPECT_KASAN_FAIL(test, *(ptr + 8) = 'x'); } -static noinline void __init kmalloc_uaf_memset(void) +static void kmalloc_uaf_memset(struct kunit *test) { char *ptr; size_t size = 33; - pr_info("use-after-free in memset\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); - memset(ptr, 0, size); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size)); } -static noinline void __init kmalloc_uaf2(void) +static void kmalloc_uaf2(struct kunit *test) { char *ptr1, *ptr2; size_t size = 43; - pr_info("use-after-free after another kmalloc\n"); ptr1 = kmalloc(size, GFP_KERNEL); - if (!ptr1) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); kfree(ptr1); + ptr2 = kmalloc(size, GFP_KERNEL); - if (!ptr2) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x'); + KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2); - ptr1[40] = 'x'; - if (ptr1 == ptr2) - pr_err("Could not detect use-after-free: ptr1 == ptr2\n"); kfree(ptr2); } -static noinline void __init kfree_via_page(void) +static void kfree_via_page(struct kunit *test) { char *ptr; size_t size = 8; struct page *page; unsigned long offset; - pr_info("invalid-free false positive (via page)\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); page = virt_to_page(ptr); offset = offset_in_page(ptr); kfree(page_address(page) + offset); } -static noinline void __init kfree_via_phys(void) +static void kfree_via_phys(struct kunit *test) { char *ptr; size_t size = 8; phys_addr_t phys; - pr_info("invalid-free false positive (via phys)\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); phys = virt_to_phys(ptr); kfree(phys_to_virt(phys)); } -static noinline void __init kmem_cache_oob(void) +static void kmem_cache_oob(struct kunit *test) { char *p; size_t size = 200; struct kmem_cache *cache = kmem_cache_create("test_cache", size, 0, 0, NULL); - if (!cache) { - pr_err("Cache allocation failed\n"); - return; - } - pr_info("out-of-bounds in kmem_cache_alloc\n"); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); p = kmem_cache_alloc(cache, GFP_KERNEL); if (!p) { - pr_err("Allocation failed\n"); + kunit_err(test, "Allocation failed: %s\n", __func__); kmem_cache_destroy(cache); return; } - *p = p[size]; + KUNIT_EXPECT_KASAN_FAIL(test, *p = p[size + OOB_TAG_OFF]); kmem_cache_free(cache, p); kmem_cache_destroy(cache); } -static noinline void __init memcg_accounted_kmem_cache(void) +static void memcg_accounted_kmem_cache(struct kunit *test) { int i; char *p; @@ -436,12 +426,8 @@ static noinline void __init memcg_accounted_kmem_cache(void) struct kmem_cache *cache; cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL); - if (!cache) { - pr_err("Cache allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - pr_info("allocate memcg accounted object\n"); /* * Several allocations with a delay to allow for lazy per memcg kmem * 
cache creation. @@ -461,134 +447,111 @@ free_cache: static char global_array[10]; -static noinline void __init kasan_global_oob(void) +static void kasan_global_oob(struct kunit *test) { volatile int i = 3; char *p = &global_array[ARRAY_SIZE(global_array) + i]; - pr_info("out-of-bounds global variable\n"); - *(volatile char *)p; -} - -static noinline void __init kasan_stack_oob(void) -{ - char stack_array[10]; - volatile int i = 0; - char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + /* Only generic mode instruments globals. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } - pr_info("out-of-bounds on stack\n"); - *(volatile char *)p; + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -static noinline void __init ksize_unpoisons_memory(void) +static void ksize_unpoisons_memory(struct kunit *test) { char *ptr; size_t size = 123, real_size; - pr_info("ksize() unpoisons the whole allocated chunk\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); real_size = ksize(ptr); /* This access doesn't trigger an error. */ ptr[size] = 'x'; /* This one does. */ - ptr[real_size] = 'y'; + KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y'); kfree(ptr); } -static noinline void __init copy_user_test(void) +static void kasan_stack_oob(struct kunit *test) { - char *kmem; - char __user *usermem; - size_t size = 10; - int unused; - - kmem = kmalloc(size, GFP_KERNEL); - if (!kmem) - return; + char stack_array[10]; + volatile int i = OOB_TAG_OFF; + char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; - usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_ANONYMOUS | MAP_PRIVATE, 0); - if (IS_ERR(usermem)) { - pr_err("Failed to allocate user memory\n"); - kfree(kmem); + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { + kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); return; } - pr_info("out-of-bounds in copy_from_user()\n"); - unused = copy_from_user(kmem, usermem, size + 1); - - pr_info("out-of-bounds in copy_to_user()\n"); - unused = copy_to_user(usermem, kmem, size + 1); - - pr_info("out-of-bounds in __copy_from_user()\n"); - unused = __copy_from_user(kmem, usermem, size + 1); - - pr_info("out-of-bounds in __copy_to_user()\n"); - unused = __copy_to_user(usermem, kmem, size + 1); - - pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); - unused = __copy_from_user_inatomic(kmem, usermem, size + 1); - - pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); - unused = __copy_to_user_inatomic(usermem, kmem, size + 1); - - pr_info("out-of-bounds in strncpy_from_user()\n"); - unused = strncpy_from_user(kmem, usermem, size + 1); - - vm_munmap((unsigned long)usermem, PAGE_SIZE); - kfree(kmem); + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -static noinline void __init kasan_alloca_oob_left(void) +static void kasan_alloca_oob_left(struct kunit *test) { volatile int i = 10; char alloca_array[i]; char *p = alloca_array - 1; - pr_info("out-of-bounds to left on alloca\n"); - *(volatile char *)p; + /* Only generic mode instruments dynamic allocas. 
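+ * (The tag-based mode leaves variable-sized stack allocations
+ * uninstrumented, so the expected report would never arrive there.)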
*/ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { + kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); + return; + } + + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -static noinline void __init kasan_alloca_oob_right(void) +static void kasan_alloca_oob_right(struct kunit *test) { volatile int i = 10; char alloca_array[i]; char *p = alloca_array + i; - pr_info("out-of-bounds to right on alloca\n"); - *(volatile char *)p; + /* Only generic mode instruments dynamic allocas. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { + kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); + return; + } + + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } -static noinline void __init kmem_cache_double_free(void) +static void kmem_cache_double_free(struct kunit *test) { char *p; size_t size = 200; struct kmem_cache *cache; cache = kmem_cache_create("test_cache", size, 0, 0, NULL); - if (!cache) { - pr_err("Cache allocation failed\n"); - return; - } - pr_info("double-free on heap object\n"); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); + p = kmem_cache_alloc(cache, GFP_KERNEL); if (!p) { - pr_err("Allocation failed\n"); + kunit_err(test, "Allocation failed: %s\n", __func__); kmem_cache_destroy(cache); return; } kmem_cache_free(cache, p); - kmem_cache_free(cache, p); + KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p)); kmem_cache_destroy(cache); } -static noinline void __init kmem_cache_invalid_free(void) +static void kmem_cache_invalid_free(struct kunit *test) { char *p; size_t size = 200; @@ -596,20 +559,17 @@ static noinline void __init kmem_cache_invalid_free(void) cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, NULL); - if (!cache) { - pr_err("Cache allocation failed\n"); - return; - } - pr_info("invalid-free of heap object\n"); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); + p = kmem_cache_alloc(cache, GFP_KERNEL); if (!p) { - pr_err("Allocation failed\n"); + kunit_err(test, "Allocation failed: %s\n", __func__); kmem_cache_destroy(cache); return; } /* Trigger invalid free, the object doesn't get freed */ - kmem_cache_free(cache, p + 1); + KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p + 1)); /* * Properly free the object to prevent the "Objects remaining in @@ -620,45 +580,69 @@ static noinline void __init kmem_cache_invalid_free(void) kmem_cache_destroy(cache); } -static noinline void __init kasan_memchr(void) +static void kasan_memchr(struct kunit *test) { char *ptr; size_t size = 24; - pr_info("out-of-bounds in memchr\n"); - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - if (!ptr) + /* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + kunit_info(test, + "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT"); return; + } + + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + KUNIT_EXPECT_KASAN_FAIL(test, + kasan_ptr_result = memchr(ptr, '1', size + 1)); - kasan_ptr_result = memchr(ptr, '1', size + 1); kfree(ptr); } -static noinline void __init kasan_memcmp(void) +static void kasan_memcmp(struct kunit *test) { char *ptr; size_t size = 24; int arr[9]; - pr_info("out-of-bounds in memcmp\n"); - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - if (!ptr) + /* See 
https://bugzilla.kernel.org/show_bug.cgi?id=206337 */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + kunit_info(test, + "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT"); return; + } + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); - kasan_int_result = memcmp(ptr, arr, size + 1); + + KUNIT_EXPECT_KASAN_FAIL(test, + kasan_int_result = memcmp(ptr, arr, size+1)); kfree(ptr); } -static noinline void __init kasan_strings(void) +static void kasan_strings(struct kunit *test) { char *ptr; size_t size = 24; - pr_info("use-after-free in strchr\n"); - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - if (!ptr) + /* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + kunit_info(test, + "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT"); return; + } + + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); @@ -669,190 +653,182 @@ static noinline void __init kasan_strings(void) * will likely point to zeroed byte. */ ptr += 16; - kasan_ptr_result = strchr(ptr, '1'); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strchr(ptr, '1')); - pr_info("use-after-free in strrchr\n"); - kasan_ptr_result = strrchr(ptr, '1'); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strrchr(ptr, '1')); - pr_info("use-after-free in strcmp\n"); - kasan_int_result = strcmp(ptr, "2"); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strcmp(ptr, "2")); - pr_info("use-after-free in strncmp\n"); - kasan_int_result = strncmp(ptr, "2", 1); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strncmp(ptr, "2", 1)); - pr_info("use-after-free in strlen\n"); - kasan_int_result = strlen(ptr); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strlen(ptr)); - pr_info("use-after-free in strnlen\n"); - kasan_int_result = strnlen(ptr, 1); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1)); +} + +static void kasan_bitops_modify(struct kunit *test, int nr, void *addr) +{ + KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr)); +} + +static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) +{ + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); + +#if defined(clear_bit_unlock_is_negative_byte) + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = + clear_bit_unlock_is_negative_byte(nr, addr)); +#endif } -static noinline void __init kasan_bitops(void) +static void kasan_bitops_generic(struct kunit *test) { + long *bits; + + /* This test is specifically crafted for the generic mode. 
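+ * Generic KASAN tracks validity at byte granularity, so the single
+ * extra byte allocated below is enough to put poison immediately
+ * after the long; kasan_bitops_tags() covers the tag-based mode.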
*/ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); + return; + } + /* * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes; * this way we do not actually corrupt other memory. */ - long *bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); - if (!bits) - return; + bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); /* * Below calls try to access bit within allocated memory; however, the * below accesses are still out-of-bounds, since bitops are defined to * operate on the whole long the bit is in. */ - pr_info("out-of-bounds in set_bit\n"); - set_bit(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in __set_bit\n"); - __set_bit(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in clear_bit\n"); - clear_bit(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in __clear_bit\n"); - __clear_bit(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in clear_bit_unlock\n"); - clear_bit_unlock(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in __clear_bit_unlock\n"); - __clear_bit_unlock(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in change_bit\n"); - change_bit(BITS_PER_LONG, bits); - - pr_info("out-of-bounds in __change_bit\n"); - __change_bit(BITS_PER_LONG, bits); + kasan_bitops_modify(test, BITS_PER_LONG, bits); /* * Below calls try to access bit beyond allocated memory. */ - pr_info("out-of-bounds in test_and_set_bit\n"); - test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits); - pr_info("out-of-bounds in __test_and_set_bit\n"); - __test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); - - pr_info("out-of-bounds in test_and_set_bit_lock\n"); - test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits); - - pr_info("out-of-bounds in test_and_clear_bit\n"); - test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); + kfree(bits); +} - pr_info("out-of-bounds in __test_and_clear_bit\n"); - __test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); +static void kasan_bitops_tags(struct kunit *test) +{ + long *bits; - pr_info("out-of-bounds in test_and_change_bit\n"); - test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); + /* This test is specifically crafted for the tag-based mode. */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_SW_TAGS required\n"); + return; + } - pr_info("out-of-bounds in __test_and_change_bit\n"); - __test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); + /* Allocation size will be rounded to up granule size, which is 16. */ + bits = kzalloc(sizeof(*bits), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - pr_info("out-of-bounds in test_bit\n"); - kasan_int_result = test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits); + /* Do the accesses past the 16 allocated bytes. 
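+ * (bit BITS_PER_LONG of bits[1] resolves to byte 16 of the
+ * allocation, the first byte whose granule carries a foreign tag.)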
*/ + kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]); -#if defined(clear_bit_unlock_is_negative_byte) - pr_info("out-of-bounds in clear_bit_unlock_is_negative_byte\n"); - kasan_int_result = clear_bit_unlock_is_negative_byte(BITS_PER_LONG + - BITS_PER_BYTE, bits); -#endif kfree(bits); } -static noinline void __init kmalloc_double_kzfree(void) +static void kmalloc_double_kzfree(struct kunit *test) { char *ptr; size_t size = 16; - pr_info("double-free (kzfree)\n"); ptr = kmalloc(size, GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - kzfree(ptr); - kzfree(ptr); + kfree_sensitive(ptr); + KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr)); } -#ifdef CONFIG_KASAN_VMALLOC -static noinline void __init vmalloc_oob(void) +static void vmalloc_oob(struct kunit *test) { void *area; - pr_info("vmalloc out-of-bounds\n"); + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + kunit_info(test, "CONFIG_KASAN_VMALLOC is not enabled."); + return; + } /* * We have to be careful not to hit the guard page. * The MMU will catch that and crash us. */ area = vmalloc(3000); - if (!area) { - pr_err("Allocation failed\n"); - return; - } + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, area); - ((volatile char *)area)[3100]; + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)area)[3100]); vfree(area); } -#else -static void __init vmalloc_oob(void) {} -#endif - -static int __init kmalloc_tests_init(void) -{ - /* - * Temporarily enable multi-shot mode. Otherwise, we'd only get a - * report for the first case. - */ - bool multishot = kasan_save_enable_multi_shot(); - - kmalloc_oob_right(); - kmalloc_oob_left(); - kmalloc_node_oob_right(); -#ifdef CONFIG_SLUB - kmalloc_pagealloc_oob_right(); - kmalloc_pagealloc_uaf(); - kmalloc_pagealloc_invalid_free(); -#endif - kmalloc_large_oob_right(); - kmalloc_oob_krealloc_more(); - kmalloc_oob_krealloc_less(); - kmalloc_oob_16(); - kmalloc_oob_in_memset(); - kmalloc_oob_memset_2(); - kmalloc_oob_memset_4(); - kmalloc_oob_memset_8(); - kmalloc_oob_memset_16(); - kmalloc_memmove_invalid_size(); - kmalloc_uaf(); - kmalloc_uaf_memset(); - kmalloc_uaf2(); - kfree_via_page(); - kfree_via_phys(); - kmem_cache_oob(); - memcg_accounted_kmem_cache(); - kasan_stack_oob(); - kasan_global_oob(); - kasan_alloca_oob_left(); - kasan_alloca_oob_right(); - ksize_unpoisons_memory(); - copy_user_test(); - kmem_cache_double_free(); - kmem_cache_invalid_free(); - kasan_memchr(); - kasan_memcmp(); - kasan_strings(); - kasan_bitops(); - kmalloc_double_kzfree(); - vmalloc_oob(); - kasan_restore_multi_shot(multishot); - - return -EAGAIN; -} +static struct kunit_case kasan_kunit_test_cases[] = { + KUNIT_CASE(kmalloc_oob_right), + KUNIT_CASE(kmalloc_oob_left), + KUNIT_CASE(kmalloc_node_oob_right), + KUNIT_CASE(kmalloc_pagealloc_oob_right), + KUNIT_CASE(kmalloc_pagealloc_uaf), + KUNIT_CASE(kmalloc_pagealloc_invalid_free), + KUNIT_CASE(kmalloc_large_oob_right), + KUNIT_CASE(kmalloc_oob_krealloc_more), + KUNIT_CASE(kmalloc_oob_krealloc_less), + KUNIT_CASE(kmalloc_oob_16), + KUNIT_CASE(kmalloc_uaf_16), + KUNIT_CASE(kmalloc_oob_in_memset), + KUNIT_CASE(kmalloc_oob_memset_2), + KUNIT_CASE(kmalloc_oob_memset_4), + KUNIT_CASE(kmalloc_oob_memset_8), + KUNIT_CASE(kmalloc_oob_memset_16), + KUNIT_CASE(kmalloc_memmove_invalid_size), + KUNIT_CASE(kmalloc_uaf), + KUNIT_CASE(kmalloc_uaf_memset), + KUNIT_CASE(kmalloc_uaf2), + KUNIT_CASE(kfree_via_page), + KUNIT_CASE(kfree_via_phys), + 
KUNIT_CASE(kmem_cache_oob), + KUNIT_CASE(memcg_accounted_kmem_cache), + KUNIT_CASE(kasan_global_oob), + KUNIT_CASE(kasan_stack_oob), + KUNIT_CASE(kasan_alloca_oob_left), + KUNIT_CASE(kasan_alloca_oob_right), + KUNIT_CASE(ksize_unpoisons_memory), + KUNIT_CASE(kmem_cache_double_free), + KUNIT_CASE(kmem_cache_invalid_free), + KUNIT_CASE(kasan_memchr), + KUNIT_CASE(kasan_memcmp), + KUNIT_CASE(kasan_strings), + KUNIT_CASE(kasan_bitops_generic), + KUNIT_CASE(kasan_bitops_tags), + KUNIT_CASE(kmalloc_double_kzfree), + KUNIT_CASE(vmalloc_oob), + {} +}; + +static struct kunit_suite kasan_kunit_test_suite = { + .name = "kasan", + .init = kasan_test_init, + .test_cases = kasan_kunit_test_cases, + .exit = kasan_test_exit, +}; + +kunit_test_suite(kasan_kunit_test_suite); -module_init(kmalloc_tests_init); MODULE_LICENSE("GPL"); diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c new file mode 100644 index 000000000000..3b4cc77992d2 --- /dev/null +++ b/lib/test_kasan_module.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + */ + +#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include "../mm/kasan/kasan.h" + +#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) + +static noinline void __init copy_user_test(void) +{ + char *kmem; + char __user *usermem; + size_t size = 10; + int unused; + + kmem = kmalloc(size, GFP_KERNEL); + if (!kmem) + return; + + usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (IS_ERR(usermem)) { + pr_err("Failed to allocate user memory\n"); + kfree(kmem); + return; + } + + pr_info("out-of-bounds in copy_from_user()\n"); + unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in copy_to_user()\n"); + unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in __copy_from_user()\n"); + unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in __copy_to_user()\n"); + unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); + unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); + unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF); + + pr_info("out-of-bounds in strncpy_from_user()\n"); + unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + + vm_munmap((unsigned long)usermem, PAGE_SIZE); + kfree(kmem); +} + +static struct kasan_rcu_info { + int i; + struct rcu_head rcu; +} *global_rcu_ptr; + +static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp) +{ + struct kasan_rcu_info *fp = container_of(rp, + struct kasan_rcu_info, rcu); + + kfree(fp); + fp->i = 1; +} + +static noinline void __init kasan_rcu_uaf(void) +{ + struct kasan_rcu_info *ptr; + + pr_info("use-after-free in kasan_rcu_reclaim\n"); + ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + global_rcu_ptr = rcu_dereference_protected(ptr, NULL); + call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim); +} + +static noinline void __init kasan_workqueue_work(struct work_struct *work) +{ + 
kfree(work); +} + +static noinline void __init kasan_workqueue_uaf(void) +{ + struct workqueue_struct *workqueue; + struct work_struct *work; + + workqueue = create_workqueue("kasan_wq_test"); + if (!workqueue) { + pr_err("Allocation failed\n"); + return; + } + work = kmalloc(sizeof(struct work_struct), GFP_KERNEL); + if (!work) { + pr_err("Allocation failed\n"); + return; + } + + INIT_WORK(work, kasan_workqueue_work); + queue_work(workqueue, work); + destroy_workqueue(workqueue); + + pr_info("use-after-free on workqueue\n"); + ((volatile struct work_struct *)work)->data; +} + +static int __init test_kasan_module_init(void) +{ + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. + */ + bool multishot = kasan_save_enable_multi_shot(); + + copy_user_test(); + kasan_rcu_uaf(); + kasan_workqueue_uaf(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; +} + +module_init(test_kasan_module_init); +MODULE_LICENSE("GPL"); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index e651c37d56db..38c250fbace3 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -745,7 +745,7 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, break; case TEST_KMOD_FS_TYPE: kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; copied = config_copy_test_fs(config, test_str, strlen(test_str)); break; @@ -877,20 +877,17 @@ static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev, int (*test_sync)(struct kmod_test_device *test_dev)) { int ret; - unsigned long new; + unsigned int val; unsigned int old_val; - ret = kstrtoul(buf, 10, &new); + ret = kstrtouint(buf, 10, &val); if (ret) return ret; - if (new > UINT_MAX) - return -EINVAL; - mutex_lock(&test_dev->config_mutex); old_val = *config; - *(unsigned int *)config = new; + *(unsigned int *)config = val; ret = test_sync(test_dev); if (ret) { @@ -914,18 +911,18 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev, unsigned int min, unsigned int max) { + unsigned int val; int ret; - unsigned long new; - ret = kstrtoul(buf, 10, &new); + ret = kstrtouint(buf, 10, &val); if (ret) return ret; - if (new < min || new > max) + if (val < min || val > max) return -EINVAL; mutex_lock(&test_dev->config_mutex); - *config = new; + *config = val; mutex_unlock(&test_dev->config_mutex); /* Always return full write size even if we didn't consume all */ @@ -936,18 +933,15 @@ static int test_dev_config_update_int(struct kmod_test_device *test_dev, const char *buf, size_t size, int *config) { + int val; int ret; - long new; - ret = kstrtol(buf, 10, &new); + ret = kstrtoint(buf, 10, &val); if (ret) return ret; - if (new < INT_MIN || new > INT_MAX) - return -EINVAL; - mutex_lock(&test_dev->config_mutex); - *config = new; + *config = val; mutex_unlock(&test_dev->config_mutex); /* Always return full write size even if we didn't consume all */ return size; diff --git a/lib/test_lockup.c b/lib/test_lockup.c index bd7c7ff39f6b..864554e76973 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -168,7 +168,7 @@ static int master_cpu; static void test_lock(bool master, bool verbose) { - u64 uninitialized_var(wait_start); + u64 wait_start; if (measure_lock_wait) wait_start = local_clock(); @@ -400,7 +400,7 @@ static void test_lockup(bool master) test_unlock(master, true); } -DEFINE_PER_CPU(struct work_struct, test_works); +static DEFINE_PER_CPU(struct work_struct, test_works); static void test_work_fn(struct work_struct *work) { @@ -480,6 +480,21 
@@ static int __init test_lockup_init(void) return -EINVAL; #ifdef CONFIG_DEBUG_SPINLOCK +#ifdef CONFIG_PREEMPT_RT + if (test_magic(lock_spinlock_ptr, + offsetof(spinlock_t, lock.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwlock_ptr, + offsetof(rwlock_t, rtmutex.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_mutex_ptr, + offsetof(struct mutex, lock.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwsem_ptr, + offsetof(struct rw_semaphore, rtmutex.wait_lock.magic), + SPINLOCK_MAGIC)) + return -EINVAL; +#else if (test_magic(lock_spinlock_ptr, offsetof(spinlock_t, rlock.magic), SPINLOCK_MAGIC) || @@ -494,6 +509,7 @@ static int __init test_lockup_init(void) SPINLOCK_MAGIC)) return -EINVAL; #endif +#endif if ((wait_state != TASK_RUNNING || (call_cond_resched && !reacquire_locks) || @@ -512,8 +528,8 @@ static int __init test_lockup_init(void) if (test_file_path[0]) { test_file = filp_open(test_file_path, O_RDONLY, 0); if (IS_ERR(test_file)) { - pr_err("cannot find file_path\n"); - return -EINVAL; + pr_err("failed to open %s: %ld\n", test_file_path, PTR_ERR(test_file)); + return PTR_ERR(test_file); } test_inode = file_inode(test_file); } else if (test_lock_inode || diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index c5a6fef7b45d..76c607ee6db5 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -434,7 +434,7 @@ static int __init test_rhltable(unsigned int entries) } else { if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) - continue; + continue; } } diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 98bc92a91662..3750323973f4 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -16,7 +16,7 @@ */ /* - * This module provides an interface to the the proc sysctl interfaces. This + * This module provides an interface to the proc sysctl interfaces. This * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the * system unless explicitly requested by name. You can also build this driver * into your kernel. diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 9ea10adf7a66..5e5d9355ef49 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -5,32 +5,54 @@ typedef void(*test_ubsan_fp)(void); +#define UBSAN_TEST(config, ...) do { \ + pr_info("%s " __VA_ARGS__ "%s(%s=%s)\n", __func__, \ + sizeof(" " __VA_ARGS__) > 2 ? " " : "", \ + #config, IS_ENABLED(config) ? 
"y" : "n"); \ + } while (0) + static void test_ubsan_add_overflow(void) { volatile int val = INT_MAX; + volatile unsigned int uval = UINT_MAX; + UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW); val += 2; + + UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW); + uval += 2; } static void test_ubsan_sub_overflow(void) { volatile int val = INT_MIN; + volatile unsigned int uval = 0; volatile int val2 = 2; + UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW); val -= val2; + + UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW); + uval -= val2; } static void test_ubsan_mul_overflow(void) { volatile int val = INT_MAX / 2; + volatile unsigned int uval = UINT_MAX / 2; + UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW); val *= 3; + + UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW); + uval *= 3; } static void test_ubsan_negate_overflow(void) { volatile int val = INT_MIN; + UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW); val = -val; } @@ -39,37 +61,67 @@ static void test_ubsan_divrem_overflow(void) volatile int val = 16; volatile int val2 = 0; + UBSAN_TEST(CONFIG_UBSAN_DIV_ZERO); val /= val2; } static void test_ubsan_shift_out_of_bounds(void) { - volatile int val = -1; - int val2 = 10; + volatile int neg = -1, wrap = 4; + int val1 = 10; + int val2 = INT_MAX; + + UBSAN_TEST(CONFIG_UBSAN_SHIFT, "negative exponent"); + val1 <<= neg; - val2 <<= val; + UBSAN_TEST(CONFIG_UBSAN_SHIFT, "left overflow"); + val2 <<= wrap; } static void test_ubsan_out_of_bounds(void) { - volatile int i = 4, j = 5; + volatile int i = 4, j = 5, k = -1; + volatile char above[4] = { }; /* Protect surrounding memory. */ volatile int arr[4]; + volatile char below[4] = { }; /* Protect surrounding memory. */ + above[0] = below[0]; + + UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "above"); arr[j] = i; + + UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "below"); + arr[k] = i; } +enum ubsan_test_enum { + UBSAN_TEST_ZERO = 0, + UBSAN_TEST_ONE, + UBSAN_TEST_MAX, +}; + static void test_ubsan_load_invalid_value(void) { volatile char *dst, *src; bool val, val2, *ptr; - char c = 4; + enum ubsan_test_enum eval, eval2, *eptr; + unsigned char c = 0xff; + UBSAN_TEST(CONFIG_UBSAN_BOOL, "bool"); dst = (char *)&val; src = &c; *dst = *src; ptr = &val2; val2 = val; + + UBSAN_TEST(CONFIG_UBSAN_ENUM, "enum"); + dst = (char *)&eval; + src = &c; + *dst = *src; + + eptr = &eval2; + eval2 = eval; } static void test_ubsan_null_ptr_deref(void) @@ -77,6 +129,7 @@ static void test_ubsan_null_ptr_deref(void) volatile int *ptr = NULL; int val; + UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE); val = *ptr; } @@ -85,6 +138,7 @@ static void test_ubsan_misaligned_access(void) volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5}; volatile int *ptr, val = 6; + UBSAN_TEST(CONFIG_UBSAN_ALIGNMENT); ptr = (int *)(arr + 1); *ptr = val; } @@ -95,6 +149,7 @@ static void test_ubsan_object_size_mismatch(void) volatile int val __aligned(8) = 4; volatile long long *ptr, val2; + UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE); ptr = (long long *)&val; val2 = *ptr; } @@ -104,15 +159,19 @@ static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_sub_overflow, test_ubsan_mul_overflow, test_ubsan_negate_overflow, - test_ubsan_divrem_overflow, test_ubsan_shift_out_of_bounds, test_ubsan_out_of_bounds, test_ubsan_load_invalid_value, - //test_ubsan_null_ptr_deref, /* exclude it because there is a crash */ test_ubsan_misaligned_access, test_ubsan_object_size_mismatch, }; +/* Excluded because they Oops the module. 
*/ +static const test_ubsan_fp skip_ubsan_array[] = { + test_ubsan_divrem_overflow, + test_ubsan_null_ptr_deref, +}; + static int __init test_ubsan_init(void) { unsigned int i; @@ -120,7 +179,6 @@ static int __init test_ubsan_init(void) for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++) test_ubsan_array[i](); - (void)test_ubsan_null_ptr_deref; /* to avoid unsed-function warning */ return 0; } module_init(test_ubsan_init); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index ddc9685702b1..5cf2fe9aab9e 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -15,6 +15,8 @@ #include <linux/delay.h> #include <linux/rwsem.h> #include <linux/mm.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> #define __param(type, name, init, msg) \ static type name = init; \ @@ -35,14 +37,18 @@ __param(int, test_loop_count, 1000000, __param(int, run_test_mask, INT_MAX, "Set tests specified in the mask.\n\n" - "\t\tid: 1, name: fix_size_alloc_test\n" - "\t\tid: 2, name: full_fit_alloc_test\n" - "\t\tid: 4, name: long_busy_list_alloc_test\n" - "\t\tid: 8, name: random_size_alloc_test\n" - "\t\tid: 16, name: fix_align_alloc_test\n" - "\t\tid: 32, name: random_size_align_alloc_test\n" - "\t\tid: 64, name: align_shift_alloc_test\n" - "\t\tid: 128, name: pcpu_alloc_test\n" + "\t\tid: 1, name: fix_size_alloc_test\n" + "\t\tid: 2, name: full_fit_alloc_test\n" + "\t\tid: 4, name: long_busy_list_alloc_test\n" + "\t\tid: 8, name: random_size_alloc_test\n" + "\t\tid: 16, name: fix_align_alloc_test\n" + "\t\tid: 32, name: random_size_align_alloc_test\n" + "\t\tid: 64, name: align_shift_alloc_test\n" + "\t\tid: 128, name: pcpu_alloc_test\n" + "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" + "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" + "\t\tid: 1024, name: kvfree_rcu_1_arg_slab_test\n" + "\t\tid: 2048, name: kvfree_rcu_2_arg_slab_test\n" /* Add a new test case description here. 
*/ ); @@ -316,6 +322,83 @@ pcpu_alloc_test(void) return rv; } +struct test_kvfree_rcu { + struct rcu_head rcu; + unsigned char array[20]; +}; + +static int +kvfree_rcu_1_arg_vmalloc_test(void) +{ + struct test_kvfree_rcu *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + p = vmalloc(1 * PAGE_SIZE); + if (!p) + return -1; + + p->array[0] = 'a'; + kvfree_rcu(p); + } + + return 0; +} + +static int +kvfree_rcu_2_arg_vmalloc_test(void) +{ + struct test_kvfree_rcu *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + p = vmalloc(1 * PAGE_SIZE); + if (!p) + return -1; + + p->array[0] = 'a'; + kvfree_rcu(p, rcu); + } + + return 0; +} + +static int +kvfree_rcu_1_arg_slab_test(void) +{ + struct test_kvfree_rcu *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -1; + + p->array[0] = 'a'; + kvfree_rcu(p); + } + + return 0; +} + +static int +kvfree_rcu_2_arg_slab_test(void) +{ + struct test_kvfree_rcu *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -1; + + p->array[0] = 'a'; + kvfree_rcu(p, rcu); + } + + return 0; +} + struct test_case_desc { const char *test_name; int (*test_func)(void); @@ -330,6 +413,10 @@ static struct test_case_desc test_case_array[] = { { "random_size_align_alloc_test", random_size_align_alloc_test }, { "align_shift_alloc_test", align_shift_alloc_test }, { "pcpu_alloc_test", pcpu_alloc_test }, + { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, + { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, + { "kvfree_rcu_1_arg_slab_test", kvfree_rcu_1_arg_slab_test }, + { "kvfree_rcu_2_arg_slab_test", kvfree_rcu_2_arg_slab_test }, /* Add a new test case here. */ }; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index d4f97925dbd8..8294f43f4981 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa) xa_destroy(xa); } +static noinline void check_xa_mark_3(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + XA_STATE(xas, xa, 0x41); + void *entry; + int count = 0; + + xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL); + xa_set_mark(xa, 0x41, XA_MARK_0); + + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) { + count++; + XA_BUG_ON(xa, entry != xa_mk_index(0x40)); + } + XA_BUG_ON(xa, count != 1); + rcu_read_unlock(); + xa_destroy(xa); +#endif +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; @@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); check_xa_mark_2(xa); + check_xa_mark_3(xa); } static noinline void check_xa_shrink(struct xarray *xa) @@ -393,6 +415,9 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); xa_erase_index(xa, 12345678); xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); @@ -1503,6 +1528,49 @@ static noinline void check_store_range(struct xarray *xa) } } +#ifdef CONFIG_XARRAY_MULTI +static void check_split_1(struct xarray *xa, unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + void 
*entry; + unsigned int i = 0; + + xa_store_order(xa, index, order, xa, GFP_KERNEL); + + xas_split_alloc(&xas, xa, order, GFP_KERNEL); + xas_lock(&xas); + xas_split(&xas, xa, order); + xas_unlock(&xas); + + xa_for_each(xa, index, entry) { + XA_BUG_ON(xa, entry != xa); + i++; + } + XA_BUG_ON(xa, i != 1 << order); + + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + xa_destroy(xa); +} + +static noinline void check_split(struct xarray *xa) +{ + unsigned int order; + + XA_BUG_ON(xa, !xa_empty(xa)); + + for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { + check_split_1(xa, 0, order); + check_split_1(xa, 1UL << order, order); + check_split_1(xa, 3UL << order, order); + } +} +#else +static void check_split(struct xarray *xa) { } +#endif + static void check_align_1(struct xarray *xa, char *name) { int i; @@ -1575,14 +1643,9 @@ static noinline void shadow_remove(struct xarray *xa) xa_lock(xa); while ((node = list_first_entry_or_null(&shadow_nodes, struct xa_node, private_list))) { - XA_STATE(xas, node->array, 0); XA_BUG_ON(xa, node->array != xa); list_del_init(&node->private_list); - xas.xa_node = xa_parent_locked(node->array, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, test_update_node); - xas_store(&xas, NULL); + xa_delete_node(node, test_update_node); } xa_unlock(xa); } @@ -1649,6 +1712,26 @@ static noinline void check_account(struct xarray *xa) #endif } +static noinline void check_get_order(struct xarray *xa) +{ + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j; + + for (i = 0; i < 3; i++) + XA_BUG_ON(xa, xa_get_order(xa, i) != 0); + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xa_store_order(xa, i << order, order, + xa_mk_index(i << order), GFP_KERNEL); + for (j = i << order; j < (i + 1) << order; j++) + XA_BUG_ON(xa, xa_get_order(xa, j) != order); + xa_erase(xa, i << order); + } + } +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1697,6 +1780,7 @@ static int xarray_checks(void) check_reserve(&array); check_reserve(&xa0); check_multi_store(&array); + check_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); @@ -1708,6 +1792,7 @@ static int xarray_checks(void) check_store_range(&array); check_store_iter(&array); check_align(&xa0); + check_split(&array); check_workingset(&array, 0); check_workingset(&array, 64); diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 277cb4417ac2..4cf250031f0f 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -11,7 +11,7 @@ * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. * Communications of the Association for Computing Machinery, * 20(10), 1977, pp. 762-772. 
- * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * https://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf * * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index ab749ec10ab5..64fd9015ad80 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -193,7 +193,7 @@ startover: TOKEN_MISMATCH(); block_idx++; - /* fall through */ + fallthrough; case TS_FSM_ANY: if (next == NULL) diff --git a/lib/ubsan.c b/lib/ubsan.c index cb9af3f6b77e..3e3352f3d0da 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -17,7 +17,7 @@ #include "ubsan.h" -const char *type_check_kinds[] = { +static const char * const type_check_kinds[] = { "load of", "store to", "reference binding to", diff --git a/lib/usercopy.c b/lib/usercopy.c index b26509f112f9..7413dd300516 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bitops.h> +#include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/uaccess.h> @@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -25,6 +26,8 @@ EXPORT_SYMBOL(_copy_from_user); unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (likely(access_ok(to, n))) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index bcc9a98a0524..2919f1698140 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -68,7 +68,7 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, if (unlikely(!vdso_clocksource_ok(vd))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode); + cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; ns = vdso_ts->nsec; @@ -138,7 +138,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, if (unlikely(!vdso_clocksource_ok(vd))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode); + cycles = __arch_get_hw_counter(vd->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; ns = vdso_ts->nsec; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 259e55895933..3b53c73580c5 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -381,6 +381,9 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ +static_assert(ZEROPAD == ('0' - ' ')); +static_assert(SMALL == ' '); + enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ FORMAT_TYPE_WIDTH, @@ -507,7 +510,7 @@ char *number(char *buf, char *end, unsigned long long num, /* zero or space padding */ if (!(spec.flags & LEFT)) { char c = ' ' + (spec.flags & ZEROPAD); - BUILD_BUG_ON(' ' + ZEROPAD != '0'); + while (--field_width >= 0) { if (buf < end) *buf = c; @@ -937,13 +940,13 @@ char *bdev_name(char *buf, char *end, struct block_device *bdev, hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); - if (bdev->bd_part->partno) { + if (bdev->bd_partno) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 
{ if (buf < end) *buf = 'p'; buf++; } - buf = number(buf, end, bdev->bd_part->partno, spec); + buf = number(buf, end, bdev->bd_partno, spec); } return buf; } @@ -1262,7 +1265,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr, case 'R': reversed = true; - /* fall through */ + fallthrough; default: separator = ':'; @@ -1678,7 +1681,8 @@ char *uuid_string(char *buf, char *end, const u8 *addr, switch (*(++fmt)) { case 'L': - uc = true; /* fall-through */ + uc = true; + fallthrough; case 'l': index = guid_index; break; @@ -1934,7 +1938,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, names = vmaflag_names; break; case 'g': - flags = *(gfp_t *)flags_ptr; + flags = (__force unsigned long)(*(gfp_t *)flags_ptr); names = gfpflag_names; break; default: @@ -1976,12 +1980,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, char *buf_start = buf; struct property *prop; bool has_mult, pass; - static const struct printf_spec num_spec = { - .flags = SMALL, - .field_width = -1, - .precision = -1, - .base = 10, - }; struct printf_spec str_spec = spec; str_spec.field_width = -1; @@ -2021,7 +2019,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.precision = precision; break; case 'p': /* phandle */ - buf = number(buf, end, (unsigned int)dn->phandle, num_spec); + buf = number(buf, end, (unsigned int)dn->phandle, default_dec_spec); break; case 'P': /* path-spec */ p = fwnode_get_name(of_fwnode_handle(dn)); @@ -2134,7 +2132,7 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, * [4] or [6] and is able to print port [p], flowinfo [f], scope [s] * - '[Ii][4S][hnbl]' IPv4 addresses in host, network, big or little endian order * - 'I[6S]c' for IPv6 addresses printed as specified by - * http://tools.ietf.org/html/rfc5952 + * https://tools.ietf.org/html/rfc5952 * - 'E[achnops]' For an escaped buffer, where rules are defined by combination * of the following flags (see string_escape_mem() for the * details): @@ -2221,7 +2219,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'S': case 's': ptr = dereference_symbol_descriptor(ptr); - /* Fallthrough */ + fallthrough; case 'B': return symbol_string(buf, end, ptr, spec, fmt); case 'R': @@ -2452,7 +2450,7 @@ qualifier: case 'x': spec->flags |= SMALL; - /* fall through */ + fallthrough; case 'X': spec->base = 16; @@ -2461,6 +2459,7 @@ qualifier: case 'd': case 'i': spec->flags |= SIGN; + break; case 'u': break; @@ -2470,7 +2469,7 @@ qualifier: * utility, treat it as any other invalid or * unsupported format specifier. 
*/ - /* Fall-through */ + fallthrough; default: WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); @@ -3413,10 +3412,10 @@ int vsscanf(const char *buf, const char *fmt, va_list args) break; case 'i': base = 0; - /* fall through */ + fallthrough; case 'd': is_sign = true; - /* fall through */ + fallthrough; case 'u': break; case '%': diff --git a/lib/xarray.c b/lib/xarray.c index e9e641d3c0c3..5fa51614802a 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node) */ static void xas_destroy(struct xa_state *xas) { - struct xa_node *node = xas->xa_alloc; + struct xa_node *next, *node = xas->xa_alloc; - if (!node) - return; - XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); - kmem_cache_free(radix_tree_node_cachep, node); - xas->xa_alloc = NULL; + while (node) { + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + next = rcu_dereference_raw(node->parent); + radix_tree_node_rcu_free(&node->rcu_head); + xas->xa_alloc = node = next; + } } /** @@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) } if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas) /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a - * multiple-index entry at index 0, the calculation is a little more complex + * multi-index entry at index 0, the calculation is a little more complex * than you might expect. */ static unsigned long xas_max(struct xa_state *xas) @@ -703,7 +706,7 @@ void xas_create_range(struct xa_state *xas) unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; - xas->xa_index |= ((sibs + 1) << shift) - 1; + xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; @@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_init_marks); +#ifdef CONFIG_XARRAY_MULTI +static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) +{ + unsigned int marks = 0; + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (node_get_mark(node, offset, mark)) + marks |= 1 << (__force unsigned int)mark; + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + return marks; +} + +static void node_set_marks(struct xa_node *node, unsigned int offset, + struct xa_node *child, unsigned int marks) +{ + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (marks & (1 << (__force unsigned int)mark)) { + node_set_mark(node, offset, mark); + if (child) + node_mark_all(child, mark); + } + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} + +/** + * xas_split_alloc() - Allocate memory for splitting an entry. + * @xas: XArray operation state. + * @entry: New entry which will be stored in the array. + * @order: New entry order. + * @gfp: Memory allocation flags. + * + * This function should be called before calling xas_split(). 
+ * If necessary, it will allocate new nodes (and fill them with @entry) + * to prepare for the upcoming split of an entry of @order size into + * entries of the order stored in the @xas. + * + * Context: May sleep if @gfp flags permit. + */ +void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, + gfp_t gfp) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int mask = xas->xa_sibs; + + /* XXX: no support for splitting really large entries yet */ + if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) + goto nomem; + if (xas->xa_shift + XA_CHUNK_SHIFT > order) + return; + + do { + unsigned int i; + void *sibling; + struct xa_node *node; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!node) + goto nomem; + node->array = xas->xa; + for (i = 0; i < XA_CHUNK_SIZE; i++) { + if ((i & mask) == 0) { + RCU_INIT_POINTER(node->slots[i], entry); + sibling = xa_mk_sibling(0); + } else { + RCU_INIT_POINTER(node->slots[i], sibling); + } + } + RCU_INIT_POINTER(node->parent, xas->xa_alloc); + xas->xa_alloc = node; + } while (sibs-- > 0); + + return; +nomem: + xas_destroy(xas); + xas_set_err(xas, -ENOMEM); +} +EXPORT_SYMBOL_GPL(xas_split_alloc); + +/** + * xas_split() - Split a multi-index entry into smaller entries. + * @xas: XArray operation state. + * @entry: New entry to store in the array. + * @order: New entry order. + * + * The value in the entry is copied to all the replacement entries. + * + * Context: Any context. The caller should hold the xa_lock. + */ +void xas_split(struct xa_state *xas, void *entry, unsigned int order) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int offset, marks; + struct xa_node *node; + void *curr = xas_load(xas); + int values = 0; + + node = xas->xa_node; + if (xas_top(node)) + return; + + marks = node_get_marks(node, xas->xa_offset); + + offset = xas->xa_offset + sibs; + do { + if (xas->xa_shift < node->shift) { + struct xa_node *child = xas->xa_alloc; + + xas->xa_alloc = rcu_dereference_raw(child->parent); + child->shift = node->shift - XA_CHUNK_SHIFT; + child->offset = offset; + child->count = XA_CHUNK_SIZE; + child->nr_values = xa_is_value(entry) ? + XA_CHUNK_SIZE : 0; + RCU_INIT_POINTER(child->parent, node); + node_set_marks(node, offset, child, marks); + rcu_assign_pointer(node->slots[offset], + xa_mk_node(child)); + if (xa_is_value(curr)) + values--; + } else { + unsigned int canon = offset - xas->xa_sibs; + + node_set_marks(node, canon, NULL, marks); + rcu_assign_pointer(node->slots[canon], entry); + while (offset > canon) + rcu_assign_pointer(node->slots[offset--], + xa_mk_sibling(canon)); + values += (xa_is_value(entry) - xa_is_value(curr)) * + (xas->xa_sibs + 1); + } + } while (offset-- > xas->xa_offset); + + node->nr_values += values; +} +EXPORT_SYMBOL_GPL(xas_split); +#endif + /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. @@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store); * @gfp: Memory allocation flags. * * After this function returns, loads from this index will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. 
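The kernel-doc above describes a two-phase protocol for splitting a multi-index entry: xas_split_alloc() preallocates any nodes the split will need in a sleepable context, then xas_split() performs the split under the xa_lock. A minimal sketch of that calling sequence, mirroring the check_split_1() test added to lib/test_xarray.c earlier in this diff; note that xa_store_order() is that test file's multi-order store helper rather than a public API, and error handling is reduced to a single xas_error() check:

/*
 * Sketch: store one order-2 multi-index entry, then split it into
 * four order-0 entries using the helpers added above.
 */
static int split_order2_example(struct xarray *xa)
{
	XA_STATE(xas, xa, 0x40);
	void *entry = xa_mk_value(0x40);

	/* One entry covering indices 0x40-0x43. */
	xa_store_order(xa, 0x40, 2, entry, GFP_KERNEL);

	/*
	 * Phase 1: preallocate; may sleep with GFP_KERNEL. (For an
	 * order this small no nodes are actually needed, but the
	 * calling pattern is the same.)
	 */
	xas_split_alloc(&xas, entry, 2, GFP_KERNEL);
	if (xas_error(&xas))
		return xas_error(&xas);

	/* Phase 2: the split itself must run under the xa_lock. */
	xas_lock(&xas);
	xas_split(&xas, entry, 2);
	xas_unlock(&xas);

	/* Indices 0x40-0x43 now each hold their own order-0 entry. */
	return 0;
}

Keeping the allocation outside the lock is the point of the two-call design: xas_split() itself never allocates, so it is usable from contexts that cannot sleep.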
@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first, * * After this function returns, loads from any index between @first and @last, * inclusive will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Process context. Takes and releases the xa_lock. May sleep @@ -1592,6 +1742,46 @@ unlock: return xas_result(&xas, NULL); } EXPORT_SYMBOL(xa_store_range); + +/** + * xa_get_order() - Get the order of an entry. + * @xa: XArray. + * @index: Index of the entry. + * + * Return: A number between 0 and 63 indicating the order of the entry. + */ +int xa_get_order(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + int order = 0; + + rcu_read_lock(); + entry = xas_load(&xas); + + if (!entry) + goto unlock; + + if (!xas.xa_node) + goto unlock; + + for (;;) { + unsigned int slot = xas.xa_offset + (1 << order); + + if (slot >= XA_CHUNK_SIZE) + break; + if (!xa_is_sibling(xas.xa_node->slots[slot])) + break; + order++; + } + + order += xas.xa_node->shift; +unlock: + rcu_read_unlock(); + + return order; +} +EXPORT_SYMBOL(xa_get_order); #endif /* CONFIG_XARRAY_MULTI */ /** @@ -1974,6 +2164,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, EXPORT_SYMBOL(xa_extract); /** + * xa_delete_node() - Private interface for workingset code. + * @node: Node to be removed from the tree. + * @update: Function to call to update ancestor nodes. + * + * Context: xa_lock must be held on entry and will not be released. + */ +void xa_delete_node(struct xa_node *node, xa_update_node_t update) +{ + struct xa_state xas = { + .xa = node->array, + .xa_index = (unsigned long)node->offset << + (node->shift + XA_CHUNK_SHIFT), + .xa_shift = node->shift + XA_CHUNK_SHIFT, + .xa_offset = node->offset, + .xa_node = xa_parent_locked(node->array, node), + .xa_update = update, + }; + + xas_store(&xas, NULL); +} +EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ + +/** * xa_destroy() - Free all internal data structures. * @xa: XArray. * diff --git a/lib/xxhash.c b/lib/xxhash.c index aa61e2a3802f..d5bb9ff10607 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 22528743d4ce..5cb50245a878 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -5,7 +5,7 @@ config XZ_DEC help LZMA2 compression algorithm and BCJ filters are supported using the .xz file format as the container. For integrity checking, - CRC32 is supported. See Documentation/xz.txt for more information. + CRC32 is supported. See Documentation/staging/xz.rst for more information. if XZ_DEC diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 912aae5fa09e..88a2c35e1b59 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -2,7 +2,7 @@ * CRC32 using the polynomial from IEEE-802.3 * * Authors: Lasse Collin <lasse.collin@tukaani.org> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. 
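Returning to the xa_get_order() function exported from lib/xarray.c above: every index covered by a multi-index entry reports that entry's order, which is exactly what the new check_get_order() test walks through. A small sketch of the semantics under CONFIG_XARRAY_MULTI, again borrowing the xa_store_order() helper from lib/test_xarray.c:

/* Each covered index reports the order of the whole entry. */
static void get_order_example(struct xarray *xa)
{
	/* One order-3 entry spanning indices 8-15. */
	xa_store_order(xa, 8, 3, xa_mk_value(8), GFP_KERNEL);

	WARN_ON(xa_get_order(xa, 8) != 3);	/* first covered index */
	WARN_ON(xa_get_order(xa, 15) != 3);	/* last covered index */
	WARN_ON(xa_get_order(xa, 16) != 0);	/* empty slot: order 0 */

	xa_erase(xa, 8);	/* removes the whole multi-index entry */
}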
diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index a768e6d28bbb..72ddac6ef2ec 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -2,7 +2,7 @@ * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin <lasse.collin@tukaani.org> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 156f26fdc4c9..ca2603abee08 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -2,7 +2,7 @@ * LZMA2 decoder * * Authors: Lasse Collin <lasse.collin@tukaani.org> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. @@ -1043,7 +1043,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.sequence = SEQ_LZMA_PREPARE; - /* Fall through */ + fallthrough; case SEQ_LZMA_PREPARE: if (s->lzma2.compressed < RC_INIT_BYTES) @@ -1055,7 +1055,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.compressed -= RC_INIT_BYTES; s->lzma2.sequence = SEQ_LZMA_RUN; - /* Fall through */ + fallthrough; case SEQ_LZMA_RUN: /* diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index bd1d182419d7..fea86deaaa01 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -583,7 +583,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) if (ret != XZ_OK) return ret; - /* Fall through */ + fallthrough; case SEQ_BLOCK_START: /* We need one byte of input to continue. */ @@ -608,7 +608,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->temp.pos = 0; s->sequence = SEQ_BLOCK_HEADER; - /* Fall through */ + fallthrough; case SEQ_BLOCK_HEADER: if (!fill_temp(s, b)) @@ -620,7 +620,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->sequence = SEQ_BLOCK_UNCOMPRESS; - /* Fall through */ + fallthrough; case SEQ_BLOCK_UNCOMPRESS: ret = dec_block(s, b); @@ -629,7 +629,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->sequence = SEQ_BLOCK_PADDING; - /* Fall through */ + fallthrough; case SEQ_BLOCK_PADDING: /* @@ -651,7 +651,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->sequence = SEQ_BLOCK_CHECK; - /* Fall through */ + fallthrough; case SEQ_BLOCK_CHECK: if (s->check_type == XZ_CHECK_CRC32) { @@ -675,7 +675,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->sequence = SEQ_INDEX_PADDING; - /* Fall through */ + fallthrough; case SEQ_INDEX_PADDING: while ((s->index.size + (b->in_pos - s->in_start)) @@ -699,7 +699,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->sequence = SEQ_INDEX_CRC32; - /* Fall through */ + fallthrough; case SEQ_INDEX_CRC32: ret = crc32_validate(s, b); @@ -709,7 +709,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) s->temp.size = STREAM_HEADER_SIZE; s->sequence = SEQ_STREAM_FOOTER; - /* Fall through */ + fallthrough; case SEQ_STREAM_FOOTER: if (!fill_temp(s, b)) diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h index 071d67bee9f5..92d852d4f87a 100644 --- a/lib/xz/xz_lzma2.h +++ b/lib/xz/xz_lzma2.h @@ -2,7 +2,7 @@ * LZMA2 definitions * * Authors: Lasse Collin <lasse.collin@tukaani.org> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. 
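Most of the remaining xz, zlib and zstd hunks are a single mechanical conversion: the ad-hoc /* fall through */ comments become the fallthrough pseudo-keyword from include/linux/compiler_attributes.h, which expands to __attribute__((__fallthrough__)) where supported, so -Wimplicit-fallthrough can verify the intent at the language level rather than by comment matching. The pattern in miniature, as a standalone sketch:

/* Deliberate case fall-through, annotated for the compiler. */
static unsigned int load_be24(const unsigned char *p, unsigned int n)
{
	unsigned int v = 0;

	switch (n) {
	case 3:
		v |= (unsigned int)p[2] << 16;
		fallthrough;	/* keep accumulating the lower bytes */
	case 2:
		v |= (unsigned int)p[1] << 8;
		fallthrough;
	case 1:
		v |= p[0];
		break;
	default:
		break;
	}
	return v;
}

Note how the bitstream.h hunk further below also adds a fallthrough before the final default label: the annotation is required on every non-empty case that drops through, including the last one before default.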
diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h index 66cb5a7055ec..430bb3a0d195 100644 --- a/lib/xz/xz_stream.h +++ b/lib/xz/xz_stream.h @@ -19,7 +19,7 @@ /* * See the .xz file format specification at - * http://tukaani.org/xz/xz-file-format.txt + * https://tukaani.org/xz/xz-file-format.txt * to understand the container format. */ diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index aa9ef23474df..db107016d29b 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -4,6 +4,7 @@ #include "dfltcc_util.h" #include "dfltcc.h" #include <asm/setup.h> +#include <linux/export.h> #include <linux/zutil.h> /* @@ -29,6 +30,7 @@ int dfltcc_can_inflate( return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); } +EXPORT_SYMBOL(dfltcc_can_inflate); static int dfltcc_was_inflate_used( z_streamp strm @@ -147,3 +149,4 @@ dfltcc_inflate_action dfltcc_inflate( return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; } +EXPORT_SYMBOL(dfltcc_inflate); diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c index 67cc9b08ae9d..ee39b5eb71f7 100644 --- a/lib/zlib_inflate/inflate.c +++ b/lib/zlib_inflate/inflate.c @@ -396,7 +396,7 @@ int zlib_inflate(z_streamp strm, int flush) strm->adler = state->check = REVERSE(hold); INITBITS(); state->mode = DICT; - /* fall through */ + fallthrough; case DICT: if (state->havedict == 0) { RESTORE(); @@ -404,10 +404,10 @@ int zlib_inflate(z_streamp strm, int flush) } strm->adler = state->check = zlib_adler32(0L, NULL, 0); state->mode = TYPE; - /* fall through */ + fallthrough; case TYPE: if (flush == Z_BLOCK) goto inf_leave; - /* fall through */ + fallthrough; case TYPEDO: INFLATE_TYPEDO_HOOK(strm, flush); if (state->last) { @@ -446,7 +446,7 @@ int zlib_inflate(z_streamp strm, int flush) state->length = (unsigned)hold & 0xffff; INITBITS(); state->mode = COPY; - /* fall through */ + fallthrough; case COPY: copy = state->length; if (copy) { @@ -480,7 +480,7 @@ int zlib_inflate(z_streamp strm, int flush) #endif state->have = 0; state->mode = LENLENS; - /* fall through */ + fallthrough; case LENLENS: while (state->have < state->ncode) { NEEDBITS(3); @@ -501,7 +501,7 @@ int zlib_inflate(z_streamp strm, int flush) } state->have = 0; state->mode = CODELENS; - /* fall through */ + fallthrough; case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { @@ -575,7 +575,7 @@ int zlib_inflate(z_streamp strm, int flush) break; } state->mode = LEN; - /* fall through */ + fallthrough; case LEN: if (have >= 6 && left >= 258) { RESTORE(); @@ -615,7 +615,7 @@ int zlib_inflate(z_streamp strm, int flush) } state->extra = (unsigned)(this.op) & 15; state->mode = LENEXT; - /* fall through */ + fallthrough; case LENEXT: if (state->extra) { NEEDBITS(state->extra); @@ -623,7 +623,7 @@ int zlib_inflate(z_streamp strm, int flush) DROPBITS(state->extra); } state->mode = DIST; - /* fall through */ + fallthrough; case DIST: for (;;) { this = state->distcode[BITS(state->distbits)]; @@ -649,7 +649,7 @@ int zlib_inflate(z_streamp strm, int flush) state->offset = (unsigned)this.val; state->extra = (unsigned)(this.op) & 15; state->mode = DISTEXT; - /* fall through */ + fallthrough; case DISTEXT: if (state->extra) { NEEDBITS(state->extra); @@ -669,7 +669,7 @@ int zlib_inflate(z_streamp strm, int flush) break; } state->mode = MATCH; - /* fall through */ + fallthrough; case MATCH: if (left == 0) goto 
inf_leave; copy = out - left; @@ -720,7 +720,7 @@ int zlib_inflate(z_streamp strm, int flush) INITBITS(); } state->mode = DONE; - /* fall through */ + fallthrough; case DONE: ret = Z_STREAM_END; goto inf_leave; diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h index 3a49784d5c61..5d6343c1a909 100644 --- a/lib/zstd/bitstream.h +++ b/lib/zstd/bitstream.h @@ -259,16 +259,17 @@ ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, s bitD->bitContainer = *(const BYTE *)(bitD->start); switch (srcSize) { case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); - /* fall through */ + fallthrough; case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); - /* fall through */ + fallthrough; case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); - /* fall through */ + fallthrough; case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; - /* fall through */ + fallthrough; case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; - /* fall through */ + fallthrough; case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; + fallthrough; default:; } { diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c index 5e0b67003e55..b080264ed3ad 100644 --- a/lib/zstd/compress.c +++ b/lib/zstd/compress.c @@ -3182,7 +3182,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t * zcs->outBuffFlushedSize = 0; zcs->stage = zcss_flush; /* pass-through to flush stage */ } - /* fall through */ + fallthrough; case zcss_flush: { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c index 269ee9a796c1..66cd487a326a 100644 --- a/lib/zstd/decompress.c +++ b/lib/zstd/decompress.c @@ -442,7 +442,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize case set_repeat: if (dctx->litEntropy == 0) return ERROR(dictionary_corrupted); - /* fall-through */ + fallthrough; case set_compressed: if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ @@ -1768,7 +1768,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, c return 0; } dctx->expected = 0; /* not necessary to copy more */ - /* fall through */ + fallthrough; case ZSTDds_decodeFrameHeader: memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); @@ -2309,7 +2309,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB switch (zds->stage) { case zdss_init: ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ - /* fall-through */ + fallthrough; case zdss_loadHeader: { size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); @@ -2376,7 +2376,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB } zds->stage = zdss_read; } - /* fall through */ + fallthrough; case zdss_read: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2405,7 +2405,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB zds->stage = zdss_load; /* pass-through */ } - /* fall through */ + fallthrough; case zdss_load: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2438,7 +2438,7 @@ size_t 
ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB /* pass-through */ } } - /* fall through */ + fallthrough; case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c index a84300e5a013..0b353530fb3f 100644 --- a/lib/zstd/fse_decompress.c +++ b/lib/zstd/fse_decompress.c @@ -47,6 +47,7 @@ ****************************************************************/ #include "bitstream.h" #include "fse.h" +#include "zstd_internal.h" #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/string.h> /* memcpy, memset */ @@ -60,14 +61,6 @@ enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ } /* use only *after* variable declarations */ -/* check and forward error code */ -#define CHECK_F(f) \ - { \ - size_t const e = f; \ - if (FSE_isError(e)) \ - return e; \ - } - /* ************************************************************** * Templates ****************************************************************/ diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c index e727812d12aa..fd32838c185f 100644 --- a/lib/zstd/huf_compress.c +++ b/lib/zstd/huf_compress.c @@ -556,10 +556,11 @@ size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, si n = srcSize & ~3; /* join to mod 4 */ switch (srcSize & 3) { case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); - /* fall through */ + fallthrough; case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); - /* fall through */ + fallthrough; case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); + fallthrough; case 0: default:; } diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h index 1a79fab9e13a..dac753397f86 100644 --- a/lib/zstd/zstd_internal.h +++ b/lib/zstd/zstd_internal.h @@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { - memcpy(dst, src, 8); + /* + * zstd relies heavily on gcc being able to analyze and inline this + * memcpy() call, since it is called in a tight loop. Preboot mode + * is compiled in freestanding mode, which stops gcc from analyzing + * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a + * regular memcpy(). + */ + __builtin_memcpy(dst, src, 8); } /*! ZSTD_wildcopy() : * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ @@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. +#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 + /* + * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. * Avoid the bad case where the loop only runs once by handling the * special case separately. This doesn't trigger the bug because it * doesn't involve pointer/integer overflow. */ if (length <= 8) return ZSTD_copy8(dst, src); +#endif do { ZSTD_copy8(op, ip); op += 8; |

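One closing note on the zstd_internal.h hunk above. The new comment in ZSTD_copy8() explains the subtlety: preboot code is compiled freestanding, and with -ffreestanding gcc no longer analyzes memcpy() as a builtin, so a fixed-size 8-byte copy in a hot loop would remain an out-of-line call; __builtin_memcpy() restores the usual constant-size expansion. An illustrative sketch of the difference, with hypothetical function names:

/* Freestanding build: memcpy must be declared and linked explicitly. */
void *memcpy(void *dst, const void *src, unsigned long n);

static inline void copy8_libcall(void *dst, const void *src)
{
	memcpy(dst, src, 8);		/* stays a call to the memcpy symbol */
}

static inline void copy8_builtin(void *dst, const void *src)
{
	__builtin_memcpy(dst, src, 8);	/* expanded inline: one 8-byte load/store */
}

The companion ZSTD_wildcopy() change narrows the old workaround for gcc bug 81388 to the affected releases (gcc 7.0 through 7.1) by guarding it with a GCC_VERSION check, so other compilers take the plain copy loop.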