Diffstat (limited to 'arch/arc')
-rw-r--r--  arch/arc/Kbuild | 3
-rw-r--r--  arch/arc/Kconfig | 188
-rw-r--r--  arch/arc/Makefile | 59
-rw-r--r--  arch/arc/boot/.gitignore | 1
-rw-r--r--  arch/arc/boot/Makefile | 18
-rw-r--r--  arch/arc/boot/dts/Makefile | 4
-rw-r--r--  arch/arc/boot/dts/axc001.dtsi | 2
-rw-r--r--  arch/arc/boot/dts/axc003.dtsi | 6
-rw-r--r--  arch/arc/boot/dts/axc003_idu.dtsi | 6
-rw-r--r--  arch/arc/boot/dts/axs10x_mb.dtsi | 6
-rw-r--r--  arch/arc/boot/dts/eznps.dts | 84
-rw-r--r--  arch/arc/boot/dts/haps_hs.dts | 2
-rw-r--r--  arch/arc/boot/dts/hsdk.dts | 12
-rw-r--r--  arch/arc/boot/dts/vdk_axc003.dtsi | 2
-rw-r--r--  arch/arc/boot/dts/vdk_axc003_idu.dtsi | 2
-rw-r--r--  arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 2
-rw-r--r--  arch/arc/configs/axs101_defconfig | 7
-rw-r--r--  arch/arc/configs/axs103_defconfig | 7
-rw-r--r--  arch/arc/configs/axs103_smp_defconfig | 8
-rw-r--r--  arch/arc/configs/haps_hs_defconfig | 3
-rw-r--r--  arch/arc/configs/haps_hs_smp_defconfig | 5
-rw-r--r--  arch/arc/configs/hsdk_defconfig | 6
-rw-r--r--  arch/arc/configs/nps_defconfig | 82
-rw-r--r--  arch/arc/configs/nsim_700_defconfig | 4
-rw-r--r--  arch/arc/configs/nsimosci_defconfig | 6
-rw-r--r--  arch/arc/configs/nsimosci_hs_defconfig | 6
-rw-r--r--  arch/arc/configs/nsimosci_hs_smp_defconfig | 9
-rw-r--r--  arch/arc/configs/tb10x_defconfig | 13
-rw-r--r--  arch/arc/configs/vdk_hs38_defconfig | 6
-rw-r--r--  arch/arc/configs/vdk_hs38_smp_defconfig | 4
-rw-r--r--  arch/arc/include/asm/Kbuild | 22
-rw-r--r--  arch/arc/include/asm/arcregs.h | 127
-rw-r--r--  arch/arc/include/asm/asserts.h | 34
-rw-r--r--  arch/arc/include/asm/atomic-llsc.h | 97
-rw-r--r--  arch/arc/include/asm/atomic-spinlock.h | 111
-rw-r--r--  arch/arc/include/asm/atomic.h | 536
-rw-r--r--  arch/arc/include/asm/atomic64-arcv2.h | 235
-rw-r--r--  arch/arc/include/asm/barrier.h | 9
-rw-r--r--  arch/arc/include/asm/bitops.h | 251
-rw-r--r--  arch/arc/include/asm/bug.h | 3
-rw-r--r--  arch/arc/include/asm/cache.h | 4
-rw-r--r--  arch/arc/include/asm/cacheflush.h | 58
-rw-r--r--  arch/arc/include/asm/cachetype.h | 9
-rw-r--r--  arch/arc/include/asm/checksum.h | 2
-rw-r--r--  arch/arc/include/asm/cmpxchg.h | 299
-rw-r--r--  arch/arc/include/asm/current.h | 2
-rw-r--r--  arch/arc/include/asm/dma.h | 5
-rw-r--r--  arch/arc/include/asm/dsp-impl.h | 152
-rw-r--r--  arch/arc/include/asm/dsp.h | 29
-rw-r--r--  arch/arc/include/asm/dwarf.h | 32
-rw-r--r--  arch/arc/include/asm/elf.h | 2
-rw-r--r--  arch/arc/include/asm/entry-arcv2.h | 108
-rw-r--r--  arch/arc/include/asm/entry-compact.h | 168
-rw-r--r--  arch/arc/include/asm/entry.h | 220
-rw-r--r--  arch/arc/include/asm/fb.h | 16
-rw-r--r--  arch/arc/include/asm/fpu.h | 57
-rw-r--r--  arch/arc/include/asm/futex.h | 5
-rw-r--r--  arch/arc/include/asm/highmem.h | 44
-rw-r--r--  arch/arc/include/asm/hugepage.h | 21
-rw-r--r--  arch/arc/include/asm/io.h | 7
-rw-r--r--  arch/arc/include/asm/irq.h | 1
-rw-r--r--  arch/arc/include/asm/irqflags-compact.h | 13
-rw-r--r--  arch/arc/include/asm/jump_label.h | 4
-rw-r--r--  arch/arc/include/asm/kmap_types.h | 14
-rw-r--r--  arch/arc/include/asm/kprobes.h | 5
-rw-r--r--  arch/arc/include/asm/linkage.h | 14
-rw-r--r--  arch/arc/include/asm/mach_desc.h | 2
-rw-r--r--  arch/arc/include/asm/mmu-arcv2.h | 103
-rw-r--r--  arch/arc/include/asm/mmu.h | 89
-rw-r--r--  arch/arc/include/asm/mmu_context.h | 45
-rw-r--r--  arch/arc/include/asm/mmzone.h | 40
-rw-r--r--  arch/arc/include/asm/module.h | 5
-rw-r--r--  arch/arc/include/asm/page.h | 125
-rw-r--r--  arch/arc/include/asm/perf_event.h | 162
-rw-r--r--  arch/arc/include/asm/pgalloc.h | 87
-rw-r--r--  arch/arc/include/asm/pgtable-bits-arcv2.h | 147
-rw-r--r--  arch/arc/include/asm/pgtable-levels.h | 188
-rw-r--r--  arch/arc/include/asm/pgtable.h | 373
-rw-r--r--  arch/arc/include/asm/processor.h | 65
-rw-r--r--  arch/arc/include/asm/ptrace.h | 111
-rw-r--r--  arch/arc/include/asm/segment.h | 21
-rw-r--r--  arch/arc/include/asm/setup.h | 16
-rw-r--r--  arch/arc/include/asm/smp.h | 16
-rw-r--r--  arch/arc/include/asm/spinlock.h | 6
-rw-r--r--  arch/arc/include/asm/switch_to.h | 28
-rw-r--r--  arch/arc/include/asm/syscall.h | 2
-rw-r--r--  arch/arc/include/asm/syscalls.h | 1
-rw-r--r--  arch/arc/include/asm/thread_info.h | 26
-rw-r--r--  arch/arc/include/asm/tlb-mmu1.h | 101
-rw-r--r--  arch/arc/include/asm/uaccess.h | 123
-rw-r--r--  arch/arc/include/asm/vermagic.h | 8
-rw-r--r--  arch/arc/include/asm/vmalloc.h | 4
-rw-r--r--  arch/arc/include/uapi/asm/bpf_perf_event.h | 9
-rw-r--r--  arch/arc/include/uapi/asm/page.h | 7
-rw-r--r--  arch/arc/include/uapi/asm/sigcontext.h | 1
-rw-r--r--  arch/arc/include/uapi/asm/unistd.h | 1
-rw-r--r--  arch/arc/kernel/.gitignore | 1
-rw-r--r--  arch/arc/kernel/Makefile | 16
-rw-r--r--  arch/arc/kernel/asm-offsets.c | 20
-rw-r--r--  arch/arc/kernel/ctx_sw.c | 125
-rw-r--r--  arch/arc/kernel/ctx_sw_asm.S | 76
-rw-r--r--  arch/arc/kernel/devtree.c | 3
-rw-r--r--  arch/arc/kernel/disasm.c | 69
-rw-r--r--  arch/arc/kernel/entry-arcv2.S | 16
-rw-r--r--  arch/arc/kernel/entry-compact.S | 19
-rw-r--r--  arch/arc/kernel/entry.S | 127
-rw-r--r--  arch/arc/kernel/fpu.c | 32
-rw-r--r--  arch/arc/kernel/head.S | 29
-rw-r--r--  arch/arc/kernel/intc-arcv2.c | 2
-rw-r--r--  arch/arc/kernel/intc-compact.c | 2
-rw-r--r--  arch/arc/kernel/irq.c | 10
-rw-r--r--  arch/arc/kernel/jump_label.c | 13
-rw-r--r--  arch/arc/kernel/kgdb.c | 3
-rw-r--r--  arch/arc/kernel/kprobes.c | 81
-rw-r--r--  arch/arc/kernel/mcip.c | 4
-rw-r--r--  arch/arc/kernel/perf_event.c | 193
-rw-r--r--  arch/arc/kernel/process.c | 75
-rw-r--r--  arch/arc/kernel/ptrace.c | 293
-rw-r--r--  arch/arc/kernel/setup.c | 603
-rw-r--r--  arch/arc/kernel/signal.c | 64
-rw-r--r--  arch/arc/kernel/smp.c | 35
-rw-r--r--  arch/arc/kernel/stacktrace.c | 87
-rw-r--r--  arch/arc/kernel/sys.c | 1
-rw-r--r--  arch/arc/kernel/traps.c | 10
-rw-r--r--  arch/arc/kernel/troubleshoot.c | 125
-rw-r--r--  arch/arc/kernel/unaligned.c | 2
-rw-r--r--  arch/arc/kernel/unwind.c | 74
-rw-r--r--  arch/arc/kernel/vmlinux.lds.S | 6
-rw-r--r--  arch/arc/lib/memset-archs.S | 3
-rw-r--r--  arch/arc/mm/cache.c | 438
-rw-r--r--  arch/arc/mm/dma.c | 6
-rw-r--r--  arch/arc/mm/extable.c | 23
-rw-r--r--  arch/arc/mm/fault.c | 95
-rw-r--r--  arch/arc/mm/highmem.c | 84
-rw-r--r--  arch/arc/mm/init.c | 125
-rw-r--r--  arch/arc/mm/ioremap.c | 51
-rw-r--r--  arch/arc/mm/mmap.c | 41
-rw-r--r--  arch/arc/mm/tlb.c | 407
-rw-r--r--  arch/arc/mm/tlbex.S | 93
-rw-r--r--  arch/arc/oprofile/Makefile | 10
-rw-r--r--  arch/arc/oprofile/common.c | 23
-rw-r--r--  arch/arc/plat-axs10x/axs10x.c | 3
-rw-r--r--  arch/arc/plat-eznps/Kconfig | 57
-rw-r--r--  arch/arc/plat-eznps/Makefile | 8
-rw-r--r--  arch/arc/plat-eznps/ctop.c | 21
-rw-r--r--  arch/arc/plat-eznps/entry.S | 60
-rw-r--r--  arch/arc/plat-eznps/include/plat/ctop.h | 209
-rw-r--r--  arch/arc/plat-eznps/include/plat/mtm.h | 49
-rw-r--r--  arch/arc/plat-eznps/include/plat/smp.h | 15
-rw-r--r--  arch/arc/plat-eznps/mtm.c | 166
-rw-r--r--  arch/arc/plat-eznps/platform.c | 91
-rw-r--r--  arch/arc/plat-eznps/smp.c | 138
-rw-r--r--  arch/arc/plat-hsdk/Kconfig | 2
-rw-r--r--  arch/arc/plat-hsdk/platform.c | 21
154 files changed, 3664 insertions, 5982 deletions
diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild
index 699d8cae9b1f..b94102fff68b 100644
--- a/arch/arc/Kbuild
+++ b/arch/arc/Kbuild
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-y += kernel/
obj-y += mm/
+
+# for cleaning
+subdir- += boot
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 26108ea785c2..99d2845f3feb 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,6 +6,9 @@
config ARC
def_bool y
select ARC_TIMERS
+ select ARCH_HAS_CPU_CACHE_ALIASING
+ select ARCH_HAS_CACHE_LINE_SIZE
+ select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
@@ -13,46 +16,42 @@ config ARC
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
select ARCH_32BIT_OFF_T
- select BUILDTIME_EXTABLE_SORT
+ select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
- select GENERIC_CLOCKEVENTS
- select GENERIC_FIND_FIRST_BIT
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
select GENERIC_PENDING_IRQ if SMP
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_IOREMAP
+ select GENERIC_STRNCPY_FROM_USER if MMU
+ select GENERIC_STRNLEN_USER if MMU
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DEBUG_KMEMLEAK
- select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_IOREMAP_PROT
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_MOD_ARCH_SPECIFIC
- select HAVE_OPROFILE
select HAVE_PERF_EVENTS
- select HANDLE_DOMAIN_IRQ
+ select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select OF
select OF_EARLY_FLATTREE
select PCI_SYSCALL if PCI
- select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
-
-config ARCH_HAS_CACHE_LINE_SIZE
- def_bool y
-
-config TRACE_IRQFLAGS_SUPPORT
- def_bool y
+ select TRACE_IRQFLAGS_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
@@ -63,9 +62,6 @@ config SCHED_OMIT_FRAME_POINTER
config GENERIC_CSUM
def_bool y
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool n
-
config ARCH_FLATMEM_ENABLE
def_bool y
@@ -85,18 +81,12 @@ config STACKTRACE_SUPPORT
def_bool y
select STACKTRACE
-config HAVE_ARCH_TRANSPARENT_HUGEPAGE
- def_bool y
- depends on ARC_MMU_V4
-
menu "ARC Architecture Configuration"
menu "ARC Platform/SoC/Board"
source "arch/arc/plat-tb10x/Kconfig"
source "arch/arc/plat-axs10x/Kconfig"
-#New platform adds here
-source "arch/arc/plat-eznps/Kconfig"
source "arch/arc/plat-hsdk/Kconfig"
endmenu
@@ -126,16 +116,9 @@ choice
default ARC_CPU_770 if ISA_ARCOMPACT
default ARC_CPU_HS if ISA_ARCV2
-if ISA_ARCOMPACT
-
-config ARC_CPU_750D
- bool "ARC750D"
- select ARC_CANT_LLSC
- help
- Support for ARC750 core
-
config ARC_CPU_770
bool "ARC770"
+ depends on ISA_ARCOMPACT
select ARC_HAS_SWAPE
help
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
@@ -145,15 +128,13 @@ config ARC_CPU_770
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
-endif #ISA_ARCOMPACT
-
config ARC_CPU_HS
bool "ARC-HS"
depends on ISA_ARCV2
help
Support for ARC HS38x Cores based on ARCv2 ISA
The notable features are:
- - SMP configurations of upto 4 core with coherency
+ - SMP configurations of up to 4 cores with coherency
- Optional L2 Cache and IO-Coherency
  - Revised Interrupt Architecture (multiple priorities, reg banks,
auto stack switch, auto regfile save/restore)
@@ -168,6 +149,15 @@ config ARC_CPU_HS
endchoice
+config ARC_TUNE_MCPU
+ string "Override default -mcpu compiler flag"
+ default ""
+ help
+ Override default -mcpu=xxx compiler flag (which is set depending on
+ the ISA version) with the specified value.
+	  NOTE: If the specified flag isn't supported by the current compiler,
+	  the ISA default value will be used as a fallback.
+
config CPU_BIG_ENDIAN
bool "Enable Big Endian Mode"
help
@@ -191,7 +181,7 @@ config ARC_SMP_HALT_ON_RESET
help
In SMP configuration cores can be configured as Halt-on-reset
or they could all start at same time. For Halt-on-reset, non
- masters are parked until Master kicks them so they can start of
+ masters are parked until Master kicks them so they can start off
at designated entry point. For other case, all jump to common
entry point and spin wait for Master's signal.
@@ -242,10 +232,6 @@ config ARC_CACHE_PAGES
Note that Global I/D ENABLE + Per Page DISABLE works but corollary
Global DISABLE + Per Page ENABLE won't work
-config ARC_CACHE_VIPT_ALIASING
- bool "Support VIPT Aliasing D$"
- depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
-
endif #ARC_CACHE
config ARC_HAS_ICCM
@@ -275,33 +261,17 @@ config ARC_DCCM_BASE
choice
prompt "MMU Version"
- default ARC_MMU_V3 if ARC_CPU_770
- default ARC_MMU_V2 if ARC_CPU_750D
- default ARC_MMU_V4 if ARC_CPU_HS
-
-if ISA_ARCOMPACT
-
-config ARC_MMU_V1
- bool "MMU v1"
- help
- Orig ARC700 MMU
-
-config ARC_MMU_V2
- bool "MMU v2"
- help
- Fixed the deficiency of v1 - possible thrashing in memcpy scenario
- when 2 D-TLB and 1 I-TLB entries index into same 2way set.
+ default ARC_MMU_V3 if ISA_ARCOMPACT
+ default ARC_MMU_V4 if ISA_ARCV2
config ARC_MMU_V3
bool "MMU v3"
- depends on ARC_CPU_770
+ depends on ISA_ARCOMPACT
help
Introduced with ARC700 4.10: New Features
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
Shared Address Spaces (SASID)
-endif
-
config ARC_MMU_V4
bool "MMU v4"
depends on ISA_ARCV2
@@ -315,15 +285,17 @@ choice
config ARC_PAGE_SIZE_8K
bool "8KB"
+ select HAVE_PAGE_SIZE_8KB
help
Choose between 8k vs 16k
config ARC_PAGE_SIZE_16K
+ select HAVE_PAGE_SIZE_16KB
bool "16KB"
- depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K
bool "4KB"
+ select HAVE_PAGE_SIZE_4KB
depends on ARC_MMU_V3 || ARC_MMU_V4
endchoice
@@ -341,18 +313,12 @@ config ARC_HUGEPAGE_16M
endchoice
-config NODES_SHIFT
- int "Maximum NUMA Nodes (as a power of 2)"
- default "0" if !DISCONTIGMEM
- default "1" if DISCONTIGMEM
- depends on NEED_MULTIPLE_NODES
- ---help---
- Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory
- zones.
-
-if ISA_ARCOMPACT
+config PGTABLE_LEVELS
+ int "Number of Page table levels"
+ default 2
config ARC_COMPACT_IRQ_LEVELS
+ depends on ISA_ARCOMPACT
bool "Setup Timer IRQ as high Priority"
# if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy
depends on !SMP
@@ -360,14 +326,10 @@ config ARC_COMPACT_IRQ_LEVELS
config ARC_FPU_SAVE_RESTORE
bool "Enable FPU state persistence across context switch"
help
- Double Precision Floating Point unit had dedicated regs which
- need to be saved/restored across context-switch.
- Note that ARC FPU is overly simplistic, unlike say x86, which has
- hardware pieces to allow software to conditionally save/restore,
- based on actual usage of FPU by a task. Thus our implemn does
- this for all tasks in system.
-
-endif #ISA_ARCOMPACT
+ ARCompact FPU has internal registers to assist with Double precision
+	  Floating Point operations. There are control and status registers
+ for floating point exceptions and rounding modes. These are
+ preserved across task context switch when enabled.
config ARC_CANT_LLSC
def_bool n
@@ -405,13 +367,61 @@ config ARC_HAS_DIV_REM
default y
config ARC_HAS_ACCL_REGS
- bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6)"
+ bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6 and/or DSP)"
default y
help
Depending on the configuration, CPU can contain accumulator reg-pair
(also referred to as r58:r59). These can also be used by gcc as GPR so
kernel needs to save/restore per process
+config ARC_DSP_HANDLED
+ def_bool n
+
+config ARC_DSP_SAVE_RESTORE_REGS
+ def_bool n
+
+choice
+ prompt "DSP support"
+ default ARC_DSP_NONE
+ help
+ Depending on the configuration, CPU can contain DSP registers
+ (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL).
+ Below are options describing how to handle these registers in
+ interrupt entry / exit and in context switch.
+
+config ARC_DSP_NONE
+ bool "No DSP extension presence in HW"
+ help
+ No DSP extension presence in HW
+
+config ARC_DSP_KERNEL
+ bool "DSP extension in HW, no support for userspace"
+ select ARC_HAS_ACCL_REGS
+ select ARC_DSP_HANDLED
+ help
+ DSP extension presence in HW, no support for DSP-enabled userspace
+ applications. We don't save / restore DSP registers and only do
+	  some minimal preparations so userspace won't be able to break the kernel
+
+config ARC_DSP_USERSPACE
+ bool "Support DSP for userspace apps"
+ select ARC_HAS_ACCL_REGS
+ select ARC_DSP_HANDLED
+ select ARC_DSP_SAVE_RESTORE_REGS
+ help
+ DSP extension presence in HW, support save / restore DSP registers to
+ run DSP-enabled userspace applications
+
+config ARC_DSP_AGU_USERSPACE
+ bool "Support DSP with AGU for userspace apps"
+ select ARC_HAS_ACCL_REGS
+ select ARC_DSP_HANDLED
+ select ARC_DSP_SAVE_RESTORE_REGS
+ help
+ DSP and AGU extensions presence in HW, support save / restore DSP
+ and AGU registers to run DSP-enabled userspace applications
+endchoice
+
config ARC_IRQ_NO_AUTOSAVE
bool "Disable hardware autosave regfile on interrupts"
default n
@@ -420,6 +430,12 @@ config ARC_IRQ_NO_AUTOSAVE
This is programmable and can be optionally disabled in which case
software INTERRUPT_PROLOGUE/EPILOGUE do the needed work
+config ARC_LPB_DISABLE
+ bool "Disable loop buffer (LPB)"
+ help
+	  On HS cores, the loop buffer (LPB) is programmable at runtime and can
+	  be optionally disabled.
+
endif # ISA_ARCV2
endmenu # "ARC CPU Configuration"
@@ -448,7 +464,8 @@ config LINUX_RAM_BASE
config HIGHMEM
bool "High Memory Support"
- select ARCH_DISCONTIGMEM_ENABLE
+ select HAVE_ARCH_PFN_VALID
+ select KMAP_LOCAL
help
With ARC 2G:2G address split, only upper 2G is directly addressable by
kernel. Enable this to potentially allow access to rest of 2G and PAE
@@ -475,11 +492,11 @@ config ARC_KVADDR_SIZE
kernel-user gutter)
config ARC_CURR_IN_REG
- bool "Dedicate Register r25 for current_task pointer"
+ bool "cache current task pointer in gp"
default y
help
- This reserved Register R25 to point to Current Task in
- kernel mode. This saves memory access for each such access
+	  This reserves the gp register to point to the current task in
+	  kernel mode, eliding a memory access for each such reference
config ARC_EMUL_UNALIGNED
@@ -523,9 +540,6 @@ config ARC_DW2_UNWIND
If you don't debug the kernel, you can say N, but we may not be able
to solve problems without frame unwind information
-config ARC_DBG_TLB_PARANOIA
- bool "Paranoia Checks in Low Level TLB Handlers"
-
config ARC_DBG_JUMP_LABEL
bool "Paranoid checks in Static Keys (jump labels) code"
depends on JUMP_LABEL
@@ -543,9 +557,9 @@ config ARC_BUILTIN_DTB_NAME
endmenu # "ARC Architecture Configuration"
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
int "Maximum zone order"
- default "12" if ARC_HUGEPAGE_16M
- default "11"
+ default "11" if ARC_HUGEPAGE_16M
+ default "10"
source "kernel/power/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 20e9ab6cc521..2390dd042e36 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -10,16 +10,32 @@ CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-)
endif
cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
-cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
-cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38
+
+tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mcpu=arc700
+tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38
+
+ifeq ($(CONFIG_ARC_TUNE_MCPU),)
+cflags-y += $(tune-mcpu-def-y)
+else
+tune-mcpu := $(CONFIG_ARC_TUNE_MCPU)
+ifneq ($(call cc-option,$(tune-mcpu)),)
+cflags-y += $(tune-mcpu)
+else
+# The flag provided by the 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler
+# (probably the compiler is too old). Use the ISA default mcpu flag instead as a safe option.
+$(warning ** WARNING ** CONFIG_ARC_TUNE_MCPU flag '$(tune-mcpu)' is unknown, fallback to '$(tune-mcpu-def-y)')
+cflags-y += $(tune-mcpu-def-y)
+endif
+endif
ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
# We had a customer reported bug where some code built in kernel was NOT using
-# any kernel headers, and missing the r25 global register
+# any kernel headers, and missing the global register
# Can't do unconditionally because of recursive include issues
# due to <linux/thread_info.h>
LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h
+cflags-y += -ffixed-gp
endif
cflags-y += -fsection-anchors
@@ -51,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi)
# small data is default for elf32 tool-chain. If not usable, disable it
# This also allows repurposing GP as scratch reg to gcc reg allocator
disable_small_data := y
-cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp
+cflags-$(disable_small_data) += -mno-sdata
cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian
ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
@@ -66,40 +82,31 @@ KBUILD_CFLAGS += $(cflags-y)
KBUILD_AFLAGS += $(KBUILD_CFLAGS)
KBUILD_LDFLAGS += $(ldflags-y)
-head-y := arch/arc/kernel/head.o
-
-# See arch/arc/Kbuild for content of core part of the kernel
-core-y += arch/arc/
-
# w/o this dtb won't embed into kernel binary
core-y += arch/arc/boot/dts/
core-y += arch/arc/plat-sim/
core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
-core-$(CONFIG_ARC_PLAT_EZNPS) += arch/arc/plat-eznps/
core-$(CONFIG_ARC_SOC_HSDK) += arch/arc/plat-hsdk/
-ifdef CONFIG_ARC_PLAT_EZNPS
-KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
-endif
-
-drivers-$(CONFIG_OPROFILE) += arch/arc/oprofile/
-
libs-y += arch/arc/lib/ $(LIBGCC)
boot := arch/arc/boot
-#default target for make without any arguments.
-KBUILD_IMAGE := $(boot)/bootpImage
-
-all: bootpImage
-bootpImage: vmlinux
-
-boot_targets += uImage uImage.bin uImage.gz
+boot_targets := uImage.bin uImage.gz uImage.lzma
+PHONY += $(boot_targets)
$(boot_targets): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-archclean:
- $(Q)$(MAKE) $(clean)=$(boot)
+uimage-default-y := uImage.bin
+uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz
+uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma
+
+PHONY += uImage
+uImage: $(uimage-default-y)
+ @ln -sf $< $(boot)/uImage
+ @$(kecho) ' Image $(boot)/uImage is ready'
+
+CLEAN_FILES += $(boot)/uImage
diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore
index c4c5fd529c25..675db1494028 100644
--- a/arch/arc/boot/.gitignore
+++ b/arch/arc/boot/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
uImage
diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index 538b92f4dd25..5648748c285f 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-targets := vmlinux.bin vmlinux.bin.gz uImage
# uImage build relies on mkimage being available on your host for ARC target
# You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
@@ -7,23 +6,18 @@ targets := vmlinux.bin vmlinux.bin.gz uImage
OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S
-LINUX_START_TEXT = $$(readelf -h vmlinux | \
+LINUX_START_TEXT = $$($(READELF) -h vmlinux | \
grep "Entry point address" | grep -o 0x.*)
UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE)
UIMAGE_ENTRYADDR = $(LINUX_START_TEXT)
-suffix-y := bin
-suffix-$(CONFIG_KERNEL_GZIP) := gz
-suffix-$(CONFIG_KERNEL_LZMA) := lzma
-
-targets += uImage
+targets += vmlinux.bin
+targets += vmlinux.bin.gz
+targets += vmlinux.bin.lzma
targets += uImage.bin
targets += uImage.gz
targets += uImage.lzma
-extra-y += vmlinux.bin
-extra-y += vmlinux.bin.gz
-extra-y += vmlinux.bin.lzma
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
@@ -42,7 +36,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
$(call if_changed,uimage,lzma)
-
-$(obj)/uImage: $(obj)/uImage.$(suffix-y)
- @ln -sf $(notdir $<) $@
- @echo ' Image $@ is ready'
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index 8483a86c743d..4237aa5de3a3 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -2,8 +2,8 @@
# Built-in dtb
builtindtb-y := nsim_700
-ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
- builtindtb-y := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
+ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),)
+ builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME)
endif
obj-y += $(builtindtb-y).dtb.o
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 79ec27c043c1..2a151607b080 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -91,7 +91,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index ac8e1b463a70..3434c8131ecd 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -103,11 +103,11 @@
dma-coherent;
};
- ehci@40000 {
+ usb@40000 {
dma-coherent;
};
- ohci@60000 {
+ usb@60000 {
dma-coherent;
};
@@ -129,7 +129,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 9da21e7fd246..67556f4b7057 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -110,11 +110,11 @@
dma-coherent;
};
- ehci@40000 {
+ usb@40000 {
dma-coherent;
};
- ohci@60000 {
+ usb@60000 {
dma-coherent;
};
@@ -135,7 +135,7 @@
* avoid duplicating the MB dtsi file given that IRQ from
* this intc to cpu intc are different for axs101 and axs103
*/
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index f9a5c9ddcae7..b64435385304 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -78,6 +78,7 @@
interrupt-names = "macirq";
phy-mode = "rgmii";
snps,pbl = < 32 >;
+ snps,multicast-filter-bins = <256>;
clocks = <&apbclk>;
clock-names = "stmmaceth";
max-speed = <100>;
@@ -86,13 +87,13 @@
mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */
};
- ehci@40000 {
+ usb@40000 {
compatible = "generic-ehci";
reg = < 0x40000 0x100 >;
interrupts = < 8 >;
};
- ohci@60000 {
+ usb@60000 {
compatible = "generic-ohci";
reg = < 0x60000 0x100 >;
interrupts = < 8 >;
@@ -304,7 +305,6 @@
pgu@17000 {
compatible = "snps,arcpgu";
reg = <0x17000 0x400>;
- encoder-slave = <&adv7511>;
clocks = <&pguclk>;
clock-names = "pxlclk";
memory-region = <&frame_buffer>;
diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts
deleted file mode 100644
index a7e2e8d8ff06..000000000000
--- a/arch/arc/boot/dts/eznps.dts
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-/dts-v1/;
-
-/ {
- compatible = "ezchip,arc-nps";
- #address-cells = <1>;
- #size-cells = <1>;
- interrupt-parent = <&intc>;
- present-cpus = "0-1,16-17";
- possible-cpus = "0-4095";
-
- aliases {
- ethernet0 = &gmac0;
- };
-
- chosen {
- bootargs = "earlycon=uart8250,mmio32be,0xf7209000,115200n8 console=ttyS0,115200n8";
- };
-
- memory {
- device_type = "memory";
- reg = <0x80000000 0x20000000>; /* 512M */
- };
-
- clocks {
- sysclk: sysclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <83333333>;
- };
- };
-
- soc {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <1>;
-
- /* child and parent address space 1:1 mapped */
- ranges;
-
- intc: interrupt-controller {
- compatible = "ezchip,nps400-ic";
- interrupt-controller;
- #interrupt-cells = <1>;
- };
-
- timer0: timer_clkevt {
- compatible = "snps,arc-timer";
- interrupts = <3>;
- clocks = <&sysclk>;
- };
-
- timer1: timer_clksrc {
- compatible = "ezchip,nps400-timer";
- clocks = <&sysclk>;
- clock-names="sysclk";
- };
-
- uart@f7209000 {
- compatible = "snps,dw-apb-uart";
- device_type = "serial";
- reg = <0xf7209000 0x100>;
- interrupts = <6>;
- clocks = <&sysclk>;
- clock-names="baudclk";
- baud = <115200>;
- reg-shift = <2>;
- reg-io-width = <4>;
- native-endian;
- };
-
- gmac0: ethernet@f7470000 {
- compatible = "ezchip,nps-mgt-enet";
- reg = <0xf7470000 0x1940>;
- interrupts = <7>;
- /* Filled in by U-Boot */
- mac-address = [ 00 C0 00 F0 04 03 ];
- };
- };
-};
diff --git a/arch/arc/boot/dts/haps_hs.dts b/arch/arc/boot/dts/haps_hs.dts
index 60d578e2781f..76ad527a0847 100644
--- a/arch/arc/boot/dts/haps_hs.dts
+++ b/arch/arc/boot/dts/haps_hs.dts
@@ -16,7 +16,7 @@
memory {
device_type = "memory";
/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
- reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */
+ reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */
0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */
};
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 9acbeba832c0..6691f4255077 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -88,6 +88,8 @@
arcpct: pct {
compatible = "snps,archs-pct";
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <20>;
};
/* TIMER0 with interrupt for clockevent */
@@ -208,7 +210,7 @@
reg = <0x8000 0x2000>;
interrupts = <10>;
interrupt-names = "macirq";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
snps,pbl = <32>;
snps,multicast-filter-bins = <256>;
clocks = <&gmacclk>;
@@ -226,13 +228,13 @@
#address-cells = <1>;
#size-cells = <0>;
compatible = "snps,dwmac-mdio";
- phy0: ethernet-phy@0 {
+ phy0: ethernet-phy@0 { /* Micrel KSZ9031 */
reg = <0>;
};
};
};
- ohci@60000 {
+ usb@60000 {
compatible = "snps,hsdk-v1.0-ohci", "generic-ohci";
reg = <0x60000 0x100>;
interrupts = <15>;
@@ -240,7 +242,7 @@
dma-coherent;
};
- ehci@40000 {
+ usb@40000 {
compatible = "snps,hsdk-v1.0-ehci", "generic-ehci";
reg = <0x40000 0x100>;
interrupts = <15>;
@@ -273,7 +275,7 @@
cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
<&creg_gpio 1 GPIO_ACTIVE_LOW>;
- spi-flash@0 {
+ flash@0 {
compatible = "sst26wf016b", "jedec,spi-nor";
reg = <0>;
#address-cells = <1>;
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
index f8be7ba8dad4..c21d0eb07bf6 100644
--- a/arch/arc/boot/dts/vdk_axc003.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -46,7 +46,7 @@
};
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
index 0afa3e53a4e3..4d348853ac7c 100644
--- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -54,7 +54,7 @@
};
- mb_intc: dw-apb-ictl@e0012000 {
+ mb_intc: interrupt-controller@e0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
index cbb179770293..90a412026e64 100644
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -46,7 +46,7 @@
clock-names = "stmmaceth";
};
- ehci@40000 {
+ usb@40000 {
compatible = "generic-ehci";
reg = < 0x40000 0x100 >;
interrupts = < 8 >;
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 0016149f9583..89720d6d7e0d 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -9,8 +9,7 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
@@ -36,9 +35,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
@@ -100,7 +96,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 5b031582a1cf..73ec01ed0492 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -9,8 +9,7 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
@@ -35,9 +34,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
@@ -98,7 +94,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index d4eec39e0112..4da0f626fa9d 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -9,12 +9,10 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -36,9 +34,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
@@ -101,7 +96,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index 7337cdf4ffdd..8c3ed5d6e6c3 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -11,11 +11,9 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs"
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
@@ -60,6 +58,5 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index bc927221afc0..6fc98c1b9b36 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -11,12 +11,10 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_SMP=y
CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu"
CONFIG_KPROBES=y
@@ -60,6 +58,5 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 0974226fab55..9e79154b5535 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -9,12 +9,10 @@ CONFIG_NAMESPACES=y
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_ARC_SOC_HSDK=y
@@ -65,6 +63,7 @@ CONFIG_DRM_UDL=y
CONFIG_DRM_ETNAVIV=y
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
@@ -85,7 +84,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
deleted file mode 100644
index 07f26ed39f02..000000000000
--- a/arch/arc/configs/nps_defconfig
+++ /dev/null
@@ -1,82 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_AIO is not set
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_ISA_ARCOMPACT=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_EZNPS=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4096
-CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_CACHE_PAGES is not set
-# CONFIG_ARC_HAS_LLSC is not set
-CONFIG_ARC_KVADDR_SIZE=402
-CONFIG_ARC_EMUL_UNALIGNED=y
-CONFIG_PREEMPT=y
-CONFIG_NET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=1
-CONFIG_BLK_DEV_RAM_SIZE=2048
-CONFIG_NETDEVICES=y
-CONFIG_NETCONSOLE=y
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=1
-CONFIG_SERIAL_8250_RUNTIME_UARTS=1
-CONFIG_SERIAL_8250_DW=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_DNOTIFY is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_ROOT_NFS=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_ENABLE_DEFAULT_TRACERS=y
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 326f6cde7826..51092c39e360 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -11,9 +11,8 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
@@ -57,5 +56,4 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 5dd470b6609e..70c17bca4939 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -10,9 +10,8 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
@@ -20,8 +19,6 @@ CONFIG_ISA_ARCOMPACT=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
@@ -68,4 +65,3 @@ CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
-# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 3532e86f7bff..59a3b6642fe7 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -10,17 +10,14 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
# CONFIG_COMPACTION is not set
@@ -66,4 +63,3 @@ CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
-# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index d90448bee064..1419fc946a08 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -8,14 +8,11 @@ CONFIG_IKCONFIG_PROC=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
# CONFIG_ARC_TIMERS_64BIT is not set
@@ -29,9 +26,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
@@ -40,7 +34,6 @@ CONFIG_DEVTMPFS=y
# CONFIG_BLK_DEV is not set
CONFIG_NETDEVICES=y
# CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y
# CONFIG_NET_VENDOR_INTEL is not set
@@ -77,5 +70,5 @@ CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FTRACE=y
+# CONFIG_NET_VENDOR_CADENCE is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index a12656ec0072..1a68e4beebca 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -14,13 +14,11 @@ CONFIG_INITRAMFS_SOURCE="../tb10x-rootfs.cpio"
CONFIG_INITRAMFS_ROOT_UID=2100
CONFIG_INITRAMFS_ROOT_GID=501
# CONFIG_RD_GZIP is not set
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_AIO is not set
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_ISA_ARCOMPACT=y
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -36,15 +34,11 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_NETDEVICES=y
-# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
@@ -91,16 +85,15 @@ CONFIG_TMPFS=y
CONFIG_CONFIGFS_FS=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_SCHEDSTATS=y
-CONFIG_TIMER_STATS=y
# CONFIG_CRYPTO_HW is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index d7c858df520c..50c343913825 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -4,8 +4,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
@@ -59,8 +58,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
@@ -88,7 +85,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_SOFTLOCKUP_DETECTOR=y
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index 015c1d43889e..6d9e1d9f71d2 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -4,8 +4,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
@@ -92,7 +91,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_SOFTLOCKUP_DETECTOR=y
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 1b505694691e..3c1afa524b9c 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,28 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-generic-y += bugs.h
-generic-y += compat.h
-generic-y += device.h
-generic-y += div64.h
-generic-y += dma-mapping.h
-generic-y += emergency-restart.h
generic-y += extable.h
-generic-y += ftrace.h
-generic-y += hardirq.h
-generic-y += hw_irq.h
-generic-y += irq_regs.h
-generic-y += irq_work.h
generic-y += kvm_para.h
-generic-y += local.h
-generic-y += local64.h
generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
generic-y += parport.h
-generic-y += percpu.h
-generic-y += preempt.h
-generic-y += topology.h
-generic-y += trace_clock.h
generic-y += user.h
-generic-y += vga.h
-generic-y += word-at-a-time.h
-generic-y += xor.h
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 5134f0baf33c..4b13f60fe7ca 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -23,7 +23,7 @@
#define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */
#define ARC_REG_XY_MEM_BCR 0x79
#define ARC_REG_MAC_BCR 0x7a
-#define ARC_REG_MUL_BCR 0x7b
+#define ARC_REG_MPY_BCR 0x7b
#define ARC_REG_SWAP_BCR 0x7c
#define ARC_REG_NORM_BCR 0x7d
#define ARC_REG_MIXMAX_BCR 0x7e
@@ -39,6 +39,8 @@
#define ARC_REG_CLUSTER_BCR 0xcf
#define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */
#define ARC_REG_LPB_CTRL 0x488 /* ARCv2 Loop Buffer control */
+#define ARC_REG_FPU_CTRL 0x300
+#define ARC_REG_FPU_STATUS 0x301
/* Common for ARCompact and ARCv2 status register */
#define ARC_REG_STATUS32 0x0A
@@ -116,6 +118,32 @@
#define ARC_AUX_DPFP_2H 0x304
#define ARC_AUX_DPFP_STAT 0x305
+/*
+ * DSP-related registers
+ * Register names must correspond to dsp_callee_regs structure field names
+ * for automatic offset calculation in DSP_AUX_SAVE_RESTORE macros.
+ */
+#define ARC_AUX_DSP_BUILD 0x7A
+#define ARC_AUX_ACC0_LO 0x580
+#define ARC_AUX_ACC0_GLO 0x581
+#define ARC_AUX_ACC0_HI 0x582
+#define ARC_AUX_ACC0_GHI 0x583
+#define ARC_AUX_DSP_BFLY0 0x598
+#define ARC_AUX_DSP_CTRL 0x59F
+#define ARC_AUX_DSP_FFT_CTRL 0x59E
+
+#define ARC_AUX_AGU_BUILD 0xCC
+#define ARC_AUX_AGU_AP0 0x5C0
+#define ARC_AUX_AGU_AP1 0x5C1
+#define ARC_AUX_AGU_AP2 0x5C2
+#define ARC_AUX_AGU_AP3 0x5C3
+#define ARC_AUX_AGU_OS0 0x5D0
+#define ARC_AUX_AGU_OS1 0x5D1
+#define ARC_AUX_AGU_MOD0 0x5E0
+#define ARC_AUX_AGU_MOD1 0x5E1
+#define ARC_AUX_AGU_MOD2 0x5E2
+#define ARC_AUX_AGU_MOD3 0x5E3
+
#ifndef __ASSEMBLY__
#include <soc/arc/aux.h>
@@ -149,7 +177,7 @@ struct bcr_isa_arcv2 {
#endif
};
-struct bcr_uarch_build_arcv2 {
+struct bcr_uarch_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:8, prod:8, maj:8, min:8;
#else
@@ -157,6 +185,59 @@ struct bcr_uarch_build_arcv2 {
#endif
};
+struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
+ u_itlb:4, u_dtlb:4;
+#else
+ unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
+ ways:4, ver:8;
+#endif
+};
+
+struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+ n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+ /* DTLB ITLB JES JE JA */
+ unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+ pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+};
+
+struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+ unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+};
+
+struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+ unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+};
+
+struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+ unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+};
+
+struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+ unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+};
+
struct bcr_mpy {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
@@ -274,48 +355,6 @@ struct bcr_generic {
#endif
};
-/*
- *******************************************************************
- * Generic structures to hold build configuration used at runtime
- */
-
-struct cpuinfo_arc_mmu {
- unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
- unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
-};
-
-struct cpuinfo_arc_cache {
- unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4;
-};
-
-struct cpuinfo_arc_bpu {
- unsigned int ver, full, num_cache, num_pred, ret_stk;
-};
-
-struct cpuinfo_arc_ccm {
- unsigned int base_addr, sz;
-};
-
-struct cpuinfo_arc {
- struct cpuinfo_arc_cache icache, dcache, slc;
- struct cpuinfo_arc_mmu mmu;
- struct cpuinfo_arc_bpu bpu;
- struct bcr_identity core;
- struct bcr_isa_arcv2 isa;
- const char *release, *name;
- unsigned int vec_base;
- struct cpuinfo_arc_ccm iccm, dccm;
- struct {
- unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
- fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
- ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
- timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
- } extn;
- struct bcr_mpy extn_mpy;
-};
-
-extern struct cpuinfo_arc cpuinfo_arc700[];
-
static inline int is_isa_arcv2(void)
{
return IS_ENABLED(CONFIG_ISA_ARCV2);
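The bcr_* bitfield structs added above are meant to be overlaid on raw build configuration register (BCR) values. A hedged sketch of decoding one, using the READ_BCR helper this header already provides; the ARC_REG_MMU_BCR address (0x6f) is an assumption made here for illustration:

#define ARC_REG_MMU_BCR	0x6f	/* assumed MMU BCR aux address */

static inline void mmu_probe_sketch(void)
{
	struct bcr_mmu_4 mmu4;

	READ_BCR(ARC_REG_MMU_BCR, mmu4);	/* raw aux reg -> typed bitfields */

	if (mmu4.ver >= 4 && mmu4.pae)
		pr_info("MMUv%d with PAE40\n", mmu4.ver);
}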
diff --git a/arch/arc/include/asm/asserts.h b/arch/arc/include/asm/asserts.h
new file mode 100644
index 000000000000..108f33be6aa5
--- /dev/null
+++ b/arch/arc/include/asm/asserts.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+#ifndef __ASM_ARC_ASSERTS_H
+#define __ASM_ARC_ASSERTS_H
+
+/* Helpers to sanitize config options. */
+
+void chk_opt_strict(char *opt_name, bool hw_exists, bool opt_ena);
+void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena);
+
+/*
+ * Check required config option:
+ * - panic in case of OPT enabled but corresponding HW absent.
+ * - warn in case of OPT disabled but corresponding HW exists.
+*/
+#define CHK_OPT_STRICT(opt_name, hw_exists) \
+({ \
+ chk_opt_strict(#opt_name, hw_exists, IS_ENABLED(opt_name)); \
+})
+
+/*
+ * Check optional config option:
+ * - panic in case of OPT enabled but corresponding HW absent.
+*/
+#define CHK_OPT_WEAK(opt_name, hw_exists) \
+({ \
+ chk_opt_weak(#opt_name, hw_exists, IS_ENABLED(opt_name)); \
+})
+
+#endif /* __ASM_ARC_ASSERTS_H */
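A hypothetical usage sketch for these helpers; the fpu_exists flag below is invented for illustration (real callers would pass a BCR-derived hardware-presence bit):

#include <asm/asserts.h>

static void fpu_setup_sketch(bool fpu_exists)
{
	/* panic if the option is on without HW; warn if HW is present but unused */
	CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, fpu_exists);
}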
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
new file mode 100644
index 000000000000..5258cb81a16b
--- /dev/null
+++ b/arch/arc/include/asm/atomic-llsc.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_LLSC_H
+#define _ASM_ARC_ATOMIC_LLSC_H
+
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+#define ATOMIC_OP(op, asm_op) \
+static inline void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned int val; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+ : "cc", "memory"); \
+} \
+
+#define ATOMIC_OP_RETURN(op, asm_op) \
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned int val; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc", "memory"); \
+ \
+ return val; \
+}
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#define ATOMIC_FETCH_OP(op, asm_op) \
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned int val, orig; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[orig], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[orig], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r" (val), \
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc", "memory"); \
+ \
+ return orig; \
+}
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_OP_RETURN(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
+
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define arch_atomic_andnot arch_atomic_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
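For readers unfamiliar with LLOCK/SCOND, here is a C-level model of the retry loop that ATOMIC_OP_RETURN above generates; ll_read() and sc_write() are hypothetical stand-ins for the llock and scond instructions (scond fails, i.e. returns false, if another core wrote the location after the matching llock):

extern int  ll_read(int *addr);            /* models llock */
extern bool sc_write(int *addr, int val);  /* models scond (true = stored) */

static inline int atomic_add_return_model(int i, int *counter)
{
	int val;

	do {
		val = ll_read(counter) + i;    /* llock ; add  */
	} while (!sc_write(counter, val));     /* scond ; bnz 1b */

	return val;
}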
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
new file mode 100644
index 000000000000..89d12a60f84c
--- /dev/null
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
+#define _ASM_ARC_ATOMIC_SPLOCK_H
+
+/*
+ * Non hardware assisted Atomic-R-M-W
+ * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ */
+
+static inline void arch_atomic_set(atomic_t *v, int i)
+{
+ /*
+ * Independent of hardware support, all of the atomic_xxx() APIs need
+ * to follow the same locking rules to make sure that a "hardware"
+ * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
+ * sequence
+ *
+ * Thus atomic_set() despite being 1 insn (and seemingly atomic)
+ * requires the locking.
+ */
+ unsigned long flags;
+
+ atomic_ops_lock(flags);
+ WRITE_ONCE(v->counter, i);
+ atomic_ops_unlock(flags);
+}
+
+#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op) \
+static inline void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ \
+ atomic_ops_lock(flags); \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned int temp; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ temp = v->counter; \
+ temp c_op i; \
+ v->counter = temp; \
+ atomic_ops_unlock(flags); \
+ \
+ return temp; \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned int orig; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ orig = v->counter; \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+ \
+ return orig; \
+}
+
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot arch_atomic_andnot
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
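
As with the LLSC flavour, a hand expansion may help; ATOMIC_FETCH_OP(or, |=, or) above unrolls to roughly the following (illustrative; note the asm_op argument is unused in this spinlock-based variant):

	static inline int arch_atomic_fetch_or(int i, atomic_t *v)
	{
		unsigned long flags;
		unsigned int orig;

		/* spin lock/unlock provides the needed smp_mb() before/after */
		atomic_ops_lock(flags);
		orig = v->counter;
		v->counter |= i;
		atomic_ops_unlock(flags);

		return orig;	/* the value before the OR */
	}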
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 7298ce84762e..592d7fffc223 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -14,544 +14,22 @@
#include <asm/barrier.h>
#include <asm/smp.h>
-#define ATOMIC_INIT(i) { (i) }
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
-#define atomic_read(v) READ_ONCE((v)->counter)
+#define arch_atomic_read(v) READ_ONCE((v)->counter)
#ifdef CONFIG_ARC_HAS_LLSC
-
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned int val; \
- \
- __asm__ __volatile__( \
- "1: llock %[val], [%[ctr]] \n" \
- " " #asm_op " %[val], %[val], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
- : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
- [i] "ir" (i) \
- : "cc"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned int val; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %[val], [%[ctr]] \n" \
- " " #asm_op " %[val], %[val], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val) \
- : [ctr] "r" (&v->counter), \
- [i] "ir" (i) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return val; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned int val, orig; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND thmeselves don't provide any such semantics \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %[orig], [%[ctr]] \n" \
- " " #asm_op " %[val], %[orig], %[i] \n" \
- " scond %[val], [%[ctr]] \n" \
- " bnz 1b \n" \
- : [val] "=&r" (val), \
- [orig] "=&r" (orig) \
- : [ctr] "r" (&v->counter), \
- [i] "ir" (i) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return orig; \
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-#ifndef CONFIG_SMP
-
- /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
+#include <asm/atomic-llsc.h>
#else
-
-static inline void atomic_set(atomic_t *v, int i)
-{
- /*
- * Independent of hardware support, all of the atomic_xxx() APIs need
- * to follow the same locking rules to make sure that a "hardware"
- * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
- * sequence
- *
- * Thus atomic_set() despite being 1 insn (and seemingly atomic)
- * requires the locking.
- */
- unsigned long flags;
-
- atomic_ops_lock(flags);
- WRITE_ONCE(v->counter, i);
- atomic_ops_unlock(flags);
-}
-
-#define atomic_set_release(v, i) atomic_set((v), (i))
-
+#include <asm/atomic-spinlock.h>
#endif
/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ * 64-bit atomics
*/
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- \
- atomic_ops_lock(flags); \
- v->counter c_op i; \
- atomic_ops_unlock(flags); \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- unsigned long temp; \
- \
- /* \
- * spin lock/unlock provides the needed smp_mb() before/after \
- */ \
- atomic_ops_lock(flags); \
- temp = v->counter; \
- temp c_op i; \
- v->counter = temp; \
- atomic_ops_unlock(flags); \
- \
- return temp; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned long flags; \
- unsigned long orig; \
- \
- /* \
- * spin lock/unlock provides the needed smp_mb() before/after \
- */ \
- atomic_ops_lock(flags); \
- orig = v->counter; \
- v->counter c_op i; \
- atomic_ops_unlock(flags); \
- \
- return orig; \
-}
-
-#endif /* !CONFIG_ARC_HAS_LLSC */
-
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
-
-#define atomic_andnot atomic_andnot
-#define atomic_fetch_andnot atomic_fetch_andnot
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline int atomic_read(const atomic_t *v)
-{
- int temp;
-
- __asm__ __volatile__(
- " ld.di %0, [%1]"
- : "=r"(temp)
- : "r"(&v->counter)
- : "memory");
- return temp;
-}
-
-static inline void atomic_set(atomic_t *v, int i)
-{
- __asm__ __volatile__(
- " st.di %0,[%1]"
- :
- : "r"(i), "r"(&v->counter)
- : "memory");
-}
-
-#define ATOMIC_OP(op, c_op, asm_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(i), "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-} \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- temp c_op i; \
- \
- return temp; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned int temp = i; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(temp) \
- : "r"(&v->counter), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return temp; \
-}
-
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
-#define atomic_sub(i, v) atomic_add(-(i), (v))
-#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
#ifdef CONFIG_GENERIC_ATOMIC64
-
#include <asm-generic/atomic64.h>
-
-#else /* Kconfig ensures this is only enabled with needed h/w assist */
-
-/*
- * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
- * - The address HAS to be 64-bit aligned
- * - There are 2 semantics involved here:
- * = exclusive implies no interim update between load/store to same addr
- * = both words are observed/updated together: this is guaranteed even
- * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
- * is NOT required to use LLOCKD+SCONDD, STD suffices
- */
-
-typedef struct {
- s64 __aligned(8) counter;
-} atomic64_t;
-
-#define ATOMIC64_INIT(a) { (a) }
-
-static inline s64 atomic64_read(const atomic64_t *v)
-{
- s64 val;
-
- __asm__ __volatile__(
- " ldd %0, [%1] \n"
- : "=r"(val)
- : "r"(&v->counter));
-
- return val;
-}
-
-static inline void atomic64_set(atomic64_t *v, s64 a)
-{
- /*
- * This could have been a simple assignment in "C" but would need
- * explicit volatile. Otherwise gcc optimizers could elide the store
- * which borked atomic64 self-test
- * In the inline asm version, memory clobber needed for exact same
- * reason, to tell gcc about the store.
- *
- * This however is not needed for sibling atomic64_add() etc since both
- * load/store are explicitly done in inline asm. As long as API is used
- * for each access, gcc has no way to optimize away any load/store
- */
- __asm__ __volatile__(
- " std %0, [%1] \n"
- :
- : "r"(a), "r"(&v->counter)
- : "memory");
-}
-
-#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(s64 a, atomic64_t *v) \
-{ \
- s64 val; \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%1] \n" \
- " " #op1 " %L0, %L0, %L2 \n" \
- " " #op2 " %H0, %H0, %H2 \n" \
- " scondd %0, [%1] \n" \
- " bnz 1b \n" \
- : "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); \
-} \
-
-#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
-{ \
- s64 val; \
- \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%1] \n" \
- " " #op1 " %L0, %L0, %L2 \n" \
- " " #op2 " %H0, %H0, %H2 \n" \
- " scondd %0, [%1] \n" \
- " bnz 1b \n" \
- : [val] "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
- \
- smp_mb(); \
- \
- return val; \
-}
-
-#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
-{ \
- s64 val, orig; \
- \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: \n" \
- " llockd %0, [%2] \n" \
- " " #op1 " %L1, %L0, %L3 \n" \
- " " #op2 " %H1, %H0, %H3 \n" \
- " scondd %1, [%2] \n" \
- " bnz 1b \n" \
- : "=&r"(orig), "=&r"(val) \
- : "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
- \
- smp_mb(); \
- \
- return orig; \
-}
-
-#define ATOMIC64_OPS(op, op1, op2) \
- ATOMIC64_OP(op, op1, op2) \
- ATOMIC64_OP_RETURN(op, op1, op2) \
- ATOMIC64_FETCH_OP(op, op1, op2)
-
-#define atomic64_andnot atomic64_andnot
-#define atomic64_fetch_andnot atomic64_fetch_andnot
-
-ATOMIC64_OPS(add, add.f, adc)
-ATOMIC64_OPS(sub, sub.f, sbc)
-ATOMIC64_OPS(and, and, and)
-ATOMIC64_OPS(andnot, bic, bic)
-ATOMIC64_OPS(or, or, or)
-ATOMIC64_OPS(xor, xor, xor)
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-static inline s64
-atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
-{
- s64 prev;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " brne %L0, %L2, 2f \n"
- " brne %H0, %H2, 2f \n"
- " scondd %3, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev)
- : "r"(ptr), "ir"(expected), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return prev;
-}
-
-static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
-{
- s64 prev;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " scondd %2, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev)
- : "r"(ptr), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return prev;
-}
-
-/**
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-
-static inline s64 atomic64_dec_if_positive(atomic64_t *v)
-{
- s64 val;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%1] \n"
- " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
- " sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
- " brlt %H0, 0, 2f \n"
- " scondd %0, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(val)
- : "r"(&v->counter)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return val;
-}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-
-/**
- * atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
-static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
- s64 old, temp;
-
- smp_mb();
-
- __asm__ __volatile__(
- "1: llockd %0, [%2] \n"
- " brne %L0, %L4, 2f # continue to add since v != u \n"
- " breq.d %H0, %H4, 3f # return since v == u \n"
- "2: \n"
- " add.f %L1, %L0, %L3 \n"
- " adc %H1, %H0, %H3 \n"
- " scondd %1, [%2] \n"
- " bnz 1b \n"
- "3: \n"
- : "=&r"(old), "=&r" (temp)
- : "r"(&v->counter), "r"(a), "r"(u)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
-
- return old;
-}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-
-#endif /* !CONFIG_GENERIC_ATOMIC64 */
+#else
+#include <asm/atomic64-arcv2.h>
+#endif
#endif /* !__ASSEMBLY__ */
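
Once the hunks above apply, the slimmed-down atomic.h is essentially just header selection; reconstructed from the diff for illustration:

	#define arch_atomic_read(v)	READ_ONCE((v)->counter)

	#ifdef CONFIG_ARC_HAS_LLSC
	#include <asm/atomic-llsc.h>
	#else
	#include <asm/atomic-spinlock.h>
	#endif

	/* 64-bit atomics */
	#ifdef CONFIG_GENERIC_ATOMIC64
	#include <asm-generic/atomic64.h>
	#else
	#include <asm/atomic64-arcv2.h>
	#endif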
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
new file mode 100644
index 000000000000..9b5791b85471
--- /dev/null
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ * - The address HAS to be 64-bit aligned
+ */
+
+#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
+#define _ASM_ARC_ATOMIC64_ARCV2_H
+
+typedef struct {
+ s64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+ s64 val;
+
+ __asm__ __volatile__(
+ " ldd %0, [%1] \n"
+ : "=r"(val)
+ : "r"(&v->counter));
+
+ return val;
+}
+
+static inline void arch_atomic64_set(atomic64_t *v, s64 a)
+{
+ /*
+	 * This could have been a simple assignment in "C" but would need
+	 * explicit volatile. Otherwise gcc optimizers could elide the store,
+	 * which borked the atomic64 self-test.
+	 * In the inline asm version, the memory clobber is needed for the
+	 * exact same reason: to tell gcc about the store.
+	 *
+	 * This however is not needed for sibling atomic64_add() etc since both
+	 * load/store are explicitly done in inline asm. As long as the API is
+	 * used for each access, gcc has no way to optimize away any load/store.
+ */
+ __asm__ __volatile__(
+ " std %0, [%1] \n"
+ :
+ : "r"(a), "r"(&v->counter)
+ : "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2) \
+static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
+{ \
+ s64 val; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc", "memory"); \
+} \
+
+#define ATOMIC64_OP_RETURN(op, op1, op2) \
+static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 val; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%1] \n" \
+ " " #op1 " %L0, %L0, %L2 \n" \
+ " " #op2 " %H0, %H0, %H2 \n" \
+ " scondd %0, [%1] \n" \
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc", "memory"); \
+ \
+ return val; \
+}
+
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+
+#define ATOMIC64_FETCH_OP(op, op1, op2) \
+static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 val, orig; \
+ \
+ __asm__ __volatile__( \
+ "1: \n" \
+ " llockd %0, [%2] \n" \
+ " " #op1 " %L1, %L0, %L3 \n" \
+ " " #op2 " %H1, %H0, %H3 \n" \
+ " scondd %1, [%2] \n" \
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+ : "cc", "memory"); \
+ \
+ return orig; \
+}
+
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_OP_RETURN(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#define arch_atomic64_andnot arch_atomic64_andnot
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline s64
+arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+{
+ s64 prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " brne %L0, %L2, 2f \n"
+ " brne %H0, %H2, 2f \n"
+ " scondd %3, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "ir"(expected), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+
+static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
+{
+ s64 prev;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " scondd %2, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(prev)
+ : "r"(ptr), "r"(new)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return prev;
+}
+#define arch_atomic64_xchg arch_atomic64_xchg
+
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+ s64 val;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%1] \n"
+ " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
+ " sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
+ " brlt %H0, 0, 2f \n"
+ " scondd %0, [%1] \n"
+ " bnz 1b \n"
+ "2: \n"
+ : "=&r"(val)
+ : "r"(&v->counter)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return val;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+ s64 old, temp;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "1: llockd %0, [%2] \n"
+ " brne %L0, %L4, 2f # continue to add since v != u \n"
+ " breq.d %H0, %H4, 3f # return since v == u \n"
+ "2: \n"
+ " add.f %L1, %L0, %L3 \n"
+ " adc %H1, %H0, %H3 \n"
+ " scondd %1, [%2] \n"
+ " bnz 1b \n"
+ "3: \n"
+ : "=&r"(old), "=&r" (temp)
+ : "r"(&v->counter), "r"(a), "r"(u)
+ : "cc"); /* memory clobber comes from smp_mb() */
+
+ smp_mb();
+
+ return old;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+
+#endif
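
The semantics of arch_atomic64_dec_if_positive() above can be modelled in plain C — a sketch, not kernel code; the brlt on the high word in the asm is the sign check of the 64-bit result:

	static s64 dec_if_positive_model(s64 *p)
	{
		s64 v = *p - 1;	/* old value minus 1 is always returned */

		if (v >= 0)	/* store back only if it did not go negative */
			*p = v;

		return v;
	}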
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index 7823811e7cf5..4637de9e02fa 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -27,7 +27,7 @@
#define rmb() asm volatile("dmb 1\n" : : : "memory")
#define wmb() asm volatile("dmb 2\n" : : : "memory")
-#elif !defined(CONFIG_ARC_PLAT_EZNPS) /* CONFIG_ISA_ARCOMPACT */
+#else
/*
* ARCompact based cores (ARC700) only have SYNC instruction which is super
@@ -37,13 +37,6 @@
#define mb() asm volatile("sync\n" : : : "memory")
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#include <plat/ctop.h>
-
-#define mb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-#define rmb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory")
-
#endif
#include <asm-generic/barrier.h>
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 50eb3f64a77c..f5a936496f06 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -14,242 +14,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
-#include <asm/barrier.h>
-#ifndef CONFIG_ARC_HAS_LLSC
-#include <asm/smp.h>
-#endif
-
-#ifdef CONFIG_ARC_HAS_LLSC
-
-/*
- * Hardware assisted Atomic-R-M-W
- */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned int temp; \
- \
- m += nr >> 5; \
- \
- nr &= 0x1f; \
- \
- __asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
- " " #asm_op " %0, %0, %2 \n" \
- " scond %0, [%1] \n" \
- " bnz 1b \n" \
- : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
- : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
- "ir"(nr) \
- : "cc"); \
-}
-
-/*
- * Semantically:
- * Test the bit
- * if clear
- * set it and return 0 (old value)
- * else
- * return 1 (old value).
- *
- * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
- * and the old value of bit is returned
- */
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old, temp; \
- \
- m += nr >> 5; \
- \
- nr &= 0x1f; \
- \
- /* \
- * Explicit full memory barrier needed before/after as \
- * LLOCK/SCOND themselves don't provide any such smenatic \
- */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- "1: llock %0, [%2] \n" \
- " " #asm_op " %1, %0, %3 \n" \
- " scond %1, [%2] \n" \
- " bnz 1b \n" \
- : "=&r"(old), "=&r"(temp) \
- : "r"(m), "ir"(nr) \
- : "cc"); \
- \
- smp_mb(); \
- \
- return (old & (1 << nr)) != 0; \
-}
-
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
-
-/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
- *
- * There's "significant" micro-optimization in writing our own variants of
- * bitops (over generic variants)
- *
- * (1) The generic APIs have "signed" @nr while we have it "unsigned"
- * This avoids extra code to be generated for pointer arithmatic, since
- * is "not sure" that index is NOT -ve
- * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc
- * only consider bottom 5 bits of @nr, so NO need to mask them off.
- * (GCC Quirk: however for constant @nr we still need to do the masking
- * at compile time)
- */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long temp, flags; \
- m += nr >> 5; \
- \
- /* \
- * spin lock/unlock provide the needed smp_mb() before/after \
- */ \
- bitops_lock(flags); \
- \
- temp = *m; \
- *m = temp c_op (1UL << (nr & 0x1f)); \
- \
- bitops_unlock(flags); \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old, flags; \
- m += nr >> 5; \
- \
- bitops_lock(flags); \
- \
- old = *m; \
- *m = old c_op (1UL << (nr & 0x1f)); \
- \
- bitops_unlock(flags); \
- \
- return (old & (1UL << (nr & 0x1f))) != 0; \
-}
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#define BIT_OP(op, c_op, asm_op) \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- m += nr >> 5; \
- \
- nr = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- nr = ~nr; \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- : \
- : "r"(nr), "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old; \
- \
- m += nr >> 5; \
- \
- nr = old = (1UL << (nr & 0x1f)); \
- if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \
- old = ~old; \
- \
- /* Explicit full memory barrier needed before/after */ \
- smp_mb(); \
- \
- __asm__ __volatile__( \
- " mov r2, %0\n" \
- " mov r3, %1\n" \
- " .word %2\n" \
- " mov %0, r2" \
- : "+r"(old) \
- : "r"(m), "i"(asm_op) \
- : "r2", "r3", "memory"); \
- \
- smp_mb(); \
- \
- return (old & nr) != 0; \
-}
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
-/***************************************
- * Non atomic variants
- **************************************/
-
-#define __BIT_OP(op, c_op, asm_op) \
-static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
-{ \
- unsigned long temp; \
- m += nr >> 5; \
- \
- temp = *m; \
- *m = temp c_op (1UL << (nr & 0x1f)); \
-}
-
-#define __TEST_N_BIT_OP(op, c_op, asm_op) \
-static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{ \
- unsigned long old; \
- m += nr >> 5; \
- \
- old = *m; \
- *m = old c_op (1UL << (nr & 0x1f)); \
- \
- return (old & (1UL << (nr & 0x1f))) != 0; \
-}
-
-#define BIT_OPS(op, c_op, asm_op) \
- \
- /* set_bit(), clear_bit(), change_bit() */ \
- BIT_OP(op, c_op, asm_op) \
- \
- /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
- TEST_N_BIT_OP(op, c_op, asm_op) \
- \
- /* __set_bit(), __clear_bit(), __change_bit() */ \
- __BIT_OP(op, c_op, asm_op) \
- \
- /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
- __TEST_N_BIT_OP(op, c_op, asm_op)
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-BIT_OPS(set, |, bset)
-BIT_OPS(clear, & ~, bclr)
-BIT_OPS(change, ^, bxor)
-#else
-BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3)
-BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3)
-BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3)
-#endif
-
-/*
- * This routine doesn't need to be atomic.
- */
-static inline int
-test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
- unsigned long mask;
-
- addr += nr >> 5;
-
- mask = 1UL << (nr & 0x1f);
-
- return ((mask & *addr) != 0);
-}
#ifdef CONFIG_ISA_ARCOMPACT
@@ -297,10 +61,8 @@ static inline int constant_fls(unsigned int x)
x <<= 2;
r -= 2;
}
- if (!(x & 0x80000000u)) {
- x <<= 1;
+ if (!(x & 0x80000000u))
r -= 1;
- }
return r;
}
@@ -320,7 +82,7 @@ static inline __attribute__ ((const)) int fls(unsigned int x)
/*
* __fls: Similar to fls, but zero based (0-31)
*/
-static inline __attribute__ ((const)) int __fls(unsigned long x)
+static inline __attribute__ ((const)) unsigned long __fls(unsigned long x)
{
if (!x)
return 0;
@@ -352,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
* @result: [1-32]
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
*/
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
{
int n;
@@ -369,7 +131,7 @@ static inline __attribute__ ((const)) int fls(unsigned long x)
/*
* __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
*/
-static inline __attribute__ ((const)) int __fls(unsigned long x)
+static inline __attribute__ ((const)) unsigned long __fls(unsigned long x)
{
/* FLS insn has exactly same semantics as the API */
return __builtin_arc_fls(x);
@@ -379,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
* ffs = Find First Set in word (LSB to MSB)
* @result: [1-32], 0 if all 0's
*/
-static inline __attribute__ ((const)) int ffs(unsigned long x)
+static inline __attribute__ ((const)) int ffs(unsigned int x)
{
int n;
@@ -424,8 +186,9 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
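
The constant_fls() hunk above only drops a dead shift — x is not read again after that point — so behaviour is unchanged and the contract documented in this file still holds. A quick self-check, assuming a hosted test build where these helpers are visible:

	#include <assert.h>

	static void fls_examples(void)
	{
		assert(fls(0) == 0);		/* no bit set */
		assert(fls(1) == 1);		/* result is 1-based */
		assert(fls(0x80000000u) == 32);	/* MSB set */
	}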
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 0be19fd1a412..4c453ba96c51 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -13,7 +13,8 @@
struct task_struct;
void show_regs(struct pt_regs *regs);
-void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs);
+void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs,
+ const char *loglvl);
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
unsigned long address);
void die(const char *str, struct pt_regs *regs, unsigned long address);
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index d8ece4292388..f0f1fc5d62b6 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -62,10 +62,6 @@
#define ARCH_SLAB_MINALIGN 8
#endif
-extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
-
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e201b4b1655a..329c94cd45d8 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -18,24 +18,18 @@
#include <linux/mm.h>
#include <asm/shmparam.h>
-/*
- * Semantically we need this because icache doesn't snoop dcache/dma.
- * However ARC Cache flush requires paddr as well as vaddr, latter not available
- * in the flush_icache_page() API. So we no-op it but do the equivalent work
- * in update_mmu_cache()
- */
-#define flush_icache_page(vma, page)
-
void flush_cache_all(void);
void flush_icache_range(unsigned long kstart, unsigned long kend);
void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len);
-void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr);
-void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
+void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
+void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
+#define flush_dcache_folio flush_dcache_folio
void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
void dma_cache_inv(phys_addr_t start, unsigned long sz);
@@ -46,35 +40,15 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz);
/* TBD: optimize this */
#define flush_cache_vmap(start, end) flush_cache_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_cache_all()
#define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-
#define flush_cache_mm(mm) /* called on munmap/exit */
#define flush_cache_range(mm, u_vstart, u_vend)
#define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */
-#else /* VIPT aliasing dcache */
-
-/* To clear out stale userspace mappings */
-void flush_cache_mm(struct mm_struct *mm);
-void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start,unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long page);
-
-/*
- * To make sure that userspace mapping is flushed to memory before
- * get_user_pages() uses a kernel mapping to access the page
- */
-#define ARCH_HAS_FLUSH_ANON_PAGE
-void flush_anon_page(struct vm_area_struct *vma,
- struct page *page, unsigned long u_vaddr);
-
-#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */
-
/*
* A new pagecache page has PG_arch_1 clear - thus dcache dirty by default
* This works around some PIO based drivers which don't call flush_dcache_page
@@ -82,28 +56,6 @@ void flush_anon_page(struct vm_area_struct *vma,
*/
#define PG_dc_clean PG_arch_1
-#define CACHE_COLORS_NUM 4
-#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1)
-#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK)
-
-/*
- * Simple wrapper over config option
- * Bootup code ensures that hardware matches kernel configuration
- */
-static inline int cache_is_vipt_aliasing(void)
-{
- return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-}
-
-/*
- * checks if two addresses (after page aligning) index into same cache set
- */
-#define addr_not_cache_congruent(addr1, addr2) \
-({ \
- cache_is_vipt_aliasing() ? \
- (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \
-})
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h
new file mode 100644
index 000000000000..05fc7ed59712
--- /dev/null
+++ b/arch/arc/include/asm/cachetype.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ARC_CACHETYPE_H
+#define __ASM_ARC_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing() true
+
+#endif
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 69debd77cd04..0b485800a392 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -24,7 +24,7 @@
*/
static inline __sum16 csum_fold(__wsum s)
{
- unsigned r = s << 16 | s >> 16; /* ror */
+ unsigned int r = s << 16 | s >> 16; /* ror */
s = ~s;
s -= r;
return s >> 16;
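
A hand-worked value for the fold above, in a stand-alone model (illustrative): for s = 0x00012345 the rotate gives r = 0x23450001, ~s = 0xfffedcba, and ~s - r = 0xdcb9dcb9, whose top half 0xdcb9 equals ~(0x0001 + 0x2345), i.e. the usual 16-bit one's-complement fold:

	#include <assert.h>
	#include <stdint.h>

	static uint16_t csum_fold_model(uint32_t s)
	{
		uint32_t r = s << 16 | s >> 16;	/* ror by 16 */

		s = ~s;
		s -= r;
		return s >> 16;
	}

	static void csum_fold_example(void)
	{
		assert(csum_fold_model(0x00012345u) == 0xdcb9);
	}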
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index c11398160240..e138fde067de 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H
+#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/barrier.h>
@@ -13,212 +14,130 @@
#ifdef CONFIG_ARC_HAS_LLSC
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- unsigned long prev;
-
- /*
- * Explicit full memory barrier needed before/after as
- * LLOCK/SCOND thmeselves don't provide any such semantics
- */
- smp_mb();
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " brne %0, %2, 2f \n"
- " scond %3, [%1] \n"
- " bnz 1b \n"
- "2: \n"
- : "=&r"(prev) /* Early clobber, to prevent reg reuse */
- : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
- "ir"(expected),
- "r"(new) /* can't be "ir". scond can't take LIMM for "b" */
- : "cc", "memory"); /* so that gcc knows memory is being written here */
-
- smp_mb();
-
- return prev;
-}
-
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- unsigned long flags;
- int prev;
- volatile unsigned long *p = ptr;
-
- /*
- * spin lock/unlock provide the needed smp_mb() before/after
- */
- atomic_ops_lock(flags);
- prev = *p;
- if (prev == expected)
- *p = new;
- atomic_ops_unlock(flags);
- return prev;
-}
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- write_aux_reg(CTOP_AUX_GPA1, expected);
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2"
- : "+r"(new)
- : "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return new;
-}
-
-#endif /* CONFIG_ARC_HAS_LLSC */
-
-#define cmpxchg(ptr, o, n) ({ \
- (typeof(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n)); \
-})
-
-/*
- * atomic_cmpxchg is same as cmpxchg
- * LLSC: only different in data-type, semantics are exactly same
- * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
- * semantics, and this lock also happens to be used by atomic_*()
- */
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
/*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * if (*ptr == @old)
+ * *ptr = @new
*/
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
- int size)
-{
- extern unsigned long __xchg_bad_pointer(void);
-
- switch (size) {
- case 4:
- smp_mb();
+#define __cmpxchg(ptr, old, new) \
+({ \
+ __typeof__(*(ptr)) _prev; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%1] \n" \
+ " brne %0, %2, 2f \n" \
+ " scond %3, [%1] \n" \
+ " bnz 1b \n" \
+ "2: \n" \
+ : "=&r"(_prev) /* Early clobber prevent reg reuse */ \
+ : "r"(ptr), /* Not "m": llock only supports reg */ \
+ "ir"(old), \
+ "r"(new) /* Not "ir": scond can't take LIMM */ \
+ : "cc", \
+ "memory"); /* gcc knows memory is clobbered */ \
+ \
+ _prev; \
+})
- __asm__ __volatile__(
- " ex %0, [%1] \n"
- : "+r"(val)
- : "r"(ptr)
- : "memory");
+#define arch_cmpxchg_relaxed(ptr, old, new) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _o_ = (old); \
+ __typeof__(*(ptr)) _n_ = (new); \
+ __typeof__(*(ptr)) _prev_; \
+ \
+ switch(sizeof((_p_))) { \
+ case 4: \
+ _prev_ = __cmpxchg(_p_, _o_, _n_); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ _prev_; \
+})
- smp_mb();
+#else
- return val;
- }
- return __xchg_bad_pointer();
-}
+#define arch_cmpxchg(ptr, old, new) \
+({ \
+ volatile __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _o_ = (old); \
+ __typeof__(*(ptr)) _n_ = (new); \
+ __typeof__(*(ptr)) _prev_; \
+ unsigned long __flags; \
+ \
+ BUILD_BUG_ON(sizeof(_p_) != 4); \
+ \
+ /* \
+ * spin lock/unlock provide the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(__flags); \
+ _prev_ = *_p_; \
+ if (_prev_ == _o_) \
+ *_p_ = _n_; \
+ atomic_ops_unlock(__flags); \
+ _prev_; \
+})
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
- sizeof(*(ptr))))
+#endif
/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- * Hence xchg() needs to follow same locking rules.
- *
- * Technically the lock is also needed for UP (boils down to irq save/restore)
- * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
- * Other way around, xchg is one instruction anyways, so can't be interrupted
- * as such
+ * xchg
*/
+#ifdef CONFIG_ARC_HAS_LLSC
-#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
-
-#define xchg(ptr, with) \
-({ \
- unsigned long flags; \
- typeof(*(ptr)) old_val; \
- \
- atomic_ops_lock(flags); \
- old_val = _xchg(ptr, with); \
- atomic_ops_unlock(flags); \
- old_val; \
+#define __arch_xchg(ptr, val) \
+({ \
+ __asm__ __volatile__( \
+ " ex %0, [%1] \n" /* set new value */ \
+ : "+r"(val) \
+ : "r"(ptr) \
+ : "memory"); \
+ _val_; /* get old value */ \
})
-#else
-
-#define xchg(ptr, with) _xchg(ptr, with)
-
-#endif
-
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
- int size)
-{
- extern unsigned long __xchg_bad_pointer(void);
-
- switch (size) {
- case 4:
- /*
- * Explicit full memory barrier needed before/after
- */
- smp_mb();
-
- __asm__ __volatile__(
- " mov r2, %0\n"
- " mov r3, %1\n"
- " .word %2\n"
- " mov %0, r2\n"
- : "+r"(val)
- : "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3)
- : "r2", "r3", "memory");
-
- smp_mb();
-
- return val;
- }
- return __xchg_bad_pointer();
-}
-
-#define xchg(ptr, with) ({ \
- (typeof(*(ptr)))__xchg((unsigned long)(with), \
- (ptr), \
- sizeof(*(ptr))); \
+#define arch_xchg_relaxed(ptr, val) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _val_ = (val); \
+ \
+ switch(sizeof(*(_p_))) { \
+ case 4: \
+ _val_ = __arch_xchg(_p_, _val_); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ _val_; \
})
-#endif /* CONFIG_ARC_PLAT_EZNPS */
+#else /* !CONFIG_ARC_HAS_LLSC */
/*
- * "atomic" variant of xchg()
- * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
- * is incorrect. But here's the rationale:
- * SMP : Even xchg() takes the atomic_ops_lock, so OK.
- * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
- * is natively "SMP safe", no serialization required).
- * UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
- * could clobber them. atomic_xchg() itself would be 1 insn, so it
- * can't be clobbered by others. Thus no serialization required when
- * atomic_xchg is involved.
+ * The EX instruction is baseline and present in !LLSC configs too. But in
+ * that regime it still needs to use the @atomic_ops_lock spinlock to allow
+ * interop with cmpxchg(), which uses the spinlock in !LLSC
+ * (llist.h uses xchg and cmpxchg on the same data)
*/
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+#define arch_xchg(ptr, val) \
+({ \
+ __typeof__(ptr) _p_ = (ptr); \
+ __typeof__(*(ptr)) _val_ = (val); \
+ \
+ unsigned long __flags; \
+ \
+ atomic_ops_lock(__flags); \
+ \
+ __asm__ __volatile__( \
+ " ex %0, [%1] \n" \
+ : "+r"(_val_) \
+ : "r"(_p_) \
+ : "memory"); \
+ \
+ atomic_ops_unlock(__flags); \
+ _val_; \
+})
+
+#endif
#endif
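
Callers normally reach these through the generic cmpxchg()/xchg() wrappers rather than the arch_ macros directly. A hypothetical 32-bit use, purely to show the contract (claim_slot and its arguments are invented for illustration):

	/* atomically claim a free (zero) slot; true if we installed id */
	static inline bool claim_slot(u32 *slot, u32 id)
	{
		return cmpxchg(slot, 0, id) == 0;
	}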
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 9b9bdd3e6538..06be89f6f2f0 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -13,7 +13,7 @@
#ifdef CONFIG_ARC_CURR_IN_REG
-register struct task_struct *curr_arc asm("r25");
+register struct task_struct *curr_arc asm("gp");
#define current (curr_arc)
#else
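
With CONFIG_ARC_CURR_IN_REG, current is a GCC global register variable pinned in gp, so a dereference compiles to a gp-relative access; roughly (hypothetical codegen, not part of the patch):

	pid_t p = current->pid;	/* ld  rN, [gp, #offsetof(struct task_struct, pid)] */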
diff --git a/arch/arc/include/asm/dma.h b/arch/arc/include/asm/dma.h
index 5b744f4b10a7..02431027ed2f 100644
--- a/arch/arc/include/asm/dma.h
+++ b/arch/arc/include/asm/dma.h
@@ -7,10 +7,5 @@
#define ASM_ARC_DMA_H
#define MAX_DMA_ADDRESS 0xC0000000
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy 0
-#endif
#endif
diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
new file mode 100644
index 000000000000..cd5636dfeb6f
--- /dev/null
+++ b/arch/arc/include/asm/dsp-impl.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+#ifndef __ASM_ARC_DSP_IMPL_H
+#define __ASM_ARC_DSP_IMPL_H
+
+#include <asm/dsp.h>
+
+#define DSP_CTRL_DISABLED_ALL 0
+
+#ifdef __ASSEMBLY__
+
+/* clobbers r5 register */
+.macro DSP_EARLY_INIT
+#ifdef CONFIG_ISA_ARCV2
+ lr r5, [ARC_AUX_DSP_BUILD]
+ bmsk r5, r5, 7
+ breq r5, 0, 1f
+ mov r5, DSP_CTRL_DISABLED_ALL
+ sr r5, [ARC_AUX_DSP_CTRL]
+1:
+#endif
+.endm
+
+/* clobbers r10, r11 registers pair */
+.macro DSP_SAVE_REGFILE_IRQ
+#if defined(CONFIG_ARC_DSP_KERNEL)
+ /*
+ * Drop any changes to DSP_CTRL made by userspace so userspace won't be
+	 * able to break the kernel - reset it to the DSP_CTRL_DISABLED_ALL value
+ */
+ mov r10, DSP_CTRL_DISABLED_ALL
+ sr r10, [ARC_AUX_DSP_CTRL]
+
+#elif defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS)
+ /*
+ * Save DSP_CTRL register and reset it to value suitable for kernel
+ * (DSP_CTRL_DISABLED_ALL)
+ */
+ mov r10, DSP_CTRL_DISABLED_ALL
+ aex r10, [ARC_AUX_DSP_CTRL]
+ st r10, [sp, PT_DSP_CTRL]
+
+#endif
+.endm
+
+/* clobbers r10, r11 registers pair */
+.macro DSP_RESTORE_REGFILE_IRQ
+#if defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS)
+ ld r10, [sp, PT_DSP_CTRL]
+ sr r10, [ARC_AUX_DSP_CTRL]
+
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/sched.h>
+#include <asm/asserts.h>
+#include <asm/switch_to.h>
+
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+
+/*
+ * As we save the old and restore the new AUX register value in the same
+ * place, we can optimize a bit and use the AEX instruction (swap contents
+ * of an auxiliary register with a core register) instead of an LR + SR pair.
+ */
+#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux) \
+do { \
+ long unsigned int _scratch; \
+ \
+ __asm__ __volatile__( \
+ "ld %0, [%2, %4] \n" \
+ "aex %0, [%3] \n" \
+ "st %0, [%1, %4] \n" \
+ : \
+ "=&r" (_scratch) /* must be early clobber */ \
+ : \
+ "r" (_saveto), \
+ "r" (_readfrom), \
+ "Ir" (_aux), \
+ "Ir" (_offt) \
+ : \
+ "memory" \
+ ); \
+} while (0)
+
+#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux) \
+ AUX_SAVE_RESTORE(_saveto, _readfrom, \
+ offsetof(struct dsp_callee_regs, _aux), \
+ ARC_AUX_##_aux)
+
+static inline void dsp_save_restore(struct task_struct *prev,
+ struct task_struct *next)
+{
+ long unsigned int *saveto = &prev->thread.dsp.ACC0_GLO;
+ long unsigned int *readfrom = &next->thread.dsp.ACC0_GLO;
+
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GLO);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GHI);
+
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL);
+
+#ifdef CONFIG_ARC_DSP_AGU_USERSPACE
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP0);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP1);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP2);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP3);
+
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_OS0);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_OS1);
+
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD0);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD1);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD2);
+ DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD3);
+#endif /* CONFIG_ARC_DSP_AGU_USERSPACE */
+}
+
+#else /* !CONFIG_ARC_DSP_SAVE_RESTORE_REGS */
+#define dsp_save_restore(p, n)
+#endif /* CONFIG_ARC_DSP_SAVE_RESTORE_REGS */
+
+static inline bool dsp_exist(void)
+{
+ struct bcr_generic bcr;
+
+ READ_BCR(ARC_AUX_DSP_BUILD, bcr);
+ return !!bcr.ver;
+}
+
+static inline bool agu_exist(void)
+{
+ struct bcr_generic bcr;
+
+ READ_BCR(ARC_AUX_AGU_BUILD, bcr);
+ return !!bcr.ver;
+}
+
+static inline void dsp_config_check(void)
+{
+ CHK_OPT_STRICT(CONFIG_ARC_DSP_HANDLED, dsp_exist());
+ CHK_OPT_WEAK(CONFIG_ARC_DSP_AGU_USERSPACE, agu_exist());
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_ARC_DSP_IMPL_H */
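
To make the offsetof plumbing concrete, DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GLO) expands (by hand, for illustration) to:

	AUX_SAVE_RESTORE(saveto, readfrom,
			 offsetof(struct dsp_callee_regs, ACC0_GLO),
			 ARC_AUX_ACC0_GLO);

	/* i.e. one ld/aex/st triplet: load next's saved ACC0_GLO, AEX-swap it
	 * with the live aux register (reading back prev's value), then store
	 * prev's copy into its save area */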
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
new file mode 100644
index 000000000000..202c78e56704
--- /dev/null
+++ b/arch/arc/include/asm/dsp.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+#ifndef __ASM_ARC_DSP_H
+#define __ASM_ARC_DSP_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * DSP-related saved registers - need to be saved only when the task is
+ * scheduled out.
+ * Structure field names must correspond to the aux register definitions for
+ * automatic offset calculation in the DSP_AUX_SAVE_RESTORE macros.
+ */
+struct dsp_callee_regs {
+ unsigned long ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_FFT_CTRL;
+#ifdef CONFIG_ARC_DSP_AGU_USERSPACE
+ unsigned long AGU_AP0, AGU_AP1, AGU_AP2, AGU_AP3;
+ unsigned long AGU_OS0, AGU_OS1;
+ unsigned long AGU_MOD0, AGU_MOD1, AGU_MOD2, AGU_MOD3;
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_ARC_DSP_H */
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
index 5f4de05bd4ee..a0d5ebe1bc3f 100644
--- a/arch/arc/include/asm/dwarf.h
+++ b/arch/arc/include/asm/dwarf.h
@@ -10,23 +10,31 @@
#ifdef ARC_DW2_UNWIND_AS_CFI
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_REGISTER .cfi_register
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_UNDEFINED .cfi_undefined
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_UNDEFINED .cfi_undefined
#else
#define CFI_IGNORE #
-#define CFI_STARTPROC CFI_IGNORE
-#define CFI_ENDPROC CFI_IGNORE
-#define CFI_DEF_CFA CFI_IGNORE
-#define CFI_REGISTER CFI_IGNORE
-#define CFI_REL_OFFSET CFI_IGNORE
-#define CFI_UNDEFINED CFI_IGNORE
+#define CFI_STARTPROC CFI_IGNORE
+#define CFI_ENDPROC CFI_IGNORE
+#define CFI_DEF_CFA CFI_IGNORE
+#define CFI_DEF_CFA_OFFSET CFI_IGNORE
+#define CFI_DEF_CFA_REGISTER CFI_IGNORE
+#define CFI_OFFSET CFI_IGNORE
+#define CFI_REL_OFFSET CFI_IGNORE
+#define CFI_REGISTER CFI_IGNORE
+#define CFI_RESTORE CFI_IGNORE
+#define CFI_UNDEFINED CFI_IGNORE
#endif /* !ARC_DW2_UNWIND_AS_CFI */
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index c77a0e3671ac..0284ace0e1ab 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -19,7 +19,7 @@
#define R_ARC_32_PCREL 0x31
/*to set parameters in the core dumps */
-#define ELF_ARCH EM_ARCOMPACT
+#define ELF_ARCH EM_ARC_INUSE
#define ELF_CLASS ELFCLASS32
#ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 41b16f21beec..3802a2daaf86 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -4,6 +4,7 @@
#define __ASM_ARC_ENTRY_ARCV2_H
#include <asm/asm-offsets.h>
+#include <asm/dsp-impl.h>
#include <asm/irqflags-arcv2.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
@@ -17,7 +18,6 @@
* | orig_r0 |
* | event/ECR |
* | bta |
- * | user_r25 |
* | gp |
* | fp |
* | sp |
@@ -48,14 +48,18 @@
/*------------------------------------------------------------------------*/
.macro INTERRUPT_PROLOGUE
- ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
+ ; Before jumping to Interrupt Vector, hardware micro-ops did following:
; 1. SP auto-switched to kernel mode stack
; 2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
; 3. Auto save: (mandatory) Push PC and STAT32 on stack
; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
- ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
+ ; 4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
;
- ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
+ ; Now
+ ; 4b. If Auto-save (optional) not enabled in hw, manually save them
+ ; 5. Manually save: r12,r30, sp,fp,gp, ACCL pair
+ ;
+ ; At the end, SP points to pt_regs
#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
; carve pt_regs on stack (case #3), PC/STAT32 already on stack
@@ -71,15 +75,16 @@
.endm
/*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
- ; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+ ; Before jumping to Exception Vector, hardware micro-ops did following:
; 1. SP auto-switched to kernel mode stack
; 2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
;
- ; (B) Manually save the complete reg file below
+ ; Now manually save rest of reg file
+ ; At the end, SP points to pt_regs
- sub sp, sp, SZ_PT_REGS ; carve pt_regs
+ sub sp, sp, SZ_PT_REGS ; carve space for pt_regs
; _HARD saves r10 clobbered by _SOFT as scratch hence comes first
@@ -99,6 +104,16 @@
; OUTPUT: r10 has ECR expected by EV_Trap
.endm
+.macro EXCEPTION_PROLOGUE
+
+ EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10
+
+ lr r0, [efa]
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN ; clobbers r9
+.endm
+
/*------------------------------------------------------------------------
* This macro saves the registers manually which would normally be autosaved
* by hardware on taken interrupts. It is used by
@@ -134,10 +149,10 @@
*/
.macro __SAVE_REGFILE_SOFT
- ST2 gp, fp, PT_r26 ; gp (r26), fp (r27)
-
- st r12, [sp, PT_sp + 4]
- st r30, [sp, PT_sp + 8]
+ st fp, [sp, PT_fp] ; r27
+ st r30, [sp, PT_r30]
+ st r12, [sp, PT_r12]
+ st r26, [sp, PT_r26] ; gp
; Saving pt_regs->sp correctly requires some extra work due to the way
; Auto stack switch works
@@ -152,17 +167,19 @@
; ISA requires ADD.nz to have same dest and src reg operands
mov.nz r10, sp
- add.nz r10, r10, SZ_PT_REGS ; K mode SP
+ add2.nz r10, r10, SZ_PT_REGS/4 ; K mode SP
st r10, [sp, PT_sp] ; SP (pt_regs->sp)
-#ifdef CONFIG_ARC_CURR_IN_REG
- st r25, [sp, PT_user_r25]
- GET_CURR_TASK_ON_CPU r25
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ ST2 r58, r59, PT_r58
#endif
-#ifdef CONFIG_ARC_HAS_ACCL_REGS
- ST2 r58, r59, PT_sp + 12
+ /* clobbers r10, r11 registers pair */
+ DSP_SAVE_REGFILE_IRQ
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ GET_CURR_TASK_ON_CPU gp
#endif
.endm
@@ -170,10 +187,10 @@
/*------------------------------------------------------------------------*/
.macro __RESTORE_REGFILE_SOFT
- LD2 gp, fp, PT_r26 ; gp (r26), fp (r27)
-
- ld r12, [sp, PT_sp + 4]
- ld r30, [sp, PT_sp + 8]
+ ld fp, [sp, PT_fp]
+ ld r30, [sp, PT_r30]
+ ld r12, [sp, PT_r12]
+ ld r26, [sp, PT_r26]
; Restore SP (into AUX_USER_SP) only if returning to U mode
; - for K mode, it will be implicitly restored as stack is unwound
@@ -185,12 +202,11 @@
sr r10, [AUX_USER_SP]
1:
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, PT_user_r25]
-#endif
+ /* clobbers r10, r11 registers pair */
+ DSP_RESTORE_REGFILE_IRQ
#ifdef CONFIG_ARC_HAS_ACCL_REGS
- LD2 r58, r59, PT_sp + 12
+ LD2 r58, r59, PT_r58
#endif
.endm
@@ -227,6 +243,8 @@
#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
__RESTORE_REGFILE_HARD
+
+ ; SP points to PC/STAT32: hw restores them despite NO_AUTOSAVE
add sp, sp, SZ_PT_REGS - 8
#else
add sp, sp, PT_r0
@@ -241,7 +259,7 @@
btst r0, STATUS_U_BIT ; Z flag set if K, used in restoring SP
- ld r10, [sp, PT_event + 4]
+ ld r10, [sp, PT_bta]
sr r10, [erbta]
LD2 r10, r11, PT_ret
@@ -256,8 +274,8 @@
.macro FAKE_RET_FROM_EXCPN
lr r9, [status32]
- bic r9, r9, STATUS_AE_MASK
- or r9, r9, STATUS_IE_MASK
+ bclr r9, r9, STATUS_AE_BIT
+ bset r9, r9, STATUS_IE_BIT
kflag r9
.endm
@@ -273,4 +291,36 @@
/* M = 8-1 N = 8 */
.endm
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
+
#endif
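
One subtlety in the __SAVE_REGFILE_SOFT hunk above: add2 scales its last source operand by 4, so the rewritten instruction computes the same kernel-mode SP as before (the /4 immediate presumably fits the instruction's limited immediate field, where the full SZ_PT_REGS may not). Sketch of the equivalence, assuming the standard ARCv2 add2 semantics:

	; add2 a, b, c  computes  a = b + (c << 2), hence
	add2.nz	r10, r10, SZ_PT_REGS/4	; same result as: add.nz r10, r10, SZ_PT_REGS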
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index c3aa775878dc..92c3e9f13252 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -21,7 +21,7 @@
* r25 contains the kernel current task ptr
* - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
* - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
- * address Write back load ld.ab instead of seperate ld/add instn
+ * address Write back load ld.ab instead of separate ld/add instn
*
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
@@ -33,9 +33,90 @@
#include <asm/irqflags-compact.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
+/* Note on the LD/ST addr modes with addr reg wback
+ *
+ * LD.a same as LD.aw
+ *
+ * LD.a reg1, [reg2, x] => Pre Incr
+ * Eff Addr for load = [reg2 + x]
+ *
+ * LD.ab reg1, [reg2, x] => Post Incr
+ * Eff Addr for load = [reg2]
+ */
+
+.macro PUSHAX aux
+ lr r9, [\aux]
+ push r9
+.endm
+
+.macro POPAX aux
+ pop r9
+ sr r9, [\aux]
+.endm
+
+.macro SAVE_R0_TO_R12
+ push r0
+ push r1
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+.endm
+
+.macro RESTORE_R12_TO_R0
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r1
+ pop r0
+.endm
+
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
/*--------------------------------------------------------------
* Switch to Kernel Mode stack if SP points to User Mode stack
@@ -130,19 +211,11 @@
* to be saved again on kernel mode stack, as part of pt_regs.
*-------------------------------------------------------------*/
.macro PROLOG_FREEUP_REG reg, mem
-#ifndef ARC_USE_SCRATCH_REG
- sr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
st \reg, [\mem]
-#endif
.endm
.macro PROLOG_RESTORE_REG reg, mem
-#ifndef ARC_USE_SCRATCH_REG
- lr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
ld \reg, [\mem]
-#endif
.endm
/*--------------------------------------------------------------
@@ -152,7 +225,7 @@
*
* After this it is safe to call the "C" handlers
*-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
/* Need at least 1 reg to code the early exception prologue */
PROLOG_FREEUP_REG r9, @ex_saved_reg1
@@ -163,14 +236,6 @@
/* ARC700 doesn't provide auto-stack switching */
SWITCH_TO_KERNEL_STK
-#ifdef CONFIG_ARC_CURR_IN_REG
- /* Treat r25 as scratch reg (save on stack) and load with "current" */
- PUSH r25
- GET_CURR_TASK_ON_CPU r25
-#else
- sub sp, sp, 4
-#endif
-
st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */
sub sp, sp, 4 /* skip pt_regs->sp, already saved above */
@@ -189,14 +254,24 @@
PUSHAX lp_start
PUSHAX erbta
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
+ lr r10, [ecr]
+ st r10, [sp, PT_event]
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* gp already saved on stack: now load with "current" */
+ GET_CURR_TASK_ON_CPU gp
#endif
+ ; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
- lr r10, [ecr]
- st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */
+.macro EXCEPTION_PROLOGUE
+
+ EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10
+
+ lr r0, [efa]
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN ; clobbers r9
.endm
/*--------------------------------------------------------------
@@ -211,11 +286,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX erbta
POPAX lp_start
@@ -231,11 +301,8 @@
POP gp
RESTORE_R12_TO_R0
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, 12]
-#endif
ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
+ /* orig_r0, ECR skipped automatically */
.endm
/* Dummy ECR values for Interrupts */
@@ -252,15 +319,8 @@
SWITCH_TO_KERNEL_STK
-#ifdef CONFIG_ARC_CURR_IN_REG
- /* Treat r25 as scratch reg (save on stack) and load with "current" */
- PUSH r25
- GET_CURR_TASK_ON_CPU r25
-#else
- sub sp, sp, 4
-#endif
- PUSH 0x003\LVL\()abcd /* Dummy ECR */
+ st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */
sub sp, sp, 8 /* skip orig_r0 (not needed)
skip pt_regs->sp, already saved above */
@@ -278,10 +338,9 @@
PUSHAX lp_start
PUSHAX bta_l\LVL\()
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- PUSHAX CTOP_AUX_GPA1
- PUSHAX CTOP_AUX_EFLAGS
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* gp already saved on stack: now load with "current" */
+ GET_CURR_TASK_ON_CPU gp
#endif
.endm
@@ -295,11 +354,6 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
-#ifdef CONFIG_ARC_PLAT_EZNPS
- .word CTOP_INST_SCHD_RW
- POPAX CTOP_AUX_EFLAGS
- POPAX CTOP_AUX_GPA1
-#endif
POPAX bta_l\LVL\()
POPAX lp_start
@@ -315,11 +369,7 @@
POP gp
RESTORE_R12_TO_R0
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, 12]
-#endif
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
+ ld sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */
.endm
/* Get thread_info of "current" tsk */
@@ -327,13 +377,11 @@
bic \reg, sp, (THREAD_SIZE - 1)
.endm
-#ifndef CONFIG_ARC_PLAT_EZNPS
/* Get CPU-ID of this core */
.macro GET_CPU_ID reg
lr \reg, [identity]
lsr \reg, \reg, 8
bmsk \reg, \reg, 7
.endm
-#endif
#endif /* __ASM_ARC_ENTRY_COMPACT_H */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index fcdd59d77f42..cf1ba376e992 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -13,187 +13,39 @@
#include <asm/processor.h> /* For VMALLOC_START */
#include <asm/mmu.h>
+#ifdef __ASSEMBLY__
+
#ifdef CONFIG_ISA_ARCOMPACT
#include <asm/entry-compact.h> /* ISA specific bits */
#else
#include <asm/entry-arcv2.h>
#endif
-/* Note on the LD/ST addr modes with addr reg wback
- *
- * LD.a same as LD.aw
- *
- * LD.a reg1, [reg2, x] => Pre Incr
- * Eff Addr for load = [reg2 + x]
- *
- * LD.ab reg1, [reg2, x] => Post Incr
- * Eff Addr for load = [reg2]
+/*
+ * save user mode callee regs as struct callee_regs
+ * - needed by fork/do_signal/unaligned-access-emulation.
*/
-
-.macro PUSH reg
- st.a \reg, [sp, -4]
-.endm
-
-.macro PUSHAX aux
- lr r9, [\aux]
- PUSH r9
-.endm
-
-.macro POP reg
- ld.ab \reg, [sp, 4]
-.endm
-
-.macro POPAX aux
- POP r9
- sr r9, [\aux]
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore Scratch Regs:
- * used by Interrupt/Exception Prologue/Epilogue
- *-------------------------------------------------------------*/
-.macro SAVE_R0_TO_R12
- PUSH r0
- PUSH r1
- PUSH r2
- PUSH r3
- PUSH r4
- PUSH r5
- PUSH r6
- PUSH r7
- PUSH r8
- PUSH r9
- PUSH r10
- PUSH r11
- PUSH r12
-.endm
-
-.macro RESTORE_R12_TO_R0
- POP r12
- POP r11
- POP r10
- POP r9
- POP r8
- POP r7
- POP r6
- POP r5
- POP r4
- POP r3
- POP r2
- POP r1
- POP r0
-
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore callee-saved regs:
- * used by several macros below
- *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R24
- PUSH r13
- PUSH r14
- PUSH r15
- PUSH r16
- PUSH r17
- PUSH r18
- PUSH r19
- PUSH r20
- PUSH r21
- PUSH r22
- PUSH r23
- PUSH r24
-.endm
-
-.macro RESTORE_R24_TO_R13
- POP r24
- POP r23
- POP r22
- POP r21
- POP r20
- POP r19
- POP r18
- POP r17
- POP r16
- POP r15
- POP r14
- POP r13
-.endm
-
-/*--------------------------------------------------------------
- * Collect User Mode callee regs as struct callee_regs - needed by
- * fork/do_signal/unaligned-access-emulation.
- * (By default only scratch regs are saved on entry to kernel)
- *
- * Special handling for r25 if used for caching Task Pointer.
- * It would have been saved in task->thread.user_r25 already, but to keep
- * the interface same it is copied into regular r25 placeholder in
- * struct callee_regs.
- *-------------------------------------------------------------*/
.macro SAVE_CALLEE_SAVED_USER
+ SAVE_ABI_CALLEE_REGS
+.endm
- mov r12, sp ; save SP as ref to pt_regs
- SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- ; Retrieve orig r25 and save it with rest of callee_regs
- ld r12, [r12, PT_user_r25]
- PUSH r12
-#else
- PUSH r25
-#endif
-
+/*
+ * restore user mode callee regs as struct callee_regs
+ * - could have been changed by ptrace tracer or unaligned-access fixup
+ */
+.macro RESTORE_CALLEE_SAVED_USER
+ RESTORE_ABI_CALLEE_REGS
.endm
-/*--------------------------------------------------------------
- * Save kernel Mode callee regs at the time of Contect Switch.
- *
- * Special handling for r25 if used for caching Task Pointer.
- * Kernel simply skips saving it since it will be loaded with
- * incoming task pointer anyways
- *-------------------------------------------------------------*/
+/*
+ * save/restore kernel mode callee regs at the time of context switch
+ */
.macro SAVE_CALLEE_SAVED_KERNEL
-
- SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- sub sp, sp, 4
-#else
- PUSH r25
-#endif
+ SAVE_ABI_CALLEE_REGS
.endm
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_KERNEL
- *-------------------------------------------------------------*/
.macro RESTORE_CALLEE_SAVED_KERNEL
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- add sp, sp, 4 /* skip usual r25 placeholder */
-#else
- POP r25
-#endif
- RESTORE_R24_TO_R13
-.endm
-
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_USER
- *
- * ptrace tracer or unaligned-access fixup might have changed a user mode
- * callee reg which is saved back to usual r25 storage location
- *-------------------------------------------------------------*/
-.macro RESTORE_CALLEE_SAVED_USER
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- POP r12
-#else
- POP r25
-#endif
- RESTORE_R24_TO_R13
-
- ; SP is back to start of pt_regs
-#ifdef CONFIG_ARC_CURR_IN_REG
- st r12, [sp, PT_user_r25]
-#endif
+ RESTORE_ABI_CALLEE_REGS
.endm
/*--------------------------------------------------------------
@@ -229,10 +81,10 @@
#ifdef CONFIG_SMP
-/*-------------------------------------------------
+/*
* Retrieve the current running task on this CPU
- * 1. Determine curr CPU id.
- * 2. Use it to index into _current_task[ ]
+ * - loads it from the backing _current_task[] (can't use the
+ * reg caching the current task)
*/
.macro GET_CURR_TASK_ON_CPU reg
GET_CPU_ID \reg
@@ -254,7 +106,7 @@
add2 \tmp, @_current_task, \tmp
st \tsk, [\tmp]
#ifdef CONFIG_ARC_CURR_IN_REG
- mov r25, \tsk
+ mov gp, \tsk
#endif
.endm
@@ -269,21 +121,20 @@
.macro SET_CURR_TASK_ON_CPU tsk, tmp
st \tsk, [@_current_task]
#ifdef CONFIG_ARC_CURR_IN_REG
- mov r25, \tsk
+ mov gp, \tsk
#endif
.endm
#endif /* SMP / UNI */
-/* ------------------------------------------------------------------
+/*
* Get the ptr to some field of Current Task at @off in task struct
- * -Uses r25 for Current task ptr if that is enabled
+ * - Uses current task cached in reg if enabled
*/
-
#ifdef CONFIG_ARC_CURR_IN_REG
.macro GET_CURR_TASK_FIELD_PTR off, reg
- add \reg, r25, \off
+ add \reg, gp, \off
.endm
#else
@@ -295,4 +146,23 @@
#endif /* CONFIG_ARC_CURR_IN_REG */
+#else /* !__ASSEMBLY__ */
+
+extern void do_signal(struct pt_regs *);
+extern void do_notify_resume(struct pt_regs *);
+extern int do_privilege_fault(unsigned long, struct pt_regs *);
+extern int do_extension_fault(unsigned long, struct pt_regs *);
+extern int insterror_is_error(unsigned long, struct pt_regs *);
+extern int do_memory_error(unsigned long, struct pt_regs *);
+extern int trap_is_brkpt(unsigned long, struct pt_regs *);
+extern int do_misaligned_error(unsigned long, struct pt_regs *);
+extern int do_trap5_error(unsigned long, struct pt_regs *);
+extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *);
+extern void do_machine_check_fault(unsigned long, struct pt_regs *);
+extern void do_non_swi_trap(unsigned long, struct pt_regs *);
+extern void do_insterror_or_kprobe(unsigned long, struct pt_regs *);
+extern void do_page_fault(unsigned long, struct pt_regs *);
+
+#endif
+
#endif /* __ASM_ARC_ENTRY_H */
diff --git a/arch/arc/include/asm/fb.h b/arch/arc/include/asm/fb.h
index dc2e303cdbbb..9c2383d29cbb 100644
--- a/arch/arc/include/asm/fb.h
+++ b/arch/arc/include/asm/fb.h
@@ -1,20 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
#ifndef _ASM_FB_H_
#define _ASM_FB_H_
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
+#include <asm-generic/fb.h>
#endif /* _ASM_FB_H_ */
diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h
new file mode 100644
index 000000000000..006bcf88a7a5
--- /dev/null
+++ b/arch/arc/include/asm/fpu.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ */
+
+#ifndef _ASM_ARC_FPU_H
+#define _ASM_ARC_FPU_H
+
+#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+/* These DPFP regs need to be saved/restored across ctx-sw */
+struct arc_fpu {
+ struct {
+ unsigned int l, h;
+ } aux_dpfp[2];
+};
+
+#define fpu_init_task(regs)
+
+#else
+
+/*
+ * ARCv2 FPU Control aux register
+ * - bits to enable Traps on Exceptions
+ * - Rounding mode
+ *
+ * ARCv2 FPU Status aux register
+ * - FPU exception flags (Inv, Div-by-Zero, overflow, underflow, inexact)
+ * - Flag Write Enable to clear flags explicitly (vs. only by FPU
+ * instructions)
+ */
+
+struct arc_fpu {
+ unsigned int ctrl, status;
+};
+
+extern void fpu_init_task(struct pt_regs *regs);
+
+#endif /* !CONFIG_ISA_ARCOMPACT */
+
+struct task_struct;
+
+extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
+
+#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */
+
+#define fpu_save_restore(p, n)
+#define fpu_init_task(regs)
+
+#endif /* CONFIG_ARC_FPU_SAVE_RESTORE */
+
+#endif /* _ASM_ARC_FPU_H */
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 9d0d070e6c22..607d1c16d4dd 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -75,10 +75,12 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
#ifndef CONFIG_ARC_HAS_LLSC
preempt_disable(); /* to guarantee atomic r-m-w of futex op */
#endif
- pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -101,7 +103,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
- pagefault_enable();
#ifndef CONFIG_ARC_HAS_LLSC
preempt_enable();
#endif
diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h
index 1af00accb37f..a6b8e2c352c4 100644
--- a/arch/arc/include/asm/highmem.h
+++ b/arch/arc/include/asm/highmem.h
@@ -9,49 +9,45 @@
#ifdef CONFIG_HIGHMEM
#include <uapi/asm/page.h>
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
+
+#define FIXMAP_SIZE PGDIR_SIZE
+#define PKMAP_SIZE PGDIR_SIZE
/* start after vmalloc area */
#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE)
-#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */
-#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS)
-#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS)
+#define FIX_KMAP_BEGIN (0UL)
+#define FIX_KMAP_END ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1)
+
+#define FIXADDR_TOP (FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT))
+
+/*
+ * This should be converted to the asm-generic version, but of course this
+ * is needlessly different from all other architectures. Sigh - tglx
+ */
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT)
/* start after fixmap area */
#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE)
-#define PKMAP_SIZE PGDIR_SIZE
#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT)
#define LAST_PKMAP_MASK (LAST_PKMAP - 1)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT)
-#define kmap_prot PAGE_KERNEL
-
-
#include <asm/cacheflush.h>
-extern void *kmap(struct page *page);
-extern void *kmap_high(struct page *page);
-extern void *kmap_atomic(struct page *page);
-extern void __kunmap_atomic(void *kvaddr);
-extern void kunmap_high(struct page *page);
-
extern void kmap_init(void);
+#define arch_kmap_local_post_unmap(vaddr) \
+ local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE)
+
static inline void flush_cache_kmaps(void)
{
flush_cache_all();
}
-
-static inline void kunmap(struct page *page)
-{
- BUG_ON(in_interrupt());
- if (!PageHighMem(page))
- return;
- kunmap_high(page);
-}
-
-
#endif
#endif
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 9a74ce71a767..8a2441670a8f 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -8,9 +8,15 @@
#define _ASM_ARC_HUGEPAGE_H
#include <linux/types.h>
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
+/*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
@@ -22,17 +28,16 @@ static inline pmd_t pte_pmd(pte_t pte)
}
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
-#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
+#define pmd_mkinvalid(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
-#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot))
@@ -59,14 +64,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
-/* Generic variants assume pgtable_t is struct page *, hence need for these */
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
-
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 8f777d6441a5..4fdb7350636c 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -21,8 +21,9 @@
#endif
extern void __iomem *ioremap(phys_addr_t paddr, unsigned long size);
-extern void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
- unsigned long flags);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
return (void __iomem *)port;
@@ -32,8 +33,6 @@ static inline void ioport_unmap(void __iomem *addr)
{
}
-extern void iounmap(const void __iomem *addr);
-
/*
* io{read,write}{16,32}be() macros
*/
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 0309cb405cfb..c574712ad865 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -25,5 +25,6 @@
#include <asm-generic/irq.h>
extern void arc_init_IRQ(void);
+extern void arch_do_IRQ(unsigned int, struct pt_regs *);
#endif
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 7fc73fef5e29..0d63e568d64c 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -50,8 +50,12 @@
* are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
*
* Noted at the time of Abilis Timer List corruption
- * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
- * Reasoning : https://lkml.org/lkml/2013/4/8/15
+ *
+ * Orig Bug + Rejected solution:
+ * https://lore.kernel.org/lkml/1364553218-31255-1-git-send-email-vgupta@synopsys.com
+ *
+ * Reasoning:
+ * https://lore.kernel.org/lkml/CA+55aFyFWjpSVQM6M266tKrG_ZXJzZ-nYejpmXYQXbrr42mGPQ@mail.gmail.com
*
******************************************************************/
@@ -90,6 +94,9 @@ static inline void arch_local_irq_restore(unsigned long flags)
/*
* Unconditionally Enable IRQs
*/
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+extern void arch_local_irq_enable(void);
+#else
static inline void arch_local_irq_enable(void)
{
unsigned long temp;
@@ -102,7 +109,7 @@ static inline void arch_local_irq_enable(void)
: "n"((STATUS_E1_MASK | STATUS_E2_MASK))
: "cc", "memory");
}
-
+#endif
/*
* Unconditionally Disable IRQs
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index 9d9618079739..a339223d9e05 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -31,7 +31,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"nop \n"
".pushsection __jump_table, \"aw\" \n"
@@ -47,7 +47,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
- asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
+ asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"b %l[l_yes] \n"
".pushsection __jump_table, \"aw\" \n"
diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h
deleted file mode 100644
index fecf7851ec32..000000000000
--- a/arch/arc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/*
- * We primarily need to define KM_TYPE_NR here but that in turn
- * is a function of PGDIR_SIZE etc.
- * To avoid circular deps issue, put everything in asm/highmem.h
- */
-#endif
diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h
index 2134721dce44..68e8301c0df2 100644
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -32,9 +32,6 @@ struct kprobe;
void arch_remove_kprobe(struct kprobe *p);
-int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
-
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
@@ -46,7 +43,7 @@ struct kprobe_ctlblk {
};
int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
#else
#define trap_is_kprobe(address, regs)
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index d9ee43c6b7db..8a3fb71e9cfa 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -8,6 +8,10 @@
#include <asm/dwarf.h>
+#define ASM_NL ` /* use '`' to mark new line in macro */
+#define __ALIGN .align 4
+#define __ALIGN_STR __stringify(__ALIGN)
+
#ifdef __ASSEMBLY__
.macro ST2 e, o, off
@@ -28,8 +32,6 @@
#endif
.endm
-#define ASM_NL ` /* use '`' to mark new line in macro */
-
/* annotation for data we want in DCCM - if enabled in .config */
.macro ARCFP_DATA nm
#ifdef CONFIG_ARC_HAS_DCCM
@@ -62,15 +64,15 @@
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_ARC_HAS_ICCM
-#define __arcfp_code __section(.text.arcfp)
+#define __arcfp_code __section(".text.arcfp")
#else
-#define __arcfp_code __section(.text)
+#define __arcfp_code __section(".text")
#endif
#ifdef CONFIG_ARC_HAS_DCCM
-#define __arcfp_data __section(.data.arcfp)
+#define __arcfp_data __section(".data.arcfp")
#else
-#define __arcfp_data __section(.data)
+#define __arcfp_data __section(".data")
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index 73746ed5b834..c4e197059379 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -53,7 +53,7 @@ extern const struct machine_desc __arch_info_begin[], __arch_info_end[];
*/
#define MACHINE_START(_type, _name) \
static const struct machine_desc __mach_desc_##_type \
-__used __section(.arch.info.init) = { \
+__used __section(".arch.info.init") = { \
.name = _name,
#define MACHINE_END \
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
new file mode 100644
index 000000000000..ed9036d4ede3
--- /dev/null
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
+ *
+ * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
+ * This file contains the TLB access registers and commands
+ */
+
+#ifndef _ASM_ARC_MMU_ARCV2_H
+#define _ASM_ARC_MMU_ARCV2_H
+
+/*
+ * TLB Management regs
+ */
+#define ARC_REG_MMU_BCR 0x06f
+
+#ifdef CONFIG_ARC_MMU_V3
+#define ARC_REG_TLBPD0 0x405
+#define ARC_REG_TLBPD1 0x406
+#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
+#define ARC_REG_TLBINDEX 0x407
+#define ARC_REG_TLBCOMMAND 0x408
+#define ARC_REG_PID 0x409
+#define ARC_REG_SCRATCH_DATA0 0x418
+#else
+#define ARC_REG_TLBPD0 0x460
+#define ARC_REG_TLBPD1 0x461
+#define ARC_REG_TLBPD1HI 0x463
+#define ARC_REG_TLBINDEX 0x464
+#define ARC_REG_TLBCOMMAND 0x465
+#define ARC_REG_PID 0x468
+#define ARC_REG_SCRATCH_DATA0 0x46c
+#endif
+
+/* Bits in MMU PID reg */
+#define __TLB_ENABLE (1 << 31)
+#define __PROG_ENABLE (1 << 30)
+#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
+
+/* Bits in TLB Index reg */
+#define TLB_LKUP_ERR 0x80000000
+
+#ifdef CONFIG_ARC_MMU_V3
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
+#endif
+
+/*
+ * TLB Commands
+ */
+#define TLBWrite 0x1
+#define TLBRead 0x2
+#define TLBGetIndex 0x3
+#define TLBProbe 0x4
+#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
+#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define TLBInsertEntry 0x7
+#define TLBDeleteEntry 0x8
+#endif
+
+/* Masks for actual TLB "PD"s */
+#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
+#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+extern int pae40_exist_but_not_enab(void);
+
+static inline int is_pae40_enabled(void)
+{
+ return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
+static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
+{
+ write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
+}
+
+static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
+{
+ /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
+#ifdef CONFIG_ISA_ARCV2
+ write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
+#endif
+}
+
+#else
+
+.macro ARC_MMU_REENABLE reg
+ lr \reg, [ARC_REG_PID]
+ or \reg, \reg, MMU_ENABLE
+ sr \reg, [ARC_REG_PID]
+.endm
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
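For orientation, a sketch of how the registers and commands above combine to install a translation, loosely modeled on this port's TLB insert path (tlb_entry_write_demo is a hypothetical name; write_aux_reg/read_aux_reg are the port's aux register accessors):

    static void tlb_entry_write_demo(unsigned int pd0, unsigned int pd1)
    {
            unsigned int idx;

            /* probe for an existing entry with the same vaddr + ASID */
            write_aux_reg(ARC_REG_TLBPD0, pd0);     /* vaddr | ASID | flags */
            write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
            idx = read_aux_reg(ARC_REG_TLBINDEX);

            if (idx & TLB_LKUP_ERR)                 /* no match: get a free slot */
                    write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);

            /* commit; TLBWriteNI would skip the implicit uTLB invalidate */
            write_aux_reg(ARC_REG_TLBPD1, pd1);     /* paddr | cache/RWX bits */
            write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
    }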
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 26b731d32a2b..9febf5bc3de6 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -7,98 +7,17 @@
#define _ASM_ARC_MMU_H
#ifndef __ASSEMBLY__
-#include <linux/threads.h> /* NR_CPUS */
-#endif
-
-#if defined(CONFIG_ARC_MMU_V1)
-#define CONFIG_ARC_MMU_VER 1
-#elif defined(CONFIG_ARC_MMU_V2)
-#define CONFIG_ARC_MMU_VER 2
-#elif defined(CONFIG_ARC_MMU_V3)
-#define CONFIG_ARC_MMU_VER 3
-#elif defined(CONFIG_ARC_MMU_V4)
-#define CONFIG_ARC_MMU_VER 4
-#endif
-
-/* MMU Management regs */
-#define ARC_REG_MMU_BCR 0x06f
-#if (CONFIG_ARC_MMU_VER < 4)
-#define ARC_REG_TLBPD0 0x405
-#define ARC_REG_TLBPD1 0x406
-#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
-#define ARC_REG_TLBINDEX 0x407
-#define ARC_REG_TLBCOMMAND 0x408
-#define ARC_REG_PID 0x409
-#define ARC_REG_SCRATCH_DATA0 0x418
-#else
-#define ARC_REG_TLBPD0 0x460
-#define ARC_REG_TLBPD1 0x461
-#define ARC_REG_TLBPD1HI 0x463
-#define ARC_REG_TLBINDEX 0x464
-#define ARC_REG_TLBCOMMAND 0x465
-#define ARC_REG_PID 0x468
-#define ARC_REG_SCRATCH_DATA0 0x46c
-#endif
-
-#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
-#define ARC_USE_SCRATCH_REG
-#endif
-
-/* Bits in MMU PID register */
-#define __TLB_ENABLE (1 << 31)
-#define __PROG_ENABLE (1 << 30)
-#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
-
-/* Error code if probe fails */
-#define TLB_LKUP_ERR 0x80000000
-
-#if (CONFIG_ARC_MMU_VER < 4)
-#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
-#else
-#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
-#endif
-/* TLB Commands */
-#define TLBWrite 0x1
-#define TLBRead 0x2
-#define TLBGetIndex 0x3
-#define TLBProbe 0x4
-
-#if (CONFIG_ARC_MMU_VER >= 2)
-#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
-#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
-#else
-#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
-#endif
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define TLBInsertEntry 0x7
-#define TLBDeleteEntry 0x8
-#endif
-
-#ifndef __ASSEMBLY__
+#include <linux/threads.h> /* NR_CPUS */
typedef struct {
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
-#endif
-
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void read_decode_mmu_bcr(void);
+extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
-static inline int is_pae40_enabled(void)
-{
- return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
-}
-
-extern int pae40_exist_but_not_enab(void);
+#endif
-#endif /* !__ASSEMBLY__ */
+#include <asm/mmu-arcv2.h>
#endif
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 3a5e6a5b9ed6..dda471f5f05b 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -15,22 +15,23 @@
#ifndef _ASM_ARC_MMU_CONTEXT_H
#define _ASM_ARC_MMU_CONTEXT_H
-#include <asm/arcregs.h>
-#include <asm/tlb.h>
#include <linux/sched/mm.h>
+#include <asm/tlb.h>
#include <asm-generic/mm_hooks.h>
-/* ARC700 ASID Management
+/* ARC ASID Management
+ *
+ * The MMU tags TLB entries with an 8-bit ASID, avoiding the need to flush
+ * the TLB on context-switch.
*
- * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
- * with same vaddr (different tasks) to co-exit. This provides for
- * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
+ * ASID is managed per cpu, so task threads across CPUs can have different
+ * ASID. Global ASID management is needed if hardware supports TLB shootdown
+ * and/or shared TLB across cores, which ARC doesn't.
*
- * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cpu.
- * When it reaches max 255, the allocation cycle starts afresh by flushing
- * the entire TLB and wrapping ASID back to zero.
+ * Each task is assigned a unique ASID, with a simple round-robin allocator
+ * tracked in @asid_cpu. When the 8-bit value rolls over, a new cycle is
+ * started over from 0 and the TLB is flushed.
*
* A new allocation cycle, post rollover, could potentially reassign an ASID
* to a different task. Thus the rule is to refresh the ASID in a new cycle.
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
asid_mm(mm, cpu) = asid_cpu(cpu);
set_hw:
- write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
+ mmu_setup_asid(mm, hw_pid(mm, cpu));
local_irq_restore(flags);
}
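The rollover scheme reads cleaner as a standalone sketch. This is a simplification (names with _demo are hypothetical): the real allocator additionally keeps a generation count in the bits above the 8-bit ASID so stale mm ASIDs can be refreshed lazily, see get_new_mmu_context() above.

    #include <stdio.h>

    #define ASID_MASK 0xffu                 /* hardware ASID is 8 bit */

    static unsigned int asid_cpu_demo;      /* per-CPU counter in the real port */

    static void flush_tlb_all_demo(void)
    {
            puts("rollover: flush TLB, start new cycle");
    }

    static unsigned int alloc_asid_demo(void)
    {
            if (!(++asid_cpu_demo & ASID_MASK))     /* 8-bit field wrapped */
                    flush_tlb_all_demo();
            return asid_cpu_demo & ASID_MASK;
    }

    int main(void)
    {
            for (int i = 0; i < 300; i++)           /* forces one rollover */
                    alloc_asid_demo();
            printf("ASID after 300 allocs: %u\n", asid_cpu_demo & ASID_MASK);
            return 0;
    }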
@@ -102,6 +103,7 @@ set_hw:
* Initialize the context related info for a new mm_struct
* instance.
*/
+#define init_new_context init_new_context
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
@@ -113,6 +115,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return 0;
}
+#define destroy_context destroy_context
static inline void destroy_context(struct mm_struct *mm)
{
unsigned long flags;
@@ -144,22 +147,19 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
-#ifdef ARC_USE_SCRATCH_REG
- /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
- write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+ mmu_setup_pgd(next, next->pgd);
get_new_mmu_context(next);
}
/*
- * Called at the time of execve() to get a new ASID
- * Note the subtlety here: get_new_mmu_context() behaves differently here
- * vs. in switch_mm(). Here it always returns a new ASID, because mm has
- * an unallocated "initial" value, while in latter, it moves to a new ASID,
- * only if it was unallocated
+ * activate_mm defaults (in asm-generic) to switch_mm and is called at the
+ * time of execve() to get a new ASID. Note the subtlety here:
+ * get_new_mmu_context() behaves differently here vs. in switch_mm(). Here
+ * it always returns a new ASID, because mm has an unallocated "initial"
+ * value, while in latter, it moves to a new ASID, only if it was
+ * unallocated
*/
-#define activate_mm(prev, next) switch_mm(prev, next, NULL)
/* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
* for retiring-mm. However destroy_context( ) still needs to do that because
@@ -168,8 +168,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* there is a good chance that task gets sched-out/in, making its ASID valid
* again (this teased me for a whole day).
*/
-#define deactivate_mm(tsk, mm) do { } while (0)
-#define enter_lazy_tlb(mm, tsk)
+#include <asm-generic/mmu_context.h>
#endif /* __ASM_ARC_MMU_CONTEXT_H */
diff --git a/arch/arc/include/asm/mmzone.h b/arch/arc/include/asm/mmzone.h
deleted file mode 100644
index b86b9d1e54dc..000000000000
--- a/arch/arc/include/asm/mmzone.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_ARC_MMZONE_H
-#define _ASM_ARC_MMZONE_H
-
-#ifdef CONFIG_DISCONTIGMEM
-
-extern struct pglist_data node_data[];
-#define NODE_DATA(nid) (&node_data[nid])
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
- int is_end_low = 1;
-
- if (IS_ENABLED(CONFIG_ARC_HAS_PAE40))
- is_end_low = pfn <= virt_to_pfn(0xFFFFFFFFUL);
-
- /*
- * node 0: lowmem: 0x8000_0000 to 0xFFFF_FFFF
- * node 1: HIGHMEM w/o PAE40: 0x0 to 0x7FFF_FFFF
- * HIGHMEM with PAE40: 0x1_0000_0000 to ...
- */
- if (pfn >= ARCH_PFN_OFFSET && is_end_low)
- return 0;
-
- return 1;
-}
-
-static inline int pfn_valid(unsigned long pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- return (pfn <= node_end_pfn(nid));
-}
-#endif /* CONFIG_DISCONTIGMEM */
-
-#endif
diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h
index 48f13a4ace4b..f534a1fef070 100644
--- a/arch/arc/include/asm/module.h
+++ b/arch/arc/include/asm/module.h
@@ -3,7 +3,6 @@
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
-
*/
#ifndef _ASM_ARC_MODULE_H
@@ -19,8 +18,4 @@ struct mod_arch_specific {
const char *secstr;
};
-#define MODULE_PROC_FAMILY "ARC700"
-
-#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
-
#endif /* _ASM_ARC_MODULE_H */
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 0a32e8cfd074..def0dfb95b43 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,9 +7,22 @@
#include <uapi/asm/page.h>
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#define PAGE_MASK_PHYS PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
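A quick standalone check of the PAE40 mask above, assuming the default 8K page size (so the 32-bit PAGE_MASK is 0xffffe000):

    #include <stdio.h>

    int main(void)
    {
            /* 32-bit PAGE_MASK for 8K pages, zero-extended when widened */
            unsigned long page_mask_32 = 0xffffe000ul;
            unsigned long long mask_phys = 0xff00000000ull | page_mask_32;

            unsigned long long paddr = 0x234567abcdull;     /* 40-bit phys addr */

            /* keeps bits 39:32 plus the page-aligned low bits */
            printf("masked: 0x%llx\n", paddr & mask_phys);  /* 0x234567a000 */
            return 0;
    }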
#ifndef __ASSEMBLY__
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
struct vm_area_struct;
@@ -21,72 +34,74 @@ void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma);
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct {
-#ifdef CONFIG_ARC_HAS_PAE40
- unsigned long long pte;
-#else
- unsigned long pte;
-#endif
-} pte_t;
typedef struct {
unsigned long pgd;
} pgd_t;
+
+#define pgd_val(x) ((x).pgd)
+#define __pgd(x) ((pgd_t) { (x) })
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
typedef struct {
- unsigned long pgprot;
-} pgprot_t;
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+
+#endif
-#define pte_val(x) ((x).pte)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
+#if CONFIG_PGTABLE_LEVELS > 2
-#define __pte(x) ((pte_t) { (x) })
-#define __pgd(x) ((pgd_t) { (x) })
-#define __pgprot(x) ((pgprot_t) { (x) })
+typedef struct {
+ unsigned long pmd;
+} pmd_t;
-#define pte_pgprot(x) __pgprot(pte_val(x))
+#define pmd_val(x) ((x).pmd)
+#define __pmd(x) ((pmd_t) { (x) })
-#else /* !STRICT_MM_TYPECHECKS */
+#endif
+typedef struct {
#ifdef CONFIG_ARC_HAS_PAE40
-typedef unsigned long long pte_t;
+ unsigned long long pte;
#else
-typedef unsigned long pte_t;
+ unsigned long pte;
#endif
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
+} pte_t;
-#define pte_val(x) (x)
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-#define __pte(x) (x)
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-#define pte_pgprot(x) (x)
+#define pte_val(x) ((x).pte)
+#define __pte(x) ((pte_t) { (x) })
-#endif
+typedef struct {
+ unsigned long pgprot;
+} pgprot_t;
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+#define pte_pgprot(x) __pgprot(pte_val(x))
-typedef pte_t * pgtable_t;
+typedef struct page *pgtable_t;
/*
- * Use virt_to_pfn with caution:
- * If used in pte or paddr related macros, it could cause truncation
- * in PAE40 builds
- * As a rule of thumb, only use it in helpers starting with virt_
- * You have been warned !
+ * When HIGHMEM is enabled we have holes in the memory map, so we need a
+ * pfn_valid() that takes into account the actual extents of the physical
+ * memory.
*/
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
+#ifdef CONFIG_HIGHMEM
-#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE)
+extern unsigned long arch_pfn_offset;
+#define ARCH_PFN_OFFSET arch_pfn_offset
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
-#endif
+extern int pfn_valid(unsigned long pfn);
+#define pfn_valid pfn_valid
+
+#else /* CONFIG_HIGHMEM */
+
+#define ARCH_PFN_OFFSET virt_to_pfn((void *)CONFIG_LINUX_RAM_BASE)
+
+#endif /* CONFIG_HIGHMEM */
/*
* __pa, __va, virt_to_page (ALERT: deprecated, don't use them)
@@ -95,14 +110,26 @@ typedef pte_t * pgtable_t;
* virt here means link-address/program-address as embedded in object code.
* And for ARC, link-addr = physical address
*/
-#define __pa(vaddr) ((unsigned long)(vaddr))
-#define __va(paddr) ((void *)((unsigned long)(paddr)))
+#define __pa(vaddr) ((unsigned long)(vaddr))
+#define __va(paddr) ((void *)((unsigned long)(paddr)))
+
+/*
+ * Use virt_to_pfn with caution:
+ * If used in pte or paddr related macros, it could cause truncation
+ * in PAE40 builds
+ * As a rule of thumb, only use it in helpers starting with virt_
+ * You have been warned !
+ */
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __pa(kaddr) >> PAGE_SHIFT;
+}
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
/* Default Permissions for stack/heaps pages (Non Executable) */
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
#define WANT_PAGE_VIRTUAL 1
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 30b9ae511ea9..d5719a260864 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -63,166 +63,8 @@ struct arc_reg_cc_build {
#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8)
-/*
- * Some ARC pct quirks:
- *
- * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
- * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
- * The ARC 700 can either measure stalls per pipeline stage, or all stalls
- * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
- * and all pipeline flushes (e.g. caused by mispredicts, etc.) to
- * STALLED_CYCLES_FRONTEND.
- *
- * We could start multiple performance counters and combine everything
- * afterwards, but that makes it complicated.
- *
- * Note that I$ cache misses aren't counted by either of the two!
- */
-
-/*
- * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
- * (based on a specific RTL build)
- * Below is the static map between perf generic/arc specific event_id and
- * h/w condition names.
- * At the time of probe, we loop thru each index and find it's name to
- * complete the mapping of perf event_id to h/w index as latter is needed
- * to program the counter really
- */
-static const char * const arc_pmu_ev_hw_map[] = {
- /* count cycles */
- [PERF_COUNT_HW_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
- [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-
- /* counts condition */
- [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
- /* All jump instructions that are taken */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
-#ifdef CONFIG_ISA_ARCV2
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
-#else
- [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
- [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#endif
- [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
-
- [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
- [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
- [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
- [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
- [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
-
- [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
- [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
-};
-
-#define C(_x) PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED 0xffff
-
-static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
- },
- /* DTLB LD/ST Miss not segregated by h/w*/
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
- [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
-};
#endif /* __ASM_PERF_EVENT_H */
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index b747f2ec2928..096b8ef58edb 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -18,10 +18,10 @@
* vineetg: April 2010
* -Switched pgtable_t from being struct page * to unsigned long
* =Needed so that Page Table allocator (pte_alloc_one) is not forced to
- * to deal with struct page. Thay way in future we can make it allocate
+ * deal with struct page. That way in future we can make it allocate
* multiple PG Tbls in one Page Frame
* =sweet side effect is avoiding calls to ugly page_address( ) from the
- * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate
+ * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate)
*
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
@@ -31,30 +31,32 @@
#include <linux/mm.h>
#include <linux/log2.h>
+#include <asm-generic/pgalloc.h>
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
- pmd_set(pmd, pte);
+ /*
+ * The cast to long below is OK in 32-bit PAE40 regime with long long pte.
+ * Despite "wider" pte, the pte table needs to be in non-PAE low memory
+ * as all higher levels can only hold long pointers.
+ *
+ * The cast itself is needed given the simplistic definition of set_pmd()
+ */
+ set_pmd(pmd, __pmd((unsigned long)pte));
}
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
{
- pmd_set(pmd, (pte_t *) ptep);
-}
-
-static inline int __get_order_pgd(void)
-{
- return get_order(PTRS_PER_PGD * sizeof(pgd_t));
+ set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- int num, num2;
- pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
+ pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
if (ret) {
+ int num, num2;
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
memzero(ret, num * sizeof(pgd_t));
@@ -68,67 +70,28 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return ret;
}
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_pages((unsigned long)pgd, __get_order_pgd());
-}
-
-
-/*
- * With software-only page-tables, addr-split for traversal is tweakable and
- * that directly governs how big tables would be at each level.
- * Further, the MMU page size is configurable.
- * Thus we need to programatically assert the size constraint
- * All of this is const math, allowing gcc to do constant folding/propagation.
- */
+#if CONFIG_PGTABLE_LEVELS > 3
-static inline int __get_order_pte(void)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- return get_order(PTRS_PER_PTE * sizeof(pte_t));
+ set_p4d(p4dp, __p4d((unsigned long)pudp));
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- pte_t *pte;
+#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd)
- pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- __get_order_pte());
+#endif
- return pte;
-}
+#if CONFIG_PGTABLE_LEVELS > 2
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- pgtable_t pte_pg;
- struct page *page;
-
- pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
- if (!pte_pg)
- return 0;
- memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
- page = virt_to_page(pte_pg);
- if (!pgtable_pte_page_ctor(page)) {
- __free_page(page);
- return 0;
- }
-
- return pte_pg;
+ set_pud(pudp, __pud((unsigned long)pmdp));
}
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
-}
+#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
-{
- pgtable_pte_page_dtor(virt_to_page(ptep));
- free_pages((unsigned long)ptep, __get_order_pte());
-}
+#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
-#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
-
#endif /* _ASM_ARC_PGALLOC_H */
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
new file mode 100644
index 000000000000..f3eea3f30b2e
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
+ * There correspond to the corresponding bits in the TLB
+ */
+
+#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
+#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
+
+#ifdef CONFIG_ARC_CACHE_PAGES
+#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
+#else
+#define _PAGE_CACHEABLE 0
+#endif
+
+#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
+#define _PAGE_WRITE (1 << 2) /* User Write (H) */
+#define _PAGE_READ (1 << 3) /* User Read (H) */
+#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
+#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
+#define _PAGE_SPECIAL (1 << 6)
+#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
+#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
+
+/* We borrow bit 5 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_DIRTY
+
+#ifdef CONFIG_ARC_MMU_V4
+#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
+#else
+#define _PAGE_HW_SZ 0
+#endif
+
+/* Defaults for every user page */
+#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_SPECIAL)
+
+/* More abbreviated helpers */
+#define PAGE_U_NONE __pgprot(___DEF)
+#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
+#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
+#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
+#define PAGE_U_X_W_R __pgprot(___DEF \
+ | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
+ | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+
+#define PAGE_SHARED PAGE_U_W_R
+
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
+
+/*
+ * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
+ *
+ * Certain cases have 1:1 mapping
+ * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
+ * which directly corresponds to PAGE_U_X_R
+ *
+ * Other rules which cause the divergence from 1:1 mapping
+ *
+ * 1. Although ARC700 can do exclusive execute/write protection (meaning R
+ * can be tracked independently of X/W, unlike some other CPUs), still to
+ * keep things consistent with other archs:
+ * -Write implies Read: W => R
+ * -Execute implies Read: X => R
+ *
+ * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
+ * This is to enable COW mechanism
+ */
+ /* xwr */
+#ifndef __ASSEMBLY__
+
+#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
+#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
+#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
+#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
+
+#define PTE_BIT_FUNC(fn, op) \
+ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+
+PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
+PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite_novma, |= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
+PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+struct vm_fault;
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr);
+
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <-------------- offset -------------> <--- zero --> E < type ->
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * The zero'ed bits include _PAGE_PRESENT.
+ */
+#define __swp_entry(type, off) ((swp_entry_t) \
+ { ((type) & 0x1f) | ((off) << 13) })
+
+/* Decode a PTE containing swap "identifier" into constituents */
+#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
+#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
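A standalone round-trip check of this encoding (a user-space mirror of the macros above, not kernel code):

    #include <stdio.h>

    /* mirror of __swp_entry/__swp_type/__swp_offset above */
    #define SWP_ENTRY(type, off)    ((((unsigned long)(type)) & 0x1f) | \
                                     ((unsigned long)(off) << 13))
    #define SWP_TYPE(v)             ((v) & 0x1f)
    #define SWP_OFFSET(v)           ((v) >> 13)

    int main(void)
    {
            unsigned long v = SWP_ENTRY(3, 0x1234);

            /* bits 5..12 (incl. _PAGE_PRESENT at bit 9) stay zero */
            printf("raw=0x%lx type=%lu off=0x%lx\n",
                   v, SWP_TYPE(v), SWP_OFFSET(v));  /* type=3 off=0x1234 */
            return 0;
    }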
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+PTE_BIT_FUNC(swp_mkexclusive, |= (_PAGE_SWP_EXCLUSIVE));
+PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE));
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
new file mode 100644
index 000000000000..86e148226463
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * Helpers for implementing paging levels
+ */
+
+#ifndef _ASM_ARC_PGTABLE_LEVELS_H
+#define _ASM_ARC_PGTABLE_LEVELS_H
+
+#if CONFIG_PGTABLE_LEVELS == 2
+
+/*
+ * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
+ *
+ * [31] 32 bit virtual address [0]
+ * -------------------------------------------------------
+ * | | <---------- PGDIR_SHIFT ----------> |
+ * | | | <-- PAGE_SHIFT --> |
+ * -------------------------------------------------------
+ * | | |
+ * | | --> off in page frame
+ * | ---> index into Page Table
+ * ----> index into Page Directory
+ *
+ * Given the software walk, the vaddr split is arbitrarily set to 11:8:13.
+ * However, enabling super pages in a 2 level regime pegs PGDIR_SHIFT to
+ * the super page size.
+ */
+
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT 24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT 21
+#else
+/*
+ * No Super page case
+ * Default value provides 11:8:13 (8K), 10:10:12 (4K)
+ * Limit imposed by pgtable_t being only PAGE_SIZE long
+ * (so a 4K page can only hold 1K entries, i.e. 10 bits)
+ */
+#ifdef CONFIG_ARC_PAGE_SIZE_4K
+#define PGDIR_SHIFT 22
+#else
+#define PGDIR_SHIFT 21
+#endif
+
+#endif
+
+#else /* CONFIG_PGTABLE_LEVELS != 2 */
+
+/*
+ * A default 3 level paging testing setup in software walked MMU
+ * MMUv4 (8K page): <4> : <7> : <8> : <13>
+ * A default 4 level paging testing setup in software walked MMU
+ * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
+ */
+#define PGDIR_SHIFT 28
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT 25
+#endif
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT 21
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS */
+
+#define PGDIR_SIZE BIT(PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SIZE BIT(PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SIZE BIT(PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE - 1))
+#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
+#endif
+
+#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
+
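To make the default 2-level split concrete, a standalone check using the 8K page, no-super-page values from above (PAGE_SHIFT 13, PGDIR_SHIFT 21):

    #include <stdio.h>

    int main(void)
    {
            const unsigned page_shift = 13;         /* 8K pages */
            const unsigned pgdir_shift = 21;        /* default, no super page */

            printf("PTRS_PER_PGD = %u\n", 1u << (32 - pgdir_shift));        /* 2048 */
            printf("PTRS_PER_PTE = %u\n", 1u << (pgdir_shift - page_shift));/* 256 */
            printf("PGDIR_SIZE   = %u MB\n", (1u << pgdir_shift) >> 20);    /* 2 */
            return 0;   /* 11 + 8 + 13 bits = the 11:8:13 split above */
    }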
+#ifndef __ASSEMBLY__
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/pgtable-nop4d.h>
+#elif CONFIG_PGTABLE_LEVELS > 2
+#include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nopmd.h>
+#endif
+
+/*
+ * 1st level paging: pgd
+ */
+#define pgd_ERROR(e) \
+ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+/* In 4 level paging, p4d_* macros work on pgd */
+#define p4d_none(x) (!p4d_val(x))
+#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
+#define p4d_present(x) (p4d_val(x))
+#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
+#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
+#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
+#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
+
+/*
+ * 2nd level paging: pud
+ */
+#define pud_ERROR(e) \
+ pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+/*
+ * In 3 level paging, pud_* macros work on pgd
+ * In 4 level paging, pud_* macros work on pud
+ */
+#define pud_none(x) (!pud_val(x))
+#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
+#define pud_present(x) (pud_val(x))
+#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
+#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
+#define pud_page(pud) virt_to_page(pud_pgtable(pud))
+#define set_pud(pudp, pud) (*(pudp) = pud)
+
+/*
+ * 3rd level paging: pmd
+ */
+#define pmd_ERROR(e) \
+ pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+
+#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+
+#endif
+
+/*
+ * Due to the strange way generic pgtable level folding works, the pmd_* macros
+ * - are valid even for 2 levels (which supposedly only has pgd - pte)
+ * - behave differently for 2 vs. 3+ levels
+ * In 2 level paging (pgd -> pte), pmd_* macros work on pgd
+ * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
+ */
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
+#define pmd_present(x) (pmd_val(x))
+#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
+#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
+#define pmd_pfn(pmd) ((pmd_val(pmd) & PAGE_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd) virt_to_page((void *)pmd_page_vaddr(pmd))
+#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
+#define pmd_pgtable(pmd) ((pgtable_t) pmd_page(pmd))
+
+/*
+ * 4th level paging: pte
+ */
+#define pte_ERROR(e) \
+ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+
+#define PFN_PTE_SHIFT PAGE_SHIFT
+#define pte_none(x) (!pte_val(x))
+#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
+#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+#define set_pte(ptep, pte) ((*(ptep)) = (pte))
+#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
+#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
+
+#ifdef CONFIG_ISA_ARCV2
+#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
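To make the shift arithmetic of this header concrete, here is a small
standalone sketch that decomposes a 32-bit virtual address under the default
2 level 11:8:13 split; the PAGE_SHIFT/PGDIR_SHIFT values are assumed defaults,
not taken from a kernel build:

    /* Decompose a vaddr the way the 2-level software walk would. */
    #include <stdio.h>

    #define PAGE_SHIFT   13                              /* 8K pages, assumed */
    #define PGDIR_SHIFT  21                              /* 11:8:13 split */
    #define BIT(n)       (1UL << (n))
    #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)           /* 2048 PGD slots */
    #define PTRS_PER_PTE BIT(PGDIR_SHIFT - PAGE_SHIFT)   /* 256 PTEs/table */

    int main(void)
    {
        unsigned long vaddr   = 0x5f3fa5a4UL;   /* arbitrary sample */
        unsigned long pgd_idx = vaddr >> PGDIR_SHIFT;
        unsigned long pte_idx = (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        unsigned long off     = vaddr & (BIT(PAGE_SHIFT) - 1);

        printf("pgd idx %lu (of %lu), pte idx %lu (of %lu), offset %#lx\n",
               pgd_idx, PTRS_PER_PGD, pte_idx, PTRS_PER_PTE, off);
        return 0;
    }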
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 9019ed9f9c94..4cf45a99fd79 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -1,224 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * vineetg: May 2011
- * -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
- * They are semantically the same although in different contexts
- * VALID marks a TLB entry exists and it will only happen if PRESENT
- * - Utilise some unused free bits to confine PTE flags to 12 bits
- * This is a must for 4k pg-sz
- *
- * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
- * -TLB Locking never really existed, except for initial specs
- * -SILENT_xxx not needed for our port
- * -Per my request, MMU V3 changes the layout of some of the bits
- * to avoid a few shifts in TLB Miss handlers.
- *
- * vineetg: April 2010
- * -PGD entry no longer contains any flags. If empty it is 0, otherwise has
- * Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
- *
- * vineetg: April 2010
- * -Switched form 8:11:13 split for page table lookup to 11:8:13
- * -this speeds up page table allocation itself as we now have to memset 1K
- * instead of 8k per page table.
- * -TODO: Right now page table alloc is 8K and rest 7K is unused
- * need to optimise it
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
#ifndef _ASM_ARC_PGTABLE_H
#define _ASM_ARC_PGTABLE_H
#include <linux/bits.h>
-#include <asm-generic/pgtable-nopmd.h>
-#include <asm/page.h>
-#include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */
-
-/**************************************************************************
- * Page Table Flags
- *
- * ARC700 MMU only deals with softare managed TLB entries.
- * Page Tables are purely for Linux VM's consumption and the bits below are
- * suited to that (uniqueness). Hence some are not implemented in the TLB and
- * some have different value in TLB.
- * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
- * seperate PD0 and PD1, which combined forms a translation entry)
- * while for PTE perspective, they are 8 and 9 respectively
- * with MMU v3: Most bits (except SHARED) represent the exact hardware pos
- * (saves some bit shift ops in TLB Miss hdlrs)
- */
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-
-#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */
-#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */
-#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */
-#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */
-#define _PAGE_READ (1<<5) /* Page has user read perm (H) */
-#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL (1<<7)
-#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
-#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */
-
-#else /* MMU v3 onwards */
-
-#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */
-#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */
-#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */
-#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
-#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
-#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL (1<<6)
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
-#endif
-
-#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
-#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */
-#endif
-
-#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
- usable for shared TLB entries (H) */
-
-#define _PAGE_UNUSED_BIT (1<<12)
-#endif
-
-/* vmalloc permissions */
-#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
- _PAGE_GLOBAL | _PAGE_PRESENT)
-
-#ifndef CONFIG_ARC_CACHE_PAGES
-#undef _PAGE_CACHEABLE
-#define _PAGE_CACHEABLE 0
-#endif
-
-#ifndef _PAGE_HW_SZ
-#define _PAGE_HW_SZ 0
-#endif
-
-/* Defaults for every user page */
-#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
-
-/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-/* More Abbrevaited helpers */
-#define PAGE_U_NONE __pgprot(___DEF)
-#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
-#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
-#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
-#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
- _PAGE_EXECUTE)
-
-#define PAGE_SHARED PAGE_U_W_R
-
-/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
- * user vaddr space - visible in all addr spaces, but kernel mode only
- * Thus Global, all-kernel-access, no-user-access, cached
- */
-#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
-
-/* ioremap */
-#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
-
-/* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
-#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
-
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#else
-#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
-#endif
-
-/**************************************************************************
- * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
- *
- * Certain cases have 1:1 mapping
- * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
- * which directly corresponds to PAGE_U_X_R
- *
- * Other rules which cause the divergence from 1:1 mapping
- *
- * 1. Although ARC700 can do exclusive execute/write protection (meaning R
- * can be tracked independet of X/W unlike some other CPUs), still to
- * keep things consistent with other archs:
- * -Write implies Read: W => R
- * -Execute implies Read: X => R
- *
- * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
- * This is to enable COW mechanism
- */
- /* xwr */
-#define __P000 PAGE_U_NONE
-#define __P001 PAGE_U_R
-#define __P010 PAGE_U_R /* Pvt-W => !W */
-#define __P011 PAGE_U_R /* Pvt-W => !W */
-#define __P100 PAGE_U_X_R /* X => R */
-#define __P101 PAGE_U_X_R
-#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
-#define __P111 PAGE_U_X_R /* Pvt-W => !W */
-
-#define __S000 PAGE_U_NONE
-#define __S001 PAGE_U_R
-#define __S010 PAGE_U_W_R /* W => R */
-#define __S011 PAGE_U_W_R
-#define __S100 PAGE_U_X_R /* X => R */
-#define __S101 PAGE_U_X_R
-#define __S110 PAGE_U_X_W_R /* X => R */
-#define __S111 PAGE_U_X_W_R
-
-/****************************************************************
- * 2 tier (PGD:PTE) software page walker
- *
- * [31] 32 bit virtual address [0]
- * -------------------------------------------------------
- * | | <------------ PGDIR_SHIFT ----------> |
- * | | |
- * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> |
- * -------------------------------------------------------
- * | | |
- * | | --> off in page frame
- * | ---> index into Page Table
- * ----> index into Page Directory
- *
- * In a single page size configuration, only PAGE_SHIFT is fixed
- * So both PGD and PTE sizing can be tweaked
- * e.g. 8K page (PAGE_SHIFT 13) can have
- * - PGDIR_SHIFT 21 -> 11:8:13 address split
- * - PGDIR_SHIFT 24 -> 8:11:13 address split
- *
- * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
- * so the sizing flexibility is gone.
- */
-
-#if defined(CONFIG_ARC_HUGEPAGE_16M)
-#define PGDIR_SHIFT 24
-#elif defined(CONFIG_ARC_HUGEPAGE_2M)
-#define PGDIR_SHIFT 21
-#else
-/*
- * Only Normal page support so "hackable" (see comment above)
- * Default value provides 11:8:13 (8K), 11:9:12 (4K)
- */
-#define PGDIR_SHIFT 21
-#endif
-
-#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT)
-#define BITS_FOR_PGD (32 - PGDIR_SHIFT)
-
-#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-#define PTRS_PER_PTE BIT(BITS_FOR_PTE)
-#define PTRS_PER_PGD BIT(BITS_FOR_PGD)
+#include <asm/pgtable-levels.h>
+#include <asm/pgtable-bits-arcv2.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
/*
 * Number of entries a user land program uses.
@@ -226,170 +19,12 @@
*/
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-/*
- * No special requirements for lowest virtual address we permit any user space
- * mapping to be mapped at.
- */
-#define FIRST_USER_ADDRESS 0UL
-
-
-/****************************************************************
- * Bucket load of VM Helpers
- */
-
#ifndef __ASSEMBLY__
-#define pte_ERROR(e) \
- pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pgd_ERROR(e) \
- pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* the zero page used for uninitialized and anonymous pages */
extern char empty_zero_page[PAGE_SIZE];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
-
-#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
-
-/* find the page descriptor of the Page Tbl ref by PMD entry */
-#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK)
-
-/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
-
-/* In a 2 level sys, setup the PGD entry with PTE value */
-static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
-{
- pmd_val(*pmdp) = (unsigned long)ptep;
-}
-
-#define pte_none(x) (!pte_val(x))
-#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0))
-
-#define pmd_none(x) (!pmd_val(x))
-#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
-#define pmd_present(x) (pmd_val(x))
-#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
-
-#define pte_page(pte) pfn_to_page(pte_pfn(pte))
-#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
-
-/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
-#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
-#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-/*
- * pte_offset gets a @ptr to PMD entry (PGD in our 2-tier paging system)
- * and returns ptr to PTE entry corresponding to @addr
- */
-#define pte_offset(dir, addr) ((pte_t *)(pmd_page_vaddr(*dir)) +\
- __pte_index(addr))
-
-/* No mapping of Page Tables in high mem etc, so following same as above */
-#define pte_offset_kernel(dir, addr) pte_offset(dir, addr)
-#define pte_offset_map(dir, addr) pte_offset(dir, addr)
-
-/* Zoo of pte_xxx function */
-#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
-#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
-
-#define PTE_BIT_FUNC(fn, op) \
- static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
-PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
-PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
-PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
-PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
-PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
-PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-/* Macro to mark a page protection as uncacheable */
-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- set_pte(ptep, pteval);
-}
-
-/*
- * All kernel related VM pages are in init's mm.
- */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
-#define pgd_offset(mm, addr) (((mm)->pgd)+pgd_index(addr))
-
-/*
- * Macro to quickly access the PGD entry, utlising the fact that some
- * arch may cache the pointer to Page Directory of "current" task
- * in a MMU register
- *
- * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
- * becomes read a register
- *
- * ********CAUTION*******:
- * Kernel code might be dealing with some mm_struct of NON "current"
- * Thus use this macro only when you are certain that "current" is current
- * e.g. when dealing with signal frame setup code etc
- */
-#ifdef ARC_USE_SCRATCH_REG
-#define pgd_offset_fast(mm, addr) \
-({ \
- pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \
- pgd_base + pgd_index(addr); \
-})
-#else
-#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr)
-#endif
-
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep);
-
-/* Encode swap {type,off} tuple into PTE
- * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * PAGE_PRESENT is zero in a PTE holding swap "identifier"
- */
-#define __swp_entry(type, off) ((swp_entry_t) { \
- ((type) & 0x1f) | ((off) << 13) })
-
-/* Decode a PTE containing swap "identifier "into constituents */
-#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
-
-/* NOPs, to keep generic kernel happy */
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
-#define kern_addr_valid(addr) (1)
-
-/*
- * remap a physical page `pfn' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#include <asm/hugepage.h>
-#endif
-
-#include <asm-generic/pgtable.h>
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 706edeaa5583..d606658e2fe7 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,42 +14,25 @@
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
-
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
-/* These DPFP regs need to be saved/restored across ctx-sw */
-struct arc_fpu {
- struct {
- unsigned int l, h;
- } aux_dpfp[2];
-};
-#endif
-
-#ifdef CONFIG_ARC_PLAT_EZNPS
-struct eznps_dp {
- unsigned int eflags;
- unsigned int gpa1;
-};
-#endif
+#include <asm/dsp.h>
+#include <asm/fpu.h>
/* Arch specific stuff which needs to be saved per task.
* However these items are not so important so as to earn a place in
* struct thread_info
*/
struct thread_struct {
- unsigned long ksp; /* kernel mode stack pointer */
unsigned long callee_reg; /* pointer to callee regs */
unsigned long fault_address; /* dbls as brkpt holder as well */
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+ struct dsp_callee_regs dsp;
+#endif
#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
struct arc_fpu fpu;
#endif
-#ifdef CONFIG_ARC_PLAT_EZNPS
- struct eznps_dp dp;
-#endif
};
-#define INIT_THREAD { \
- .ksp = sizeof(init_stack) + (unsigned long) init_stack, \
-}
+#define INIT_THREAD { }
/* Forward declaration, a strange C thing */
struct task_struct;
@@ -57,24 +40,12 @@ struct task_struct;
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1)
-/* Free all resources held by a thread */
-#define release_thread(thread) do { } while (0)
-
/*
 * A lot of busy-wait loops in SMP are based off of non-volatile data which
 * would otherwise get optimised away by gcc
*/
-#ifndef CONFIG_EZNPS_MTM_EXT
-
#define cpu_relax() barrier()
-#else
-
-#define cpu_relax() \
- __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-
-#endif
-
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
@@ -82,7 +53,7 @@ struct task_struct;
 * Whereabouts of task's sp, fp, blink when it was last seen in kernel mode.
* Look in process.c for details of kernel stack layout
*/
-#define TSK_K_ESP(tsk) (tsk->thread.ksp)
+#define TSK_K_ESP(tsk) (task_thread_info(tsk)->ksp)
#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \
sizeof(struct callee_regs) + off)))
@@ -93,7 +64,7 @@ struct task_struct;
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
-extern unsigned int get_wchan(struct task_struct *p);
+extern unsigned int __get_wchan(struct task_struct *p);
#endif /* !__ASSEMBLY__ */
@@ -116,31 +87,13 @@ extern unsigned int get_wchan(struct task_struct *p);
#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PMD_SIZE each for fixmap/pkmap, 2 PMD_SIZE gutter (see asm/highmem.h) */
-#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE)
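As a worked check of the sizing above, a sketch with assumed config values
(CONFIG_ARC_KVADDR_SIZE of 256, 2M PMD_SIZE), not numbers from a real build:

    /* Worked example of the vmalloc window arithmetic (assumed values). */
    #include <stdio.h>

    #define ARC_KVADDR_SIZE_MB 256            /* assumed CONFIG_ARC_KVADDR_SIZE */
    #define PMD_SIZE           (1UL << 21)    /* 2M, per the default split */

    int main(void)
    {
        unsigned long kvaddr  = (unsigned long)ARC_KVADDR_SIZE_MB << 20;
        unsigned long vmalloc = kvaddr - PMD_SIZE * 4; /* fixmap+pkmap+2 gutter */

        printf("kernel vaddr %lu MB -> vmalloc %lu MB\n",
               kvaddr >> 20, vmalloc >> 20);           /* 256 MB -> 248 MB */
        return 0;
    }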
-#ifdef CONFIG_ARC_PLAT_EZNPS
-/* NPS architecture defines special window of 129M in user address space for
- * special memory areas, when accessing this window the MMU do not use TLB.
- * Instead MMU direct the access to:
- * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
- * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
- *
- * CMEM - is the fastest memory we got and its size is 16K.
- * FMT - is used to map either to internal/external memory.
- * Internal memory is the second fast memory and its size is 16M
- * External memory is the biggest memory (16G) and also the slowest.
- *
- * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000.
- */
-#define STACK_TOP 0x57e00000
-#else
#define STACK_TOP TASK_SIZE
-#endif
-
#define STACK_TOP_MAX STACK_TOP
/* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index ba9854ef39e8..00b9318e551e 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -8,19 +8,26 @@
#define __ASM_ARC_PTRACE_H
#include <uapi/asm/ptrace.h>
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
+typedef union {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned long state:8, vec:8, cause:8, param:8;
+#else
+ unsigned long param:8, cause:8, vec:8, state:8;
+#endif
+ };
+ unsigned long full;
+} ecr_reg;
+
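With the open-coded bitfields now a named type, a quick standalone sketch of
decoding such a packed word (little-endian layout as above; the sample value
is made up). The in_syscall()/in_brkpt_trap() macros further down reduce to
exactly these field tests:

    /* Decode a packed exception-cause word via the union. */
    #include <stdio.h>

    typedef union {
        struct {
            unsigned long param:8, cause:8, vec:8, state:8;
        };
        unsigned long full;
    } ecr_reg;

    int main(void)
    {
        ecr_reg ecr = { .full = 0x00250100UL };   /* made-up sample */

        printf("vec %#x cause %#x param %#x state %#x\n",
               (unsigned)ecr.vec, (unsigned)ecr.cause,
               (unsigned)ecr.param, (unsigned)ecr.state);
        return 0;
    }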
/* THE pt_regs: Defines how regs are saved during entry into kernel */
#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
-#ifdef CONFIG_ARC_PLAT_EZNPS
- unsigned long eflags; /* Extended FLAGS */
- unsigned long gpa1; /* General Purpose Aux */
-#endif
-
/* Real registers */
unsigned long bta; /* bta_l1, bta_l2, erbta */
@@ -44,53 +51,38 @@ struct pt_regs {
* Last word used by Linux for extra state mgmt (syscall-restart)
* For interrupts, use artificial ECR values to note current prio-level
*/
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned long state:8, ecr_vec:8,
- ecr_cause:8, ecr_param:8;
-#else
- unsigned long ecr_param:8, ecr_cause:8,
- ecr_vec:8, state:8;
-#endif
- };
- unsigned long event;
- };
+ ecr_reg ecr;
+};
- unsigned long user_r25;
+struct callee_regs {
+ unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
};
+
+#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
+
#else
struct pt_regs {
unsigned long orig_r0;
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned long state:8, ecr_vec:8,
- ecr_cause:8, ecr_param:8;
-#else
- unsigned long ecr_param:8, ecr_cause:8,
- ecr_vec:8, state:8;
-#endif
- };
- unsigned long event;
- };
+ ecr_reg ecr; /* Exception Cause Reg */
- unsigned long bta; /* bta_l1, bta_l2, erbta */
+ unsigned long bta; /* erbta */
- unsigned long user_r25;
-
- unsigned long r26; /* gp */
unsigned long fp;
- unsigned long sp; /* user/kernel sp depending on where we came from */
-
- unsigned long r12, r30;
+ unsigned long r30;
+ unsigned long r12;
+ unsigned long r26; /* gp */
#ifdef CONFIG_ARC_HAS_ACCL_REGS
unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
#endif
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+ unsigned long DSP_CTRL;
+#endif
+
+ unsigned long sp; /* user/kernel sp depending on entry */
/*------- Below list auto saved by h/w -----------*/
unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
@@ -104,14 +96,14 @@ struct pt_regs {
unsigned long status32;
};
-#endif
-
-/* Callee saved registers - need to be saved only when you are scheduled out */
-
struct callee_regs {
unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
};
+#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
+
+#endif
+
#define instruction_pointer(regs) ((regs)->ret)
#define profile_pc(regs) instruction_pointer(regs)
@@ -130,13 +122,13 @@ struct callee_regs {
/* return 1 if PC in delay slot */
#define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
-#define in_syscall(regs) ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
-#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+#define in_syscall(regs) ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param)
+#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param)
#define STATE_SCALL_RESTARTED 0x01
-#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
-#define syscall_restartable(reg) !(reg->state & STATE_SCALL_RESTARTED)
+#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(regs) !(regs->ecr.state & STATE_SCALL_RESTARTED)
#define current_pt_regs() \
({ \
@@ -151,6 +143,35 @@ static inline long regs_return_value(struct pt_regs *regs)
return (long)regs->r0;
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n);
+
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+extern int syscall_trace_entry(struct pt_regs *);
+extern void syscall_trace_exit(struct pt_regs *);
+
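A standalone sketch of the offset-based register access pattern behind
regs_get_register() and the regs_query_* helpers above; the pt_regs layout
and values below are toys for illustration, not the ARC register frame:

    /* Bounds-check an offset against MAX_REG_OFFSET, then read the saved
     * frame as raw memory, mirroring regs_get_register() above. */
    #include <stddef.h>
    #include <stdio.h>

    struct pt_regs { unsigned long r0, r1, sp, status32; };   /* toy layout */

    #define MAX_REG_OFFSET offsetof(struct pt_regs, status32)

    static unsigned long regs_get_register(struct pt_regs *regs,
                                           unsigned int offset)
    {
        if (offset > MAX_REG_OFFSET)
            return 0;

        return *(unsigned long *)((unsigned long)regs + offset);
    }

    int main(void)
    {
        struct pt_regs regs = { .r0 = 42, .r1 = 7, .sp = 0xbeef, .status32 = 1 };

        printf("r1  = %lu\n",
               regs_get_register(&regs, offsetof(struct pt_regs, r1)));
        printf("oob = %lu\n", regs_get_register(&regs, 4096));  /* -> 0 */
        return 0;
    }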
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PTRACE_H */
diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h
deleted file mode 100644
index 6a2a5be5026d..000000000000
--- a/arch/arc/include/asm/segment.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASMARC_SEGMENT_H
-#define __ASMARC_SEGMENT_H
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long mm_segment_t;
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(0)
-#define USER_DS MAKE_MM_SEG(TASK_SIZE)
-
-#define segment_eq(a, b) ((a) == (b))
-
-#endif /* __ASSEMBLY__ */
-#endif /* __ASMARC_SEGMENT_H */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 61a97fe70b86..1c6db599e1fc 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -2,18 +2,14 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
-#ifndef __ASMARC_SETUP_H
-#define __ASMARC_SETUP_H
+#ifndef __ASM_ARC_SETUP_H
+#define __ASM_ARC_SETUP_H
#include <linux/types.h>
#include <uapi/asm/setup.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#define COMMAND_LINE_SIZE 2048
-#else
#define COMMAND_LINE_SIZE 256
-#endif
/*
* Data structure to map a ID to string
@@ -38,4 +34,12 @@ long __init arc_get_mem_sz(void);
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
+extern void arc_mmu_init(void);
+extern int arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void arc_cache_init(void);
+extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void __init handle_uboot_args(void);
+
#endif /* __ASM_ARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index c5de4008d19f..e0913f52c2cd 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -29,6 +29,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void __init smp_init_cpus(void);
extern void first_lines_of_secondary(void);
extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_wait_to_boot(int);
+extern void start_kernel_secondary(void);
/*
* API expected BY platform smp code (FROM arch smp code)
@@ -105,7 +107,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
#include <asm/spinlock.h>
extern arch_spinlock_t smp_atomic_ops_lock;
-extern arch_spinlock_t smp_bitops_lock;
#define atomic_ops_lock(flags) do { \
local_irq_save(flags); \
@@ -117,24 +118,11 @@ extern arch_spinlock_t smp_bitops_lock;
local_irq_restore(flags); \
} while (0)
-#define bitops_lock(flags) do { \
- local_irq_save(flags); \
- arch_spin_lock(&smp_bitops_lock); \
-} while (0)
-
-#define bitops_unlock(flags) do { \
- arch_spin_unlock(&smp_bitops_lock); \
- local_irq_restore(flags); \
-} while (0)
-
#else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags)
-#define bitops_lock(flags) local_irq_save(flags)
-#define bitops_unlock(flags) local_irq_restore(flags)
-
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_ARC_HAS_LLSC */
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 94bbed88e3fc..192871608925 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -232,15 +232,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: ex %0, [%1] \n"
-#ifdef CONFIG_EZNPS_MTM_EXT
- " .word %3 \n"
-#endif
" breq %0, %2, 1b \n"
: "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
-#ifdef CONFIG_EZNPS_MTM_EXT
- , "i"(CTOP_INST_SCHD_RW)
-#endif
: "memory");
smp_mb();
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 77f123385e96..1f85de8288b1 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -9,36 +9,16 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
-
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
-
-extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_FPU_PREV(p, n) fpu_save_restore(p, n)
-#define ARC_FPU_NEXT(t)
-
-#else
-
-#define ARC_FPU_PREV(p, n)
-#define ARC_FPU_NEXT(n)
-
-#endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */
-
-#ifdef CONFIG_ARC_PLAT_EZNPS
-extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_EZNPS_DP_PREV(p, n) dp_save_restore(p, n)
-#else
-#define ARC_EZNPS_DP_PREV(p, n)
-
-#endif /* !CONFIG_ARC_PLAT_EZNPS */
+#include <asm/dsp-impl.h>
+#include <asm/fpu.h>
struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
#define switch_to(prev, next, last) \
do { \
- ARC_EZNPS_DP_PREV(prev, next); \
- ARC_FPU_PREV(prev, next); \
+ dsp_save_restore(prev, next); \
+ fpu_save_restore(prev, next); \
last = __switch_to(prev, next);\
- ARC_FPU_NEXT(next); \
mb(); \
} while (0)
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 94529e89dff0..9709256e31c8 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -12,6 +12,8 @@
#include <asm/unistd.h>
#include <asm/ptrace.h> /* in_syscall() */
+extern void *sys_call_table[];
+
static inline long
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
index 7ddba13e9b59..c3f4714a4f5c 100644
--- a/arch/arc/include/asm/syscalls.h
+++ b/arch/arc/include/asm/syscalls.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
int sys_clone_wrapper(int, int, int, int, int);
+int sys_clone3_wrapper(void *, size_t);
int sys_cacheflush(uint32_t, uint32_t, uint32_t);
int sys_arc_settls(void *);
int sys_arc_gettls(void);
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index f9eef0e8f0b7..4c530cf131f3 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -27,7 +27,6 @@
#ifndef __ASSEMBLY__
#include <linux/thread_info.h>
-#include <asm/segment.h>
/*
* low level task data that entry.S needs immediate access to
@@ -38,17 +37,16 @@
*/
struct thread_info {
unsigned long flags; /* low level flags */
+ unsigned long ksp; /* kernel mode stack top in __switch_to */
int preempt_count; /* 0 => preemptable, <0 => BUG */
- struct task_struct *task; /* main task structure */
- mm_segment_t addr_limit; /* thread address space */
- __u32 cpu; /* current CPU */
+ int cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
+ struct task_struct *task; /* main task structure */
};
/*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
+ * initialize thread_info for any @tsk
+ * - this is not related to init_task per se
*/
#define INIT_THREAD_INFO(tsk) \
{ \
@@ -56,7 +54,6 @@ struct thread_info {
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
}
static inline __attribute_const__ struct thread_info *current_thread_info(void)
@@ -79,26 +76,31 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
-
/* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 16
+#define TIF_SYSCALL_TRACEPOINT 17 /* syscall tracepoint instrumentation */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
#define _TIF_MEMDIE (1<<TIF_MEMDIE)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL)
+
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
/*
* _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
- * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a
- * syscall, so all that reamins to be tested is _TIF_WORK_MASK
+ * SYSCALL_TRACE is anyway separately/unconditionally tested right after a
+ * syscall, so all that remains to be tested is _TIF_WORK_MASK
*/
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
deleted file mode 100644
index a3083b36f5f4..000000000000
--- a/arch/arc/include/asm/tlb-mmu1.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASM_TLB_MMU_V1_H__
-#define __ASM_TLB_MMU_V1_H__
-
-#include <asm/mmu.h>
-
-#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
-
-.macro TLB_WRITE_HEURISTICS
-
-#define JH_HACK1
-#undef JH_HACK2
-#undef JH_HACK3
-
-#ifdef JH_HACK3
-; Calculate set index for 2-way MMU
-; -avoiding use of GetIndex from MMU
-; and its unpleasant LFSR pseudo-random sequence
-;
-; r1 = TLBPD0 from TLB_RELOAD above
-;
-; -- jh_ex_way_set not cleared on startup
-; didn't want to change setup.c
-; hence extra instruction to clean
-;
-; -- should be in cache since in same line
-; as r0/r1 saves above
-;
-ld r0,[jh_ex_way_sel] ; victim pointer
-and r0,r0,1 ; clean
-xor.f r0,r0,1 ; flip
-st r0,[jh_ex_way_sel] ; store back
-asr r0,r1,12 ; get set # <<1, note bit 12=R=0
-or.nz r0,r0,1 ; set way bit
-and r0,r0,0xff ; clean
-sr r0,[ARC_REG_TLBINDEX]
-#endif
-
-#ifdef JH_HACK2
-; JH hack #2
-; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
-; Slower in thrash case (where it matters) because more code is executed
-; Inefficient due to two-register paradigm of this miss handler
-;
-/* r1 = data TLBPD0 at this point */
-lr r0,[eret] /* instruction address */
-xor r0,r0,r1 /* compare set # */
-and.f r0,r0,0x000fe000 /* 2-way MMU mask */
-bne 88f /* not in same set - no need to probe */
-
-lr r0,[eret] /* instruction address */
-and r0,r0,PAGE_MASK /* VPN of instruction address */
-; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
-and r1,r1,0xff /* Data ASID */
-or r0,r0,r1 /* Instruction address + Data ASID */
-
-lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
-sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
-sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
-sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
-
-xor r0,r0,1 /* flip bottom bit of data index */
-b.d 89f
-sr r0,[ARC_REG_TLBINDEX] /* and put it back */
-88:
-sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
-89:
-#endif
-
-#ifdef JH_HACK1
-;
-; Always checks whether instruction will be kicked out by dtlb miss
-;
-mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
-lr r0,[eret] /* instruction address */
-and r0,r0,PAGE_MASK /* VPN of instruction address */
-bmsk r1,r3,7 /* Data ASID, bits 7-0 */
-or_s r0,r0,r1 /* Instruction address + Data ASID */
-
-sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
-sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
-sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
-
-sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
-lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
-cmp r0,r1 /* if no match on indices, go around */
-xor.eq r1,r1,1 /* flip bottom bit of data index */
-sr r1,[ARC_REG_TLBINDEX] /* and put it back */
-#endif
-
-.endm
-
-#endif
-
-#endif
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index ea40ec7f6cae..1e8809ea000a 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -23,35 +23,6 @@
#include <linux/string.h> /* for generic string functions */
-
-#define __kernel_ok (uaccess_kernel())
-
-/*
- * Algorithmically, for __user_ok() we want do:
- * (start < TASK_SIZE) && (start+len < TASK_SIZE)
- * where TASK_SIZE could either be retrieved from thread_info->addr_limit or
- * emitted directly in code.
- *
- * This can however be rewritten as follows:
- * (len <= TASK_SIZE) && (start+len < TASK_SIZE)
- *
- * Because it essentially checks if buffer end is within limit and @len is
- * non-ngeative, which implies that buffer start will be within limit too.
- *
- * The reason for rewriting being, for majority of cases, @len is generally
- * compile time constant, causing first sub-expression to be compile time
- * subsumed.
- *
- * The second part would generate weird large LIMMs e.g. (0x6000_0000 - 0x10),
- * so we check for TASK_SIZE using get_fs() since the addr_limit load from mem
- * would already have been done at this call site for __kernel_ok()
- *
- */
-#define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \
- ((addr) <= (get_fs() - (sz))))
-#define __access_ok(addr, sz) (unlikely(__kernel_ok) || \
- likely(__user_ok((addr), (sz))))
-
/*********** Single byte/hword/word copies ******************/
#define __get_user_fn(sz, u, k) \
@@ -175,8 +146,9 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
if (n == 0)
return 0;
- /* unaligned */
- if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+ /* fallback for unaligned access when hardware doesn't support */
+ if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+ (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
unsigned char tmp;
@@ -402,8 +374,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
if (n == 0)
return 0;
- /* unaligned */
- if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+ /* fallback for unaligned access when hardware doesn't support */
+ if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+ (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
unsigned char tmp;
@@ -613,7 +586,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return res;
}
-static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
+static inline unsigned long __clear_user(void __user *to, unsigned long n)
{
long res = n;
unsigned char *d_char = to;
@@ -655,91 +628,11 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
return res;
}
-static inline long
-__arc_strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = 0;
- char val;
-
- if (count == 0)
- return 0;
-
- __asm__ __volatile__(
- " mov lp_count, %5 \n"
- " lp 3f \n"
- "1: ldb.ab %3, [%2, 1] \n"
- " breq.d %3, 0, 3f \n"
- " stb.ab %3, [%1, 1] \n"
- " add %0, %0, 1 # Num of NON NULL bytes copied \n"
- "3: \n"
- " .section .fixup, \"ax\" \n"
- " .align 4 \n"
- "4: mov %0, %4 # sets @res as -EFAULT \n"
- " j 3b \n"
- " .previous \n"
- " .section __ex_table, \"a\" \n"
- " .align 4 \n"
- " .word 1b, 4b \n"
- " .previous \n"
- : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
- : "g"(-EFAULT), "r"(count)
- : "lp_count", "memory");
-
- return res;
-}
-
-static inline long __arc_strnlen_user(const char __user *s, long n)
-{
- long res, tmp1, cnt;
- char val;
-
- __asm__ __volatile__(
- " mov %2, %1 \n"
- "1: ldb.ab %3, [%0, 1] \n"
- " breq.d %3, 0, 2f \n"
- " sub.f %2, %2, 1 \n"
- " bnz 1b \n"
- " sub %2, %2, 1 \n"
- "2: sub %0, %1, %2 \n"
- "3: ;nop \n"
- " .section .fixup, \"ax\" \n"
- " .align 4 \n"
- "4: mov %0, 0 \n"
- " j 3b \n"
- " .previous \n"
- " .section __ex_table, \"a\" \n"
- " .align 4 \n"
- " .word 1b, 4b \n"
- " .previous \n"
- : "=r"(res), "=r"(tmp1), "=r"(cnt), "=r"(val)
- : "0"(s), "1"(n)
- : "memory");
-
- return res;
-}
-
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
-#define __clear_user(d, n) __arc_clear_user(d, n)
-#define __strncpy_from_user(d, s, n) __arc_strncpy_from_user(d, s, n)
-#define __strnlen_user(s, n) __arc_strnlen_user(s, n)
-#else
-extern unsigned long arc_clear_user_noinline(void __user *to,
- unsigned long n);
-extern long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
- long count);
-extern long arc_strnlen_user_noinline(const char __user *src, long n);
-
-#define __clear_user(d, n) arc_clear_user_noinline(d, n)
-#define __strncpy_from_user(d, s, n) arc_strncpy_from_user_noinline(d, s, n)
-#define __strnlen_user(s, n) arc_strnlen_user_noinline(s, n)
-
-#endif
+#define __clear_user __clear_user
-#include <asm/segment.h>
#include <asm-generic/uaccess.h>
#endif
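The IS_ENABLED() guards added above let the compiler discard the byte-copy
fallback entirely when the hardware handles unaligned access. A standalone
sketch of that elision pattern, with simplified stand-ins for IS_ENABLED()
and the copy loops:

    /* With the "config" constant 0, the unaligned branch is live; flip it
     * to 1 and the optimizer drops the fallback as dead code. */
    #include <stdio.h>
    #include <string.h>

    #define IS_ENABLED(cfg) (cfg)              /* kernel macro is fancier */
    #define USE_UNALIGNED_MEM_ACCESS 0         /* assumed: no hw support */

    static void copy(void *to, const void *from, unsigned long n)
    {
        if (!IS_ENABLED(USE_UNALIGNED_MEM_ACCESS) &&
            (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
            unsigned char *d = to;             /* byte-wise fallback */
            const unsigned char *s = from;

            while (n--)
                *d++ = *s++;
            return;
        }
        memcpy(to, from, n);                   /* word-capable fast path */
    }

    int main(void)
    {
        char src[] = "abcdef", dst[8] = { 0 };

        copy(dst + 1, src, sizeof(src));       /* deliberately unaligned */
        printf("%s\n", dst + 1);
        return 0;
    }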
diff --git a/arch/arc/include/asm/vermagic.h b/arch/arc/include/asm/vermagic.h
new file mode 100644
index 000000000000..a10257d2c62c
--- /dev/null
+++ b/arch/arc/include/asm/vermagic.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#define MODULE_ARCH_VERMAGIC "ARC700"
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..973095aad665
--- /dev/null
+++ b/arch/arc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARC_VMALLOC_H
+#define _ASM_ARC_VMALLOC_H
+
+#endif /* _ASM_ARC_VMALLOC_H */
diff --git a/arch/arc/include/uapi/asm/bpf_perf_event.h b/arch/arc/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..6cb1c2823288
--- /dev/null
+++ b/arch/arc/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a21..7fd9e741b527 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -13,10 +13,8 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define PAGE_SHIFT 14
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define PAGE_SHIFT 12
+#ifdef __KERNEL__
+#define PAGE_SHIFT CONFIG_PAGE_SHIFT
#else
/*
* Default 8k
@@ -33,5 +31,4 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
-
#endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
index 95f8a4380e11..7a5449dfcb29 100644
--- a/arch/arc/include/uapi/asm/sigcontext.h
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -18,6 +18,7 @@
*/
struct sigcontext {
struct user_regs_struct regs;
+ struct user_regs_arcv2 v2abi;
};
#endif /* _ASM_ARC_SIGCONTEXT_H */
diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 5eafa1115162..fa2713ae6bea 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -21,6 +21,7 @@
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS
diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore
index c5f676c3c224..bbb90f92d051 100644
--- a/arch/arc/kernel/.gitignore
+++ b/arch/arc/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index e784f5396dda..95fbf9364c67 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -3,11 +3,10 @@
# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
#
-# Pass UTS_MACHINE for user_regset definition
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
-obj-y := arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
+obj-y := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-y += ctx_sw_asm.o
+
obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
@@ -23,13 +22,8 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o
+ifdef CONFIG_ISA_ARCOMPACT
CFLAGS_fpu.o += -mdpfp
-
-ifdef CONFIG_ARC_DW2_UNWIND
-CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
-obj-y += ctx_sw.o
-else
-obj-y += ctx_sw_asm.o
endif
-extra-y := vmlinux.lds head.o
+extra-y := vmlinux.lds
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 1f621e416521..f77deb799175 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/hardirq.h>
#include <asm/page.h>
+
int main(void)
{
DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
@@ -19,13 +20,13 @@ int main(void)
BLANK();
- DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
DEFINE(THREAD_FAULT_ADDR,
offsetof(struct thread_struct, fault_address));
BLANK();
+ DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp));
DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
DEFINE(THREAD_INFO_PREEMPT_COUNT,
offsetof(struct thread_info, preempt_count));
@@ -45,7 +46,8 @@ int main(void)
BLANK();
DEFINE(PT_status32, offsetof(struct pt_regs, status32));
- DEFINE(PT_event, offsetof(struct pt_regs, event));
+ DEFINE(PT_event, offsetof(struct pt_regs, ecr));
+ DEFINE(PT_bta, offsetof(struct pt_regs, bta));
DEFINE(PT_sp, offsetof(struct pt_regs, sp));
DEFINE(PT_r0, offsetof(struct pt_regs, r0));
DEFINE(PT_r1, offsetof(struct pt_regs, r1));
@@ -60,13 +62,23 @@ int main(void)
DEFINE(PT_r26, offsetof(struct pt_regs, r26));
DEFINE(PT_ret, offsetof(struct pt_regs, ret));
DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+ OFFSET(PT_fp, pt_regs, fp);
DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
- DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+#ifdef CONFIG_ISA_ARCV2
+ OFFSET(PT_r12, pt_regs, r12);
+ OFFSET(PT_r30, pt_regs, r30);
+#endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ OFFSET(PT_r58, pt_regs, r58);
+ OFFSET(PT_r59, pt_regs, r59);
+#endif
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+ OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
+#endif
DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
- DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
return 0;
}
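For context on the DEFINE()/OFFSET() lines: asm-offsets.c is compiled to
assembly but never linked; a build script scrapes the emitted markers into
the generated asm-offsets.h that the entry code includes. The generic helpers
in include/linux/kbuild.h look roughly like this (paraphrased, so treat the
exact strings as approximate):

    /* Each DEFINE() plants a recognizable ".ascii" marker in the generated
     * assembly; a sed pass rewrites every marker into "#define sym val". */
    #include <linux/stddef.h>        /* offsetof */

    #define DEFINE(sym, val) \
        asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    #define BLANK() \
        asm volatile("\n.ascii \"->\"" : : )

    #define OFFSET(sym, str, mem) \
        DEFINE(sym, offsetof(struct str, mem))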
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
deleted file mode 100644
index e172c3333a84..000000000000
--- a/arch/arc/kernel/ctx_sw.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Vineetg: Aug 2009
- * -"C" version of lowest level context switch asm macro called by schedular
- * gcc doesn't generate the dward CFI info for hand written asm, hence can't
- * backtrace out of it (e.g. tasks sleeping in kernel).
- * So we cheat a bit by writing almost similar code in inline-asm.
- * -This is a hacky way of doing things, but there is no other simple way.
- * I don't want/intend to extend unwinding code to understand raw asm
- */
-
-#include <asm/asm-offsets.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
-
-#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4)
-
-struct task_struct *__sched
-__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
-{
- unsigned int tmp;
- unsigned int prev = (unsigned int)prev_task;
- unsigned int next = (unsigned int)next_task;
-
- __asm__ __volatile__(
- /* FP/BLINK save generated by gcc (standard function prologue */
- "st.a r13, [sp, -4] \n\t"
- "st.a r14, [sp, -4] \n\t"
- "st.a r15, [sp, -4] \n\t"
- "st.a r16, [sp, -4] \n\t"
- "st.a r17, [sp, -4] \n\t"
- "st.a r18, [sp, -4] \n\t"
- "st.a r19, [sp, -4] \n\t"
- "st.a r20, [sp, -4] \n\t"
- "st.a r21, [sp, -4] \n\t"
- "st.a r22, [sp, -4] \n\t"
- "st.a r23, [sp, -4] \n\t"
- "st.a r24, [sp, -4] \n\t"
-#ifndef CONFIG_ARC_CURR_IN_REG
- "st.a r25, [sp, -4] \n\t"
-#else
- "sub sp, sp, 4 \n\t" /* usual r25 placeholder */
-#endif
-
- /* set ksp of outgoing task in tsk->thread.ksp */
-#if KSP_WORD_OFF <= 255
- "st.as sp, [%3, %1] \n\t"
-#else
- /*
- * Workaround for NR_CPUS=4k
- * %1 is bigger than 255 (S9 offset for st.as)
- */
- "add2 r24, %3, %1 \n\t"
- "st sp, [r24] \n\t"
-#endif
-
- /*
- * setup _current_task with incoming tsk.
- * optionally, set r25 to that as well
- * For SMP extra work to get to &_current_task[cpu]
- * (open coded SET_CURR_TASK_ON_CPU)
- */
-#ifndef CONFIG_SMP
- "st %2, [@_current_task] \n\t"
-#else
-#ifdef CONFIG_ARC_PLAT_EZNPS
- "lr r24, [%4] \n\t"
-#ifndef CONFIG_EZNPS_MTM_EXT
- "lsr r24, r24, 4 \n\t"
-#endif
-#else
- "lr r24, [identity] \n\t"
- "lsr r24, r24, 8 \n\t"
- "bmsk r24, r24, 7 \n\t"
-#endif
- "add2 r24, @_current_task, r24 \n\t"
- "st %2, [r24] \n\t"
-#endif
-#ifdef CONFIG_ARC_CURR_IN_REG
- "mov r25, %2 \n\t"
-#endif
-
- /* get ksp of incoming task from tsk->thread.ksp */
- "ld.as sp, [%2, %1] \n\t"
-
- /* start loading it's CALLEE reg file */
-
-#ifndef CONFIG_ARC_CURR_IN_REG
- "ld.ab r25, [sp, 4] \n\t"
-#else
- "add sp, sp, 4 \n\t"
-#endif
- "ld.ab r24, [sp, 4] \n\t"
- "ld.ab r23, [sp, 4] \n\t"
- "ld.ab r22, [sp, 4] \n\t"
- "ld.ab r21, [sp, 4] \n\t"
- "ld.ab r20, [sp, 4] \n\t"
- "ld.ab r19, [sp, 4] \n\t"
- "ld.ab r18, [sp, 4] \n\t"
- "ld.ab r17, [sp, 4] \n\t"
- "ld.ab r16, [sp, 4] \n\t"
- "ld.ab r15, [sp, 4] \n\t"
- "ld.ab r14, [sp, 4] \n\t"
- "ld.ab r13, [sp, 4] \n\t"
-
- /* last (ret value) = prev : although for ARC it mov r0, r0 */
- "mov %0, %3 \n\t"
-
- /* FP/BLINK restore generated by gcc (standard func epilogue */
-
- : "=r"(tmp)
- : "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-#ifdef CONFIG_ARC_PLAT_EZNPS
- , "i"(CTOP_AUX_LOGIC_GLOBAL_ID)
-#endif
- : "blink"
- );
-
- return (struct task_struct *)tmp;
-}
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index 02c461484761..48e1f21976ed 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -11,50 +11,54 @@
#include <asm/entry.h> /* For the SAVE_* macros */
#include <asm/asm-offsets.h>
-#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4)
-
-;################### Low Level Context Switch ##########################
+; IN
+; - r0: prev task (also current)
+; - r1: next task
+; OUT
+; - r0: prev task (so r0 not touched)
.section .sched.text,"ax",@progbits
- .align 4
- .global __switch_to
- .type __switch_to, @function
-__switch_to:
- CFI_STARTPROC
-
- /* Save regs on kernel mode stack of task */
- st.a blink, [sp, -4]
- st.a fp, [sp, -4]
- SAVE_CALLEE_SAVED_KERNEL
+ENTRY_CFI(__switch_to)
- /* Save the now KSP in task->thread.ksp */
-#if KSP_WORD_OFF <= 255
- st.as sp, [r0, KSP_WORD_OFF]
-#else
- /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
- add2 r24, r0, KSP_WORD_OFF
- st sp, [r24]
-#endif
- /*
- * Return last task in r0 (return reg)
- * On ARC, Return reg = First Arg reg = r0.
- * Since we already have last task in r0,
- * don't need to do anything special to return it
- */
+ /* save kernel stack frame regs of @prev task */
+ push blink
+ CFI_DEF_CFA_OFFSET 4
+ CFI_OFFSET r31, -4
+
+ push fp
+ CFI_DEF_CFA_OFFSET 8
+ CFI_OFFSET r27, -8
+
+ mov fp, sp
+ CFI_DEF_CFA_REGISTER r27
+
+ /* kernel mode callee regs of @prev */
+ SAVE_CALLEE_SAVED_KERNEL
/*
- * switch to new task, contained in r1
- * Temp reg r3 is required to get the ptr to store val
+ * save final SP to @prev->thread_info.ksp
+ * @prev is "current" so thread_info derived from SP
*/
- SET_CURR_TASK_ON_CPU r1, r3
+ GET_CURR_THR_INFO_FROM_SP r10
+ st sp, [r10, THREAD_INFO_KSP]
+
+	/* update @next in _current_task[] and the GP register caching it */
+ SET_CURR_TASK_ON_CPU r1, r10
- /* reload SP with kernel mode stack pointer in task->thread.ksp */
- ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+ /* load SP from @next->thread_info.ksp */
+ ld r10, [r1, TASK_THREAD_INFO]
+ ld sp, [r10, THREAD_INFO_KSP]
- /* restore the registers */
+ /* restore callee regs, stack frame regs of @next */
RESTORE_CALLEE_SAVED_KERNEL
- ld.ab fp, [sp, 4]
- ld.ab blink, [sp, 4]
- j [blink]
+ pop fp
+ CFI_RESTORE r27
+ CFI_DEF_CFA r28, 4
+
+ pop blink
+ CFI_RESTORE r31
+ CFI_DEF_CFA_OFFSET 0
+
+ j [blink]
END_CFI(__switch_to)
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index fa86d13df5ed..4c9e61457b2f 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -12,6 +12,7 @@
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <asm/mach_desc.h>
+#include <asm/serial.h>
#ifdef CONFIG_SERIAL_EARLYCON
@@ -29,8 +30,6 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */
- else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
- arc_base_baud = 800000000; /* Fixed 800MHz clk (NPS) */
else
arc_base_baud = 50000000; /* Fixed default 50MHz */
}
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index d04837d91b40..ccc7e8c39eb3 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -339,7 +339,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
case op_LDWX_S: /* LDWX_S c, [b, u6] */
state->x = 1;
- /* intentional fall-through */
+ fallthrough;
case op_LDW_S: /* LDW_S c, [b, u6] */
state->zz = 2;
@@ -366,7 +366,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
case op_SP: /* LD_S|LDB_S b,[sp,u7], ST_S|STB_S b,[sp,u7] */
/* note: we are ignoring possibility of:
* ADD_S, SUB_S, PUSH_S, POP_S as these should not
- * cause unaliged exception anyway */
+ * cause unaligned exception anyway */
state->write = BITS(state->words[0], 6, 6);
state->zz = BITS(state->words[0], 5, 5);
if (state->zz)
@@ -434,14 +434,31 @@ long __kprobes get_reg(int reg, struct pt_regs *regs,
{
long *p;
+#if defined(CONFIG_ISA_ARCOMPACT)
if (reg <= 12) {
p = &regs->r0;
return p[-reg];
}
+#else /* CONFIG_ISA_ARCV2 */
+ if (reg <= 11) {
+ p = &regs->r0;
+ return p[reg];
+ }
+ if (reg == 12)
+ return regs->r12;
+ if (reg == 30)
+ return regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ if (reg == 58)
+ return regs->r58;
+ if (reg == 59)
+ return regs->r59;
+#endif
+#endif
if (cregs && (reg <= 25)) {
p = &cregs->r13;
- return p[13-reg];
+ return p[13 - reg];
}
if (reg == 26)
@@ -461,6 +478,7 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
{
long *p;
+#if defined(CONFIG_ISA_ARCOMPACT)
switch (reg) {
case 0 ... 12:
p = &regs->r0;
@@ -469,7 +487,7 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
case 13 ... 25:
if (cregs) {
p = &cregs->r13;
- p[13-reg] = val;
+ p[13 - reg] = val;
}
break;
case 26:
@@ -487,6 +505,48 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
default:
break;
}
+#else /* CONFIG_ISA_ARCV2 */
+ switch (reg) {
+ case 0 ... 11:
+ p = &regs->r0;
+ p[reg] = val;
+ break;
+ case 12:
+ regs->r12 = val;
+ break;
+ case 13 ... 25:
+ if (cregs) {
+ p = &cregs->r13;
+ p[13 - reg] = val;
+ }
+ break;
+ case 26:
+ regs->r26 = val;
+ break;
+ case 27:
+ regs->fp = val;
+ break;
+ case 28:
+ regs->sp = val;
+ break;
+ case 30:
+ regs->r30 = val;
+ break;
+ case 31:
+ regs->blink = val;
+ break;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ case 58:
+ regs->r58 = val;
+ break;
+ case 59:
+ regs->r59 = val;
+ break;
+#endif
+ default:
+ break;
+ }
+#endif
}
/*
@@ -503,7 +563,6 @@ int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs,
{
struct disasm_state instr;
- memset(&instr, 0, sizeof(struct disasm_state));
disasm_instr(pc, &instr, 0, regs, cregs);
*next_pc = pc + instr.instr_len;
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 12d5f12d10d2..2e49c81c8086 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -10,6 +10,7 @@
#include <asm/errno.h>
#include <asm/arcregs.h>
#include <asm/irqflags.h>
+#include <asm/mmu.h>
; A maximum number of supported interrupts in the core interrupt controller.
; This number is not equal to the maximum interrupt number (256) because
@@ -124,11 +125,6 @@ ENTRY(mem_service)
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_memory_error
b ret_from_exception
END(mem_service)
@@ -137,11 +133,6 @@ ENTRY(EV_Misaligned)
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
SAVE_CALLEE_SAVED_USER
mov r2, sp ; callee_regs
@@ -162,11 +153,6 @@ ENTRY(EV_TLBProtV)
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp ; pt_regs
-
- FAKE_RET_FROM_EXCPN
-
mov blink, ret_from_exception
b do_page_fault
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 5cb0cd7e4eab..774c03cc1d1a 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -254,18 +254,7 @@ END(handle_interrupt_level1)
ENTRY(EV_TLBProtV)
- EXCEPTION_PROLOGUE
-
- mov r2, r10 ; ECR set into r10 already
- lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above)
-
- ; Exception auto-disables further Intr/exceptions.
- ; Re-enable them by pretending to return from exception
- ; (so rest of handler executes in pure K mode)
-
- FAKE_RET_FROM_EXCPN
-
- mov r1, sp ; Handle to pt_regs
+ EXCEPTION_PROLOGUE ; ECR returned in r10
;------ (5) Type of Protection Violation? ----------
;
@@ -273,8 +262,7 @@ ENTRY(EV_TLBProtV)
; -Access Violation : 00_23_(00|01|02|03)_00
; x r w r+w
; -Unaligned Access : 00_23_04_00
- ;
- bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+ bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
;========= (6a) Access Violation Processing ========
bl do_page_fault
@@ -303,9 +291,6 @@ END(EV_TLBProtV)
ENTRY(call_do_page_fault)
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp
- FAKE_RET_FROM_EXCPN
mov blink, ret_from_exception
b do_page_fault
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 72be01270e24..089f6680518f 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -29,12 +29,24 @@ ENTRY(sys_clone_wrapper)
DISCARD_CALLEE_SAVED_USER
GET_CURR_THR_INFO_FLAGS r10
- btst r10, TIF_SYSCALL_TRACE
- bnz tracesys_exit
+ and.f 0, r10, _TIF_SYSCALL_WORK
+ bnz tracesys_exit
b .Lret_from_system_call
END(sys_clone_wrapper)
+ENTRY(sys_clone3_wrapper)
+ SAVE_CALLEE_SAVED_USER
+ bl @sys_clone3
+ DISCARD_CALLEE_SAVED_USER
+
+ GET_CURR_THR_INFO_FLAGS r10
+ and.f 0, r10, _TIF_SYSCALL_WORK
+ bnz tracesys_exit
+
+ b .Lret_from_system_call
+END(sys_clone3_wrapper)
+
ENTRY(ret_from_fork)
; when the forked child comes here from the __switch_to function
; r0 has the last task pointer.
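A note on the `and.f 0, r10, _TIF_SYSCALL_WORK` test in the wrappers above: it replaces a single-bit `btst` so that any of several thread flags (ptrace tracing, syscall tracepoints) routes through the slow path. A host-side sketch of the mask idiom; the bit positions and mask composition here are illustrative, not the kernel's actual values:

    #include <stdio.h>

    #define TIF_SYSCALL_TRACE      4   /* illustrative bit positions */
    #define TIF_SYSCALL_TRACEPOINT 5
    #define _TIF_SYSCALL_TRACE      (1U << TIF_SYSCALL_TRACE)
    #define _TIF_SYSCALL_TRACEPOINT (1U << TIF_SYSCALL_TRACEPOINT)
    /* one mask covering every reason to take the slow syscall path */
    #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)

    int main(void)
    {
        unsigned int flags = _TIF_SYSCALL_TRACEPOINT; /* only tracepoints on */

        /* equivalent of "and.f 0, r10, _TIF_SYSCALL_WORK; bnz tracesys_exit" */
        if (flags & _TIF_SYSCALL_WORK)
            printf("slow path: tracing hooks must run\n");
        else
            printf("fast path\n");
        return 0;
    }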
@@ -68,11 +80,6 @@ ENTRY(instr_service)
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_insterror_or_kprobe
b ret_from_exception
END(instr_service)
@@ -83,19 +90,15 @@ END(instr_service)
ENTRY(EV_MachineCheck)
- EXCEPTION_PROLOGUE
+ EXCEPTION_PROLOGUE_KEEP_AE ; ECR returned in r10
- lr r2, [ecr]
lr r0, [efa]
mov r1, sp
- ; hardware auto-disables MMU, re-enable it to allow kernel vaddr
- ; access for say stack unwinding of modules for crash dumps
- lr r3, [ARC_REG_PID]
- or r3, r3, MMU_ENABLE
- sr r3, [ARC_REG_PID]
+ ; MC exceptions disable MMU
+ ARC_MMU_REENABLE r3
- lsr r3, r2, 8
+ lsr r3, r10, 8
bmsk r3, r3, 7
brne r3, ECR_C_MCHK_DUP_TLB, 1f
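EXCEPTION_PROLOGUE_KEEP_AE leaves ECR in r10, so the `lsr`/`bmsk` pair above extracts the 8-bit cause code directly from it. The same decode in C, using the field layout implied by the handler (vector in bits 23:16, cause code in 15:8, parameter in 7:0; the authoritative layout lives in arcregs.h):

    #include <stdio.h>

    int main(void)
    {
        unsigned int ecr = 0x00050100;           /* sample ECR value */
        unsigned int cause = (ecr >> 8) & 0xff;  /* lsr r3, r10, 8 ; bmsk r3, r3, 7 */
        unsigned int param = ecr & 0xff;
        unsigned int vec   = (ecr >> 16) & 0xff;

        printf("vec=%#x cause=%#x param=%#x\n", vec, cause, param);
        return 0;
    }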
@@ -120,11 +123,6 @@ ENTRY(EV_PrivilegeV)
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_privilege_fault
b ret_from_exception
END(EV_PrivilegeV)
@@ -136,11 +134,6 @@ ENTRY(EV_Extension)
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_extension_fault
b ret_from_exception
END(EV_Extension)
@@ -151,22 +144,20 @@ END(EV_Extension)
; syscall Tracing
; ---------------------------------------------
tracesys:
- ; save EFA in case tracer wants the PC of traced task
- ; using ERET won't work since next-PC has already committed
- lr r12, [efa]
+ ; safekeep EFA (in r12) in case the syscall tracer wants the PC
+ ; (for traps, ERET is pre-commit so it points to the next PC)
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11
st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address
- ; PRE Sys Call Ptrace hook
- mov r0, sp ; pt_regs needed
- bl @syscall_trace_entry
+ ; PRE syscall trace hook
+ mov r0, sp ; pt_regs
+ bl @syscall_trace_enter
; Tracing code now returns the syscall num (orig or modif)
mov r8, r0
; Do the Sys Call as we normally would.
- ; Validate the Sys Call number
- cmp r8, NR_syscalls
+ cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi tracesys_exit
@@ -182,42 +173,36 @@ tracesys:
ld r6, [sp, PT_r6]
ld r7, [sp, PT_r7]
ld.as r9, [sys_call_table, r8]
- jl [r9] ; Entry into Sys Call Handler
+ jl [r9]
tracesys_exit:
- st r0, [sp, PT_r0] ; sys call return value in pt_regs
+ st r0, [sp, PT_r0]
- ;POST Sys Call Ptrace Hook
+ ; POST syscall trace hook
+ mov r0, sp ; pt_regs needed
bl @syscall_trace_exit
- b ret_from_exception ; NOT ret_from_system_call at is saves r0 which
- ; we'd done before calling post hook above
+
+ ; don't call ret_from_system_call as it saves r0, already done above
+ b ret_from_exception
; ---------------------------------------------
; Breakpoint TRAP
; ---------------------------------------------
trap_with_param:
+ mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc
+ mov r1, sp ; pt_regs
- ; stop_pc info by gdb needs this info
- lr r0, [efa]
- mov r1, sp
-
- ; Now that we have read EFA, it is safe to do "fake" rtie
- ; and get out of CPU exception mode
- FAKE_RET_FROM_EXCPN
-
- ; Save callee regs in case gdb wants to have a look
- ; SP will grow up by size of CALLEE Reg-File
- ; NOTE: clobbers r12
+ ; save callee regs in case tracer/gdb wants to peek
SAVE_CALLEE_SAVED_USER
- ; save location of saved Callee Regs @ thread_struct->pc
+ ; safekeep ref to callee regs
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10
st sp, [r10, THREAD_CALLEE_REG]
- ; Call the trap handler
+ ; call the non syscall trap handler
bl do_non_swi_trap
- ; unwind stack to discard Callee saved Regs
+ ; unwind stack to discard callee regs
DISCARD_CALLEE_SAVED_USER
b ret_from_exception
@@ -229,36 +214,33 @@ trap_with_param:
ENTRY(EV_Trap)
- EXCEPTION_PROLOGUE
+ EXCEPTION_PROLOGUE_KEEP_AE
+
+ lr r12, [efa]
+
+ FAKE_RET_FROM_EXCPN
- ;============ TRAP 1 :breakpoints
- ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+ ;============ TRAP N : breakpoints, kprobes etc
bmsk.f 0, r10, 7
bnz trap_with_param
- ;============ TRAP (no param): syscall top level
-
- ; First return from Exception to pure K mode (Exception/IRQs renabled)
- FAKE_RET_FROM_EXCPN
+ ;============ TRAP 0 (no param): syscall
- ; If syscall tracing ongoing, invoke pre-post-hooks
+ ; syscall tracing ongoing, invoke pre-post-hooks around syscall
GET_CURR_THR_INFO_FLAGS r10
- btst r10, TIF_SYSCALL_TRACE
- bnz tracesys ; this never comes back
+ and.f 0, r10, _TIF_SYSCALL_WORK
+ bnz tracesys ; this never comes back
;============ Normal syscall case
- ; syscall num shd not exceed the total system calls avail
- cmp r8, NR_syscalls
+ cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi .Lret_from_system_call
- ; Offset into the syscall_table and call handler
ld.as r9,[sys_call_table, r8]
- jl [r9] ; Entry into Sys Call Handler
+ jl [r9]
.Lret_from_system_call:
-
st r0, [sp, PT_r0] ; sys call return value in pt_regs
; fall through to ret_from_exception
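Note the range check changed from `cmp r8, NR_syscalls` to `cmp r8, NR_syscalls - 1` with `bhi`: valid syscall numbers are 0..NR_syscalls-1, and the old form let `r8 == NR_syscalls` slip through to the table. The same unsigned check in C (the NR_syscalls value is illustrative):

    #include <stdio.h>

    #define NR_syscalls 403          /* illustrative count */
    #define ENOSYS      38

    long dispatch(unsigned int nr)
    {
        /* "cmp r8, NR_syscalls - 1; bhi" == unsigned nr > NR_syscalls - 1 */
        if (nr > NR_syscalls - 1)
            return -ENOSYS;
        return 0;                /* would index sys_call_table[nr] here */
    }

    int main(void)
    {
        printf("%ld %ld\n", dispatch(NR_syscalls - 1), dispatch(NR_syscalls));
        return 0;
    }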
@@ -301,7 +283,8 @@ resume_user_mode_begin:
mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume()
GET_CURR_THR_INFO_FLAGS r9
- bbit0 r9, TIF_SIGPENDING, .Lchk_notify_resume
+ and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL
+ bz .Lchk_notify_resume
; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs
; in pt_reg since the "C" ABI (kernel code) will automatically
@@ -313,7 +296,7 @@ resume_user_mode_begin:
; tracer might call PEEKUSR(CALLEE reg)
;
; NOTE: SP will grow up by size of CALLEE Reg-File
- SAVE_CALLEE_SAVED_USER ; clobbers r12
+ SAVE_CALLEE_SAVED_USER
; save location of saved Callee Regs @ thread_struct->callee
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10
@@ -337,11 +320,11 @@ resume_user_mode_begin:
resume_kernel_mode:
; Disable Interrupts from this point on
- ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq()
- ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe
+ ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq()
+ ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe
IRQ_DISABLE r9
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
; Can't preempt if preemption disabled
GET_CURR_THR_INFO_FROM_SP r10
diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c
index 07e22b563fbb..ec640219d989 100644
--- a/arch/arc/kernel/fpu.c
+++ b/arch/arc/kernel/fpu.c
@@ -6,7 +6,9 @@
*/
#include <linux/sched.h>
-#include <asm/switch_to.h>
+#include <asm/fpu.h>
+
+#ifdef CONFIG_ISA_ARCOMPACT
/*
* To save/restore FPU regs, simplest scheme would use LR/SR insns.
@@ -50,3 +52,31 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
: "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2))
);
}
+
+#else
+
+void fpu_init_task(struct pt_regs *regs)
+{
+ const unsigned int fwe = 0x80000000;
+
+ /* default rounding mode */
+ write_aux_reg(ARC_REG_FPU_CTRL, 0x100);
+
+ /* Initialize to zero: writing this reg requires FWE to be set */
+ write_aux_reg(ARC_REG_FPU_STATUS, fwe);
+}
+
+void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
+{
+ struct arc_fpu *save = &prev->thread.fpu;
+ struct arc_fpu *restore = &next->thread.fpu;
+ const unsigned int fwe = 0x80000000;
+
+ save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL);
+ save->status = read_aux_reg(ARC_REG_FPU_STATUS);
+
+ write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl);
+ write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status));
+}
+
+#endif
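The ARCv2 path snapshots two aux registers at switch-out and writes them back at switch-in, OR-ing FWE (bit 31) into the status write because, as the comment above notes, FPU_STATUS writes only take effect with that enable bit set. A host-side sketch of the pattern with the aux-register accessors stubbed out (the real ones touch hardware):

    #include <stdio.h>

    /* stand-ins for read_aux_reg()/write_aux_reg() */
    static unsigned int fpu_ctrl, fpu_status;
    static unsigned int read_reg(unsigned int *r)          { return *r; }
    static void write_reg(unsigned int *r, unsigned int v) { *r = v; }

    struct arc_fpu { unsigned int ctrl, status; };

    static void fpu_switch(struct arc_fpu *prev, struct arc_fpu *next)
    {
        const unsigned int fwe = 0x80000000;  /* write-enable gate, bit 31 */

        prev->ctrl   = read_reg(&fpu_ctrl);
        prev->status = read_reg(&fpu_status);

        write_reg(&fpu_ctrl, next->ctrl);
        write_reg(&fpu_status, fwe | next->status);  /* status write needs FWE */
    }

    int main(void)
    {
        struct arc_fpu a = {0}, b = { .ctrl = 0x100, .status = 0x1 };

        fpu_switch(&a, &b);
        printf("ctrl=%#x status=%#x\n", fpu_ctrl, fpu_status);
        return 0;
    }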
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 6f41265f6250..9152782444b5 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -14,6 +14,7 @@
#include <asm/entry.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
+#include <asm/dsp-impl.h>
#include <asm/irqflags.h>
.macro CPU_EARLY_SETUP
@@ -58,7 +59,33 @@
bclr r5, r5, STATUS_AD_BIT
#endif
kflag r5
-#endif
+
+#ifdef CONFIG_ARC_LPB_DISABLE
+ lr r5, [ARC_REG_LPB_BUILD]
+ breq r5, 0, 1f ; LPB doesn't exist
+ mov r5, 1
+ sr r5, [ARC_REG_LPB_CTRL]
+1:
+#endif /* CONFIG_ARC_LPB_DISABLE */
+
+ /* On HSDK, CCMs need to be remapped super early */
+#ifdef CONFIG_ARC_SOC_HSDK
+ mov r6, 0x60000000
+ lr r5, [ARC_REG_ICCM_BUILD]
+ breq r5, 0, 1f
+ sr r6, [ARC_REG_AUX_ICCM]
+1:
+ lr r5, [ARC_REG_DCCM_BUILD]
+ breq r5, 0, 2f
+ sr r6, [ARC_REG_AUX_DCCM]
+2:
+#endif /* CONFIG_ARC_SOC_HSDK */
+
+#endif /* CONFIG_ISA_ARCV2 */
+
+ ; Configure DSP_CTRL properly, so the kernel may use integer multiply,
+ ; multiply-accumulate, and divide operations
+ DSP_EARLY_INIT
.endm
.section .init.text, "ax",@progbits
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 5cda19d0aa91..678898757e47 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -108,7 +108,7 @@ static void arcv2_irq_unmask(struct irq_data *data)
write_aux_reg(AUX_IRQ_ENABLE, 1);
}
-void arcv2_irq_enable(struct irq_data *data)
+static void arcv2_irq_enable(struct irq_data *data)
{
/* set default priority */
write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index a86641b91e65..6885e422870e 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
* Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
* Here local_irq_enable( ) shd not re-enable lower priority interrupts
* -If called from soft-ISR, it must re-enable all interrupts
- * soft ISR are low prioity jobs which can be very slow, thus all IRQs
+ * soft ISR are low priority jobs which can be very slow, thus all IRQs
* must be enabled while they run.
* Now hardware context wise we may still be in L2 ISR (not done rtie)
* still we must re-enable both L1 and L2 IRQs
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index ef909dd4b40c..dd09b58ff82d 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -6,6 +6,8 @@
#include <linux/interrupt.h>
#include <linux/irqchip.h>
#include <asm/mach_desc.h>
+
+#include <asm/irq_regs.h>
#include <asm/smp.h>
/*
@@ -39,5 +41,11 @@ void __init init_IRQ(void)
*/
void arch_do_IRQ(unsigned int hwirq, struct pt_regs *regs)
{
- handle_domain_irq(NULL, hwirq, regs);
+ struct pt_regs *old_regs;
+
+ irq_enter();
+ old_regs = set_irq_regs(regs);
+ generic_handle_domain_irq(NULL, hwirq);
+ set_irq_regs(old_regs);
+ irq_exit();
}
diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c
index b8600dc325b5..70b74a5d047b 100644
--- a/arch/arc/kernel/jump_label.c
+++ b/arch/arc/kernel/jump_label.c
@@ -96,19 +96,6 @@ void arch_jump_label_transform(struct jump_entry *entry,
flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
}
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use only one NOP type (1x, 4 byte) in arch_static_branch, so
- * there's no need to patch an identical NOP over the top of it here.
- * The generic code calls 'arch_jump_label_transform' if the NOP needs
- * to be replaced by a branch, so 'arch_jump_label_transform_static' is
- * never called with type other than JUMP_LABEL_NOP.
- */
- BUG_ON(type != JUMP_LABEL_NOP);
-}
-
#ifdef CONFIG_ARC_DBG_JUMP_LABEL
#define SELFTEST_MSG "ARC: instruction generation self-test: "
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index ecfbc42d3a40..4f2b5951454f 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
regs->ret = addr;
+ fallthrough;
case 'D':
case 'k':
@@ -174,7 +175,7 @@ void kgdb_trap(struct pt_regs *regs)
* with trap_s 4 (compiled) breakpoints, continuation needs to
* start after the breakpoint.
*/
- if (regs->ecr_param == 3)
+ if (regs->ecr.param == 3)
instruction_pointer(regs) -= BREAK_INSTR_SIZE;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 7d3efe83cba7..e71d64119d71 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -317,22 +317,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
* caused the fault.
*/
- /* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
/*
* In case the user-specified fault handler returned zero,
* try to fix up.
@@ -379,8 +363,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
static void __used kretprobe_trampoline_holder(void)
{
- __asm__ __volatile__(".global kretprobe_trampoline\n"
- "kretprobe_trampoline:\n" "nop\n");
+ __asm__ __volatile__(".global __kretprobe_trampoline\n"
+ "__kretprobe_trampoline:\n"
+ "nop\n");
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
@@ -388,66 +373,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
{
ri->ret_addr = (kprobe_opcode_t *) regs->blink;
+ ri->fp = NULL;
/* Replace the return addr with trampoline addr */
- regs->blink = (unsigned long)&kretprobe_trampoline;
+ regs->blink = (unsigned long)&__kretprobe_trampoline;
}
static int __kprobes trampoline_probe_handler(struct kprobe *p,
struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more than one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address) {
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->ret = orig_ret_address;
-
- kretprobe_hash_unlock(current, &flags);
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
+ regs->ret = __kretprobe_trampoline_handler(regs, NULL);
/* By returning a non zero value, we are telling the kprobe handler
* that we don't want the post_handler to run
@@ -456,7 +391,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
}
static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
.pre_handler = trampoline_probe_handler
};
@@ -468,7 +403,7 @@ int __init arch_init_kprobes(void)
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
- if (p->addr == (kprobe_opcode_t *) &kretprobe_trampoline)
+ if (p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline)
return 1;
return 0;
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index abf9398cc333..55373ca0d28b 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -165,8 +165,6 @@ static void mcip_probe_n_setup(void)
IS_AVAIL1(mp.idu, "IDU "),
IS_AVAIL1(mp.dbg, "DEBUG "),
IS_AVAIL1(mp.gfrc, "GFRC"));
-
- cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
}
struct plat_smp_ops plat_smp_ops = {
@@ -352,7 +350,7 @@ static void idu_cascade_isr(struct irq_desc *desc)
irq_hw_number_t idu_hwirq = core_hwirq - FIRST_EXT_IRQ;
chained_irq_enter(core_chip, desc);
- generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
+ generic_handle_domain_irq(idu_domain, idu_hwirq);
chained_irq_exit(core_chip, desc);
}
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 661fd842ea97..adff957962da 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -17,6 +17,168 @@
/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN 9
+/*
+ * Some ARC pct quirks:
+ *
+ * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+ * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+ * The ARC 700 can either measure stalls per pipeline stage, or all stalls
+ * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
+ * and all pipeline flushes (e.g. caused by mispredicts, etc.) to
+ * STALLED_CYCLES_FRONTEND.
+ *
+ * We could start multiple performance counters and combine everything
+ * afterwards, but that makes it complicated.
+ *
+ * Note that I$ cache misses aren't counted by either of the two!
+ */
+
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop through each index and find its name to
+ * complete the mapping of perf event_id to h/w index, as the latter is what
+ * actually gets programmed into the counter (see the sketch after the map
+ * below)
+ */
+static const char * const arc_pmu_ev_hw_map[] = {
+ /* count cycles */
+ [PERF_COUNT_HW_CPU_CYCLES] = "crun",
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
+ [PERF_COUNT_HW_BUS_CYCLES] = "crun",
+
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
+
+ /* counts condition */
+ [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+ /* All jump instructions that are taken */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
+#ifdef CONFIG_ISA_ARCV2
+ [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
+#else
+ [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
+ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#endif
+ [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
+ [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
+
+ [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
+ [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
+ [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
+ [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
+ [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
+
+ [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */
+ [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */
+};
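As the comment above describes, the map ties generic event ids to fixed condition names, and probe time resolves each name to whatever index this RTL build enumerates it at. A self-contained sketch of that resolution; the event set and the hardware's name list here are illustrative:

    #include <stdio.h>
    #include <string.h>

    #define NR_EVENTS 3

    static const char * const ev_hw_map[NR_EVENTS] = {
        [0] = "crun",    /* PERF_COUNT_HW_CPU_CYCLES */
        [1] = "iall",    /* PERF_COUNT_HW_INSTRUCTIONS */
        [2] = "ijmptak", /* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
    };

    /* pretend this is what the hardware enumerates, in RTL-build order */
    static const char *hw_conditions[] = { "iall", "crun", "ijmptak", "bflush" };

    static int ev_hw_idx[NR_EVENTS];

    int main(void)
    {
        int i, j, n = sizeof(hw_conditions) / sizeof(hw_conditions[0]);

        for (i = 0; i < NR_EVENTS; i++)
            ev_hw_idx[i] = -1;

        /* loop through each h/w index, match its name to an event id */
        for (j = 0; j < n; j++)
            for (i = 0; i < NR_EVENTS; i++)
                if (!strcmp(ev_hw_map[i], hw_conditions[j]))
                    ev_hw_idx[i] = j;

        for (i = 0; i < NR_EVENTS; i++)
            printf("event %d -> hw index %d\n", i, ev_hw_idx[i]);
        return 0;
    }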
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0xffff
+
+static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
+ },
+ /* DTLB LD/ST Miss not segregated by h/w */
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
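Per the perf ABI, an HW cache event's config packs three fields as type | (op << 8) | (result << 16); the driver unpacks them into three indexes into the table above and rejects CACHE_OP_UNSUPPORTED slots. A sketch of that lookup with abbreviated constants (the real enums come from perf_event.h):

    #include <stdio.h>

    enum { T_L1D, T_MAX };                   /* cache types (abbreviated) */
    enum { OP_READ, OP_WRITE, OP_MAX };      /* ops */
    enum { RES_ACCESS, RES_MISS, RES_MAX };  /* results */

    #define CACHE_OP_UNSUPPORTED 0xffff

    static const unsigned int cache_map[T_MAX][OP_MAX][RES_MAX] = {
        [T_L1D] = {
            [OP_READ]  = { 10 /* LDC */, 11 /* DCLM */ },
            [OP_WRITE] = { 12 /* STC */, CACHE_OP_UNSUPPORTED },
        },
    };

    static int cache_event(unsigned long config)
    {
        unsigned int type = config & 0xff;
        unsigned int op   = (config >> 8) & 0xff;
        unsigned int res  = (config >> 16) & 0xff;
        unsigned int ev;

        if (type >= T_MAX || op >= OP_MAX || res >= RES_MAX)
            return -22;  /* -EINVAL */
        ev = cache_map[type][op][res];
        if (ev == CACHE_OP_UNSUPPORTED)
            return -2;   /* -ENOENT */
        return ev;
    }

    int main(void)
    {
        printf("%d %d\n",
               cache_event(T_L1D | (OP_READ << 8) | (RES_MISS << 16)),
               cache_event(T_L1D | (OP_WRITE << 8) | (RES_MISS << 16)));
        return 0;
    }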
+
enum arc_pmu_attr_groups {
ARCPMU_ATTR_GR_EVENTS,
ARCPMU_ATTR_GR_FORMATS,
@@ -328,7 +490,7 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
}
if (!(event->hw.state & PERF_HES_STOPPED)) {
- /* stop ARC pmu here */
+ /* stop hw counter here */
write_aux_reg(ARC_REG_PCT_INDEX, idx);
/* condition code #0 is always "never" */
@@ -361,7 +523,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
{
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int idx;
idx = ffz(pmu_cpu->used_mask[0]);
if (idx == arc_pmu->n_counters)
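arc_pmu_add now always picks the first free counter with ffz() on the per-CPU used_mask rather than trusting a stale hwc->idx. The find-first-zero allocation idiom, sketched on the host (ffz(x) is the index of the first 0 bit, i.e. the first 1 bit of ~x):

    #include <stdio.h>

    static int ffz(unsigned long x)
    {
        return __builtin_ctzl(~x);   /* caller must ensure x != ~0UL */
    }

    int main(void)
    {
        unsigned long used_mask = 0;   /* bit i set => counter i busy */
        int n_counters = 4;
        int i;

        for (i = 0; i < 5; i++) {
            int idx = ffz(used_mask);

            if (idx >= n_counters) {   /* all counters taken */
                printf("alloc: -EAGAIN\n");
                continue;
            }
            used_mask |= 1UL << idx;   /* claim it */
            printf("alloc: counter %d\n", idx);
        }
        return 0;
    }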
@@ -562,7 +724,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
{
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
- int i, has_interrupts;
+ int i, has_interrupts, irq = -1;
int counter_size; /* in bits */
union cc_name {
@@ -638,22 +800,25 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
};
if (has_interrupts) {
- int irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0) {
+ int ret;
- if (irq < 0) {
- pr_err("Cannot get IRQ number for the platform\n");
- return -ENODEV;
- }
+ arc_pmu->irq = irq;
- arc_pmu->irq = irq;
+ /* intc map function ensures irq_set_percpu_devid() called */
+ ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
+ this_cpu_ptr(&arc_pmu_cpu));
- /* intc map function ensures irq_set_percpu_devid() called */
- request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
- this_cpu_ptr(&arc_pmu_cpu));
+ if (!ret)
+ on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+ else
+ irq = -1;
+ }
- on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+ }
- } else
+ if (irq == -1)
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
/*
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index e1889ce3faf9..186ceab661eb 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -20,6 +20,8 @@
#include <linux/elf.h>
#include <linux/tick.h>
+#include <asm/fpu.h>
+
SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr)
{
task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr;
@@ -41,21 +43,21 @@ SYSCALL_DEFINE0(arc_gettls)
return task_thread_info(current)->thr_ptr;
}
-SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
+SYSCALL_DEFINE3(arc_usr_cmpxchg, int __user *, uaddr, int, expected, int, new)
{
struct pt_regs *regs = current_pt_regs();
u32 uval;
int ret;
/*
- * This is only for old cores lacking LLOCK/SCOND, which by defintion
+ * This is only for old cores lacking LLOCK/SCOND, which by definition
* can't possibly be SMP. Thus doesn't need to be SMP safe.
* And this also helps reduce the overhead for serializing in
* the UP case
*/
WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
- /* Z indicates to userspace if operation succeded */
+ /* Z indicates to userspace if operation succeeded */
regs->status32 &= ~STATUS_Z_MASK;
ret = access_ok(uaddr, sizeof(*uaddr));
@@ -88,10 +90,10 @@ fault:
if (unlikely(ret != -EFAULT))
goto fail;
- down_read(&current->mm->mmap_sem);
- ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
+ mmap_read_lock(current->mm);
+ ret = fixup_user_fault(current->mm, (unsigned long) uaddr,
FAULT_FLAG_WRITE, NULL);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (likely(!ret))
goto again;
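The syscall emulates compare-and-exchange for cores without LLOCK/SCOND: return the old value, store the new one only if the old equals expected, report success via the Z flag, and on a fault fix up the page and retry. Its core semantics in plain C; a single-threaded sketch, minus the user-memory fault handling the kernel version needs:

    #include <stdio.h>

    /* what arc_usr_cmpxchg boils down to, minus fault handling */
    static int usr_cmpxchg(int *uaddr, int expected, int new, int *z_flag)
    {
        int uval = *uaddr;

        *z_flag = (uval == expected);
        if (uval == expected)
            *uaddr = new;
        return uval;   /* old value is the syscall return */
    }

    int main(void)
    {
        int v = 5, z;

        printf("old=%d z=%d v=%d\n", usr_cmpxchg(&v, 5, 7, &z), z, v);
        printf("old=%d z=%d v=%d\n", usr_cmpxchg(&v, 5, 9, &z), z, v);
        return 0;
    }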
@@ -105,32 +107,24 @@ fail:
void arch_cpu_idle(void)
{
- /* Re-enable interrupts <= default irq priority before commiting SLEEP */
+ /* Re-enable interrupts <= default irq priority before committing SLEEP */
const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
__asm__ __volatile__(
"sleep %0 \n"
:
:"I"(arg)); /* can't be "r" has to be embedded const */
-}
-#elif defined(CONFIG_EZNPS_MTM_EXT) /* ARC700 variant in NPS */
-
-void arch_cpu_idle(void)
-{
- /* only the calling HW thread needs to sleep */
- __asm__ __volatile__(
- ".word %0 \n"
- :
- :"i"(CTOP_INST_HWSCHD_WFT_IE12));
+ raw_local_irq_disable();
}
#else /* ARC700 */
void arch_cpu_idle(void)
{
- /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+ /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
__asm__ __volatile__("sleep 0x3 \n");
+ raw_local_irq_disable();
}
#endif
@@ -147,7 +141,7 @@ asmlinkage void ret_from_fork(void);
* | unused |
* | |
* ------------------
- * | r25 | <==== top of Stack (thread.ksp)
+ * | r25 | <==== top of Stack (thread_info.ksp)
* ~ ~
* | --to-- | (CALLEE Regs of kernel mode)
* | r13 |
@@ -168,13 +162,13 @@ asmlinkage void ret_from_fork(void);
* | SP |
* | orig_r0 |
* | event/ECR |
- * | user_r25 |
* ------------------ <===== END of PAGE
*/
-int copy_thread(unsigned long clone_flags,
- unsigned long usp, unsigned long kthread_arg,
- struct task_struct *p)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *c_regs; /* child's pt_regs */
unsigned long *childksp; /* to unwind out of __switch_to() */
struct callee_regs *c_callee; /* child's callee regs */
@@ -187,24 +181,24 @@ int copy_thread(unsigned long clone_flags,
c_callee = ((struct callee_regs *)childksp) - 1;
/*
- * __switch_to() uses thread.ksp to start unwinding stack
+ * __switch_to() uses thread_info.ksp to start unwinding stack
* For kernel threads we don't need to create callee regs, the
* stack layout nevertheless needs to remain the same.
* Also, since __switch_to anyways unwinds callee regs, we use
* this to populate kernel thread entry-pt/args into callee regs,
* so that ret_from_kernel_thread() becomes simpler.
*/
- p->thread.ksp = (unsigned long)c_callee; /* THREAD_KSP */
+ task_thread_info(p)->ksp = (unsigned long)c_callee; /* THREAD_INFO_KSP */
/* __switch_to expects FP(0), BLINK(return addr) at top */
childksp[0] = 0; /* fp */
childksp[1] = (unsigned long)ret_from_fork; /* blink */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(args->fn)) {
memset(c_regs, 0, sizeof(struct pt_regs));
- c_callee->r13 = kthread_arg;
- c_callee->r14 = usp; /* function */
+ c_callee->r13 = (unsigned long)args->fn_arg;
+ c_callee->r14 = (unsigned long)args->fn;
return 0;
}
@@ -231,7 +225,7 @@ int copy_thread(unsigned long clone_flags,
* set task's userland tls data ptr from 4th arg
* clone C-lib call is difft from clone sys-call
*/
- task_thread_info(p)->thr_ptr = regs->r3;
+ task_thread_info(p)->thr_ptr = tls;
} else {
/* Normal fork case: set parent's TLS ptr in child */
task_thread_info(p)->thr_ptr =
@@ -248,23 +242,13 @@ int copy_thread(unsigned long clone_flags,
*/
c_callee->r25 = task_thread_info(p)->thr_ptr;
-#ifdef CONFIG_ARC_CURR_IN_REG
- /*
- * setup usermode thread pointer #2:
- * however for this special use of r25 in kernel, __switch_to() sets
- * r25 for kernel needs and only in the final return path is usermode
- * r25 setup, from pt_regs->user_r25. So set that up as well
- */
- c_regs->user_r25 = c_callee->r25;
-#endif
-
return 0;
}
/*
* Do necessary setup to start up a new user task
*/
-void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
+void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
{
regs->sp = usp;
regs->ret = pc;
@@ -276,9 +260,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
*/
regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
-#ifdef CONFIG_EZNPS_MTM_EXT
- regs->eflags = 0;
-#endif
+ fpu_init_task(regs);
/* bogus seed values for debugging */
regs->lp_start = 0x10;
@@ -292,11 +274,6 @@ void flush_thread(void)
{
}
-int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
-{
- return 0;
-}
-
int elf_check_arch(const struct elf32_hdr *x)
{
unsigned int eflags;
@@ -310,7 +287,7 @@ int elf_check_arch(const struct elf32_hdr *x)
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
pr_err("ABI mismatch - you need newer toolchain\n");
- force_sigsegv(SIGSEGV);
+ force_fatal_sig(SIGSEGV);
return 0;
}
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index d5f3fcf273b5..e0c233c178b1 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -4,12 +4,95 @@
*/
#include <linux/ptrace.h>
-#include <linux/tracehook.h>
#include <linux/sched/task_stack.h>
#include <linux/regset.h>
#include <linux/unistd.h>
#include <linux/elf.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+#ifdef CONFIG_ISA_ARCOMPACT
+static const struct pt_regs_offset regoffset_table[] = {
+ REG_OFFSET_NAME(bta),
+ REG_OFFSET_NAME(lp_start),
+ REG_OFFSET_NAME(lp_end),
+ REG_OFFSET_NAME(lp_count),
+ REG_OFFSET_NAME(status32),
+ REG_OFFSET_NAME(ret),
+ REG_OFFSET_NAME(blink),
+ REG_OFFSET_NAME(fp),
+ REG_OFFSET_NAME(r26),
+ REG_OFFSET_NAME(r12),
+ REG_OFFSET_NAME(r11),
+ REG_OFFSET_NAME(r10),
+ REG_OFFSET_NAME(r9),
+ REG_OFFSET_NAME(r8),
+ REG_OFFSET_NAME(r7),
+ REG_OFFSET_NAME(r6),
+ REG_OFFSET_NAME(r5),
+ REG_OFFSET_NAME(r4),
+ REG_OFFSET_NAME(r3),
+ REG_OFFSET_NAME(r2),
+ REG_OFFSET_NAME(r1),
+ REG_OFFSET_NAME(r0),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(orig_r0),
+ REG_OFFSET_NAME(ecr),
+ REG_OFFSET_END,
+};
+
+#else
+
+static const struct pt_regs_offset regoffset_table[] = {
+ REG_OFFSET_NAME(orig_r0),
+ REG_OFFSET_NAME(ecr),
+ REG_OFFSET_NAME(bta),
+ REG_OFFSET_NAME(r26),
+ REG_OFFSET_NAME(fp),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(r12),
+ REG_OFFSET_NAME(r30),
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ REG_OFFSET_NAME(r58),
+ REG_OFFSET_NAME(r59),
+#endif
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+ REG_OFFSET_NAME(DSP_CTRL),
+#endif
+ REG_OFFSET_NAME(r0),
+ REG_OFFSET_NAME(r1),
+ REG_OFFSET_NAME(r2),
+ REG_OFFSET_NAME(r3),
+ REG_OFFSET_NAME(r4),
+ REG_OFFSET_NAME(r5),
+ REG_OFFSET_NAME(r6),
+ REG_OFFSET_NAME(r7),
+ REG_OFFSET_NAME(r8),
+ REG_OFFSET_NAME(r9),
+ REG_OFFSET_NAME(r10),
+ REG_OFFSET_NAME(r11),
+ REG_OFFSET_NAME(blink),
+ REG_OFFSET_NAME(lp_end),
+ REG_OFFSET_NAME(lp_start),
+ REG_OFFSET_NAME(lp_count),
+ REG_OFFSET_NAME(ei),
+ REG_OFFSET_NAME(ldi),
+ REG_OFFSET_NAME(jli),
+ REG_OFFSET_NAME(ret),
+ REG_OFFSET_NAME(status32),
+ REG_OFFSET_END,
+};
+#endif
+
static struct callee_regs *task_callee_regs(struct task_struct *tsk)
{
struct callee_regs *tmp = (struct callee_regs *)tsk->thread.callee_reg;
@@ -18,88 +101,61 @@ static struct callee_regs *task_callee_regs(struct task_struct *tsk)
static int genregs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
const struct pt_regs *ptregs = task_pt_regs(target);
const struct callee_regs *cregs = task_callee_regs(target);
- int ret = 0;
unsigned int stop_pc_val;
-#define REG_O_CHUNK(START, END, PTR) \
- if (!ret) \
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \
- offsetof(struct user_regs_struct, START), \
- offsetof(struct user_regs_struct, END));
-
-#define REG_O_ONE(LOC, PTR) \
- if (!ret) \
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \
- offsetof(struct user_regs_struct, LOC), \
- offsetof(struct user_regs_struct, LOC) + 4);
-
-#define REG_O_ZERO(LOC) \
- if (!ret) \
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \
- offsetof(struct user_regs_struct, LOC), \
- offsetof(struct user_regs_struct, LOC) + 4);
-
- REG_O_ZERO(pad);
- REG_O_ONE(scratch.bta, &ptregs->bta);
- REG_O_ONE(scratch.lp_start, &ptregs->lp_start);
- REG_O_ONE(scratch.lp_end, &ptregs->lp_end);
- REG_O_ONE(scratch.lp_count, &ptregs->lp_count);
- REG_O_ONE(scratch.status32, &ptregs->status32);
- REG_O_ONE(scratch.ret, &ptregs->ret);
- REG_O_ONE(scratch.blink, &ptregs->blink);
- REG_O_ONE(scratch.fp, &ptregs->fp);
- REG_O_ONE(scratch.gp, &ptregs->r26);
- REG_O_ONE(scratch.r12, &ptregs->r12);
- REG_O_ONE(scratch.r11, &ptregs->r11);
- REG_O_ONE(scratch.r10, &ptregs->r10);
- REG_O_ONE(scratch.r9, &ptregs->r9);
- REG_O_ONE(scratch.r8, &ptregs->r8);
- REG_O_ONE(scratch.r7, &ptregs->r7);
- REG_O_ONE(scratch.r6, &ptregs->r6);
- REG_O_ONE(scratch.r5, &ptregs->r5);
- REG_O_ONE(scratch.r4, &ptregs->r4);
- REG_O_ONE(scratch.r3, &ptregs->r3);
- REG_O_ONE(scratch.r2, &ptregs->r2);
- REG_O_ONE(scratch.r1, &ptregs->r1);
- REG_O_ONE(scratch.r0, &ptregs->r0);
- REG_O_ONE(scratch.sp, &ptregs->sp);
-
- REG_O_ZERO(pad2);
-
- REG_O_ONE(callee.r25, &cregs->r25);
- REG_O_ONE(callee.r24, &cregs->r24);
- REG_O_ONE(callee.r23, &cregs->r23);
- REG_O_ONE(callee.r22, &cregs->r22);
- REG_O_ONE(callee.r21, &cregs->r21);
- REG_O_ONE(callee.r20, &cregs->r20);
- REG_O_ONE(callee.r19, &cregs->r19);
- REG_O_ONE(callee.r18, &cregs->r18);
- REG_O_ONE(callee.r17, &cregs->r17);
- REG_O_ONE(callee.r16, &cregs->r16);
- REG_O_ONE(callee.r15, &cregs->r15);
- REG_O_ONE(callee.r14, &cregs->r14);
- REG_O_ONE(callee.r13, &cregs->r13);
-
- REG_O_ONE(efa, &target->thread.fault_address);
-
- if (!ret) {
- if (in_brkpt_trap(ptregs)) {
- stop_pc_val = target->thread.fault_address;
- pr_debug("\t\tstop_pc (brk-pt)\n");
- } else {
- stop_pc_val = ptregs->ret;
- pr_debug("\t\tstop_pc (others)\n");
- }
-
- REG_O_ONE(stop_pc, &stop_pc_val);
+ membuf_zero(&to, 4); // pad
+ membuf_store(&to, ptregs->bta);
+ membuf_store(&to, ptregs->lp_start);
+ membuf_store(&to, ptregs->lp_end);
+ membuf_store(&to, ptregs->lp_count);
+ membuf_store(&to, ptregs->status32);
+ membuf_store(&to, ptregs->ret);
+ membuf_store(&to, ptregs->blink);
+ membuf_store(&to, ptregs->fp);
+ membuf_store(&to, ptregs->r26); // gp
+ membuf_store(&to, ptregs->r12);
+ membuf_store(&to, ptregs->r11);
+ membuf_store(&to, ptregs->r10);
+ membuf_store(&to, ptregs->r9);
+ membuf_store(&to, ptregs->r8);
+ membuf_store(&to, ptregs->r7);
+ membuf_store(&to, ptregs->r6);
+ membuf_store(&to, ptregs->r5);
+ membuf_store(&to, ptregs->r4);
+ membuf_store(&to, ptregs->r3);
+ membuf_store(&to, ptregs->r2);
+ membuf_store(&to, ptregs->r1);
+ membuf_store(&to, ptregs->r0);
+ membuf_store(&to, ptregs->sp);
+ membuf_zero(&to, 4); // pad2
+ membuf_store(&to, cregs->r25);
+ membuf_store(&to, cregs->r24);
+ membuf_store(&to, cregs->r23);
+ membuf_store(&to, cregs->r22);
+ membuf_store(&to, cregs->r21);
+ membuf_store(&to, cregs->r20);
+ membuf_store(&to, cregs->r19);
+ membuf_store(&to, cregs->r18);
+ membuf_store(&to, cregs->r17);
+ membuf_store(&to, cregs->r16);
+ membuf_store(&to, cregs->r15);
+ membuf_store(&to, cregs->r14);
+ membuf_store(&to, cregs->r13);
+ membuf_store(&to, target->thread.fault_address); // efa
+
+ if (in_brkpt_trap(ptregs)) {
+ stop_pc_val = target->thread.fault_address;
+ pr_debug("\t\tstop_pc (brk-pt)\n");
+ } else {
+ stop_pc_val = ptregs->ret;
+ pr_debug("\t\tstop_pc (others)\n");
}
- return ret;
+ return membuf_store(&to, stop_pc_val); // stop_pc
}
static int genregs_set(struct task_struct *target,
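The genregs_get conversion above fills a struct membuf, the regset helper that tracks a cursor and remaining space, so each membuf_store()/membuf_zero() appends the next field in user_regs_struct order without the per-field offset bookkeeping of the old copyout macros. A minimal re-implementation of the pattern (return convention simplified; the kernel's membuf_store is a size-dispatching macro):

    #include <stdio.h>
    #include <string.h>

    struct membuf {
        void *p;
        size_t left;
    };

    static int membuf_zero(struct membuf *s, size_t size)
    {
        if (size > s->left)
            size = s->left;
        memset(s->p, 0, size);
        s->p = (char *)s->p + size;
        s->left -= size;
        return (int)s->left;
    }

    static int membuf_store(struct membuf *s, unsigned long v)
    {
        size_t size = sizeof(v);

        if (size > s->left)
            size = s->left;
        memcpy(s->p, &v, size);
        s->p = (char *)s->p + size;
        s->left -= size;
        return (int)s->left;
    }

    int main(void)
    {
        unsigned long buf[4];
        struct membuf to = { buf, sizeof(buf) };

        membuf_zero(&to, sizeof(unsigned long));  /* pad */
        membuf_store(&to, 0x1234);                /* e.g. bta */
        printf("left=%d buf[1]=%#lx\n", membuf_store(&to, 0x5678), buf[1]);
        return 0;
    }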
@@ -127,7 +183,7 @@ static int genregs_set(struct task_struct *target,
#define REG_IGNORE_ONE(LOC) \
if (!ret) \
- ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, \
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, \
offsetof(struct user_regs_struct, LOC), \
offsetof(struct user_regs_struct, LOC) + 4);
@@ -184,25 +240,20 @@ static int genregs_set(struct task_struct *target,
#ifdef CONFIG_ISA_ARCV2
static int arcv2regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
const struct pt_regs *regs = task_pt_regs(target);
- int ret, copy_sz;
if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS))
- copy_sz = sizeof(struct user_regs_arcv2);
- else
- copy_sz = 4; /* r30 only */
+ /*
+ * itemized copy not needed like above as layout of regs (r30,r58,r59)
+ * is exactly the same in kernel (pt_regs) and userspace (user_regs_arcv2)
+ */
+ return membuf_write(&to, &regs->r30, sizeof(struct user_regs_arcv2));
- /*
- * itemized copy not needed like above as layout of regs (r30,r58,r59)
- * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2)
- */
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &regs->r30,
- 0, copy_sz);
- return ret;
+ membuf_write(&to, &regs->r30, 4); /* r30 only */
+ return membuf_zero(&to, sizeof(struct user_regs_arcv2) - 4);
}
static int arcv2regs_set(struct task_struct *target,
@@ -237,7 +288,7 @@ static const struct user_regset arc_regsets[] = {
.n = ELF_NGREG,
.size = sizeof(unsigned long),
.align = sizeof(unsigned long),
- .get = genregs_get,
+ .regset_get = genregs_get,
.set = genregs_set,
},
#ifdef CONFIG_ISA_ARCV2
@@ -246,14 +297,14 @@ static const struct user_regset arc_regsets[] = {
.n = ELF_ARCV2REG,
.size = sizeof(unsigned long),
.align = sizeof(unsigned long),
- .get = arcv2regs_get,
+ .regset_get = arcv2regs_get,
.set = arcv2regs_set,
},
#endif
};
static const struct user_regset_view user_arc_view = {
- .name = UTS_MACHINE,
+ .name = "arc",
.e_machine = EM_ARC_INUSE,
.regsets = arc_regsets,
.n = ARRAY_SIZE(arc_regsets)
@@ -288,15 +339,63 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
- if (tracehook_report_syscall_entry(regs))
- return ULONG_MAX;
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (ptrace_report_syscall_entry(regs))
+ return ULONG_MAX;
+
+#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, syscall_get_nr(current, regs));
+#endif
return regs->r8;
}
asmlinkage void syscall_trace_exit(struct pt_regs *regs)
{
- tracehook_report_syscall_exit(regs, 0);
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ ptrace_report_syscall_exit(regs, 0);
+
+#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs_return_value(regs));
+#endif
+}
+
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return (addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1));
+}
+
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
}
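These helpers back kprobe tracing's fetch-arg resolution: regs_query_register_offset() turns a register name into a pt_regs byte offset via the offsetof table, and regs_get_kernel_stack_nth() reads the n-th word above SP only if it stays within the current THREAD_SIZE-aligned stack. A self-contained sketch of both idioms (struct layout and THREAD_SIZE are illustrative):

    #include <stdio.h>
    #include <stddef.h>
    #include <string.h>

    struct pt_regs { unsigned long r0, r1, sp; };

    struct pt_regs_offset { const char *name; int offset; };
    #define REG_OFFSET_NAME(r) { .name = #r, .offset = offsetof(struct pt_regs, r) }
    #define REG_OFFSET_END     { .name = NULL, .offset = 0 }

    static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(r0),
        REG_OFFSET_NAME(r1),
        REG_OFFSET_NAME(sp),
        REG_OFFSET_END,
    };

    static int regs_query_register_offset(const char *name)
    {
        const struct pt_regs_offset *roff;

        for (roff = regoffset_table; roff->name != NULL; roff++)
            if (!strcmp(roff->name, name))
                return roff->offset;
        return -22; /* -EINVAL */
    }

    #define THREAD_SIZE 8192UL  /* illustrative */

    static unsigned long stack_nth(unsigned long sp, unsigned int n)
    {
        unsigned long addr = sp + n * sizeof(unsigned long);

        /* same THREAD_SIZE-aligned stack? then the word is safe to read */
        if ((addr & ~(THREAD_SIZE - 1)) == (sp & ~(THREAD_SIZE - 1)))
            return addr;  /* the kernel would dereference here */
        return 0;
    }

    int main(void)
    {
        printf("offsetof(sp)=%d\n", regs_query_register_offset("sp"));
        printf("in-stack=%#lx out=%#lx\n", stack_nth(0x1000, 1),
               stack_nth(0x1ff8, 4));
        return 0;
    }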
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 7ee89dc61f6e..d08a5092c2b4 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -8,17 +8,19 @@
#include <linux/delay.h>
#include <linux/root_dev.h>
#include <linux/clk.h>
-#include <linux/clk-provider.h>
#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <linux/cpu.h>
+#include <linux/of_clk.h>
#include <linux/of_fdt.h>
#include <linux/of.h>
#include <linux/cache.h>
#include <uapi/linux/mount.h>
#include <asm/sections.h>
#include <asm/arcregs.h>
+#include <asm/asserts.h>
#include <asm/tlb.h>
#include <asm/setup.h>
#include <asm/page.h>
@@ -26,6 +28,8 @@
#include <asm/unwind.h>
#include <asm/mach_desc.h>
#include <asm/smp.h>
+#include <asm/dsp-impl.h>
+#include <soc/arc/mcip.h>
#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
@@ -40,365 +44,347 @@ const struct machine_desc *machine_desc;
struct task_struct *_current_task[NR_CPUS]; /* For stack switching */
-struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+struct cpuinfo_arc {
+ int arcver;
+ unsigned int t0:1, t1:1;
+ struct {
+ unsigned long base;
+ unsigned int sz;
+ } iccm, dccm;
+};
+
+#ifdef CONFIG_ISA_ARCV2
-static const struct id_to_str arc_legacy_rel[] = {
+static const struct id_to_str arc_hs_rel[] = {
/* ID.ARCVER, Release */
-#ifdef CONFIG_ISA_ARCOMPACT
- { 0x34, "R4.10"},
- { 0x35, "R4.11"},
-#else
{ 0x51, "R2.0" },
{ 0x52, "R2.1" },
{ 0x53, "R3.0" },
-#endif
- { 0x00, NULL }
};
-static const struct id_to_str arc_cpu_rel[] = {
+static const struct id_to_str arc_hs_ver54_rel[] = {
/* UARCH.MAJOR, Release */
{ 0, "R3.10a"},
{ 1, "R3.50a"},
+ { 2, "R3.60a"},
+ { 3, "R4.00a"},
{ 0xFF, NULL }
};
+#endif
-static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+static int
+arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
- if (is_isa_arcompact()) {
- struct bcr_iccm_arcompact iccm;
- struct bcr_dccm_arcompact dccm;
-
- READ_BCR(ARC_REG_ICCM_BUILD, iccm);
- if (iccm.ver) {
- cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
- cpu->iccm.base_addr = iccm.base << 16;
- }
-
- READ_BCR(ARC_REG_DCCM_BUILD, dccm);
- if (dccm.ver) {
- unsigned long base;
- cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
-
- base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
- cpu->dccm.base_addr = base & ~0xF;
- }
- } else {
- struct bcr_iccm_arcv2 iccm;
- struct bcr_dccm_arcv2 dccm;
- unsigned long region;
-
- READ_BCR(ARC_REG_ICCM_BUILD, iccm);
- if (iccm.ver) {
- cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */
- if (iccm.sz00 == 0xF && iccm.sz01 > 0)
- cpu->iccm.sz <<= iccm.sz01;
-
- region = read_aux_reg(ARC_REG_AUX_ICCM);
- cpu->iccm.base_addr = region & 0xF0000000;
- }
+ int n = 0;
+#ifdef CONFIG_ISA_ARCOMPACT
+ char *cpu_nm, *isa_nm = "ARCompact";
+ struct bcr_fp_arcompact fpu_sp, fpu_dp;
+ int atomic = 0, be, present;
+ int bpu_full, bpu_cache, bpu_pred;
+ struct bcr_bpu_arcompact bpu;
+ struct bcr_iccm_arcompact iccm;
+ struct bcr_dccm_arcompact dccm;
+ struct bcr_generic isa;
- READ_BCR(ARC_REG_DCCM_BUILD, dccm);
- if (dccm.ver) {
- cpu->dccm.sz = 256 << dccm.sz0;
- if (dccm.sz0 == 0xF && dccm.sz1 > 0)
- cpu->dccm.sz <<= dccm.sz1;
+ READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
- region = read_aux_reg(ARC_REG_AUX_DCCM);
- cpu->dccm.base_addr = region & 0xF0000000;
- }
+ if (!isa.ver) /* ISA BCR absent, use Kconfig info */
+ atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+ else {
+ /* ARC700_BUILD only has 2 bits of isa info */
+ atomic = isa.info & 1;
}
-}
-static void decode_arc_core(struct cpuinfo_arc *cpu)
-{
- struct bcr_uarch_build_arcv2 uarch;
- const struct id_to_str *tbl;
+ be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- /*
- * Up until (including) the first core4 release (0x54) things were
- * simple: AUX IDENTITY.ARCVER was sufficient to identify arc family
- * and release: 0x50 to 0x53 was HS38, 0x54 was HS48 (dual issue)
- */
+ if (info->arcver < 0x34)
+ cpu_nm = "ARC750";
+ else
+ cpu_nm = "ARC770";
- if (cpu->core.family < 0x54) { /* includes arc700 */
+ n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n",
+ c, cpu_nm, isa_nm,
+ IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL1(be, "[Big-Endian]"));
- for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) {
- if (cpu->core.family == tbl->id) {
- cpu->release = tbl->str;
- break;
- }
- }
+ READ_BCR(ARC_REG_FP_BCR, fpu_sp);
+ READ_BCR(ARC_REG_DPFP_BCR, fpu_dp);
- if (is_isa_arcompact())
- cpu->name = "ARC700";
- else if (tbl->str)
- cpu->name = "HS38";
- else
- cpu->name = cpu->release = "Unknown";
+ if (fpu_sp.ver | fpu_dp.ver)
+ n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+ IS_AVAIL1(fpu_sp.ver, "SP "),
+ IS_AVAIL1(fpu_dp.ver, "DP "));
- return;
- }
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ bpu_full = bpu.fam ? 1 : 0;
+ bpu_cache = 256 << (bpu.ent - 1);
+ bpu_pred = 256 << (bpu.ent - 1);
- /*
- * However the subsequent HS release (same 0x54) allow HS38 or HS48
- * configurations and encode this info in a different BCR.
- * The BCR was introduced in 0x54 so can't be read unconditionally.
- */
-
- READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
+ n += scnprintf(buf + n, len - n,
+ "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+ IS_AVAIL1(bpu_full, "full"),
+ IS_AVAIL1(!bpu_full, "partial"),
+ bpu_cache, bpu_pred);
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ info->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
+ info->iccm.base = iccm.base << 16;
+ }
- if (uarch.prod == 4) {
- cpu->name = "HS48";
- cpu->extn.dual = 1;
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ unsigned long base;
+ info->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
- } else {
- cpu->name = "HS38";
+ base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+ info->dccm.base = base & ~0xF;
}
- for (tbl = &arc_cpu_rel[0]; tbl->id != 0xFF; tbl++) {
- if (uarch.maj == tbl->id) {
- cpu->release = tbl->str;
- break;
- }
- }
+ /* ARCompact ISA specific sanity checks */
+ present = fpu_dp.ver; /* SP has no arch visible regs */
+ CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
+#endif
+ return n;
+
}
-static void read_arc_build_cfg_regs(void)
+static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
- struct bcr_timer timer;
- struct bcr_generic bcr;
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+ int n = 0;
+#ifdef CONFIG_ISA_ARCV2
+ const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2";
+ int dual_issue = 0, dual_enb = 0, mpy_opt, present;
+ int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
+ char mpy_nm[16], lpb_nm[32];
struct bcr_isa_arcv2 isa;
- struct bcr_actionpoint ap;
-
- FIX_PTR(cpu);
-
- READ_BCR(AUX_IDENTITY, cpu->core);
- decode_arc_core(cpu);
-
- READ_BCR(ARC_REG_TIMERS_BCR, timer);
- cpu->extn.timer0 = timer.t0;
- cpu->extn.timer1 = timer.t1;
- cpu->extn.rtc = timer.rtc;
+ struct bcr_mpy mpy;
+ struct bcr_fp_arcv2 fpu;
+ struct bcr_bpu_arcv2 bpu;
+ struct bcr_lpb lpb;
+ struct bcr_iccm_arcv2 iccm;
+ struct bcr_dccm_arcv2 dccm;
+ struct bcr_erp erp;
- cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
-
- READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+ /*
+ * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
+ * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
+ * releases only update it.
+ */
- /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
- read_decode_ccm_bcr(cpu);
+ if (info->arcver > 0x50 && info->arcver <= 0x53) {
+ release = arc_hs_rel[info->arcver - 0x51].str;
+ } else {
+ const struct id_to_str *tbl;
+ struct bcr_uarch_build uarch;
- read_decode_mmu_bcr();
- read_decode_cache_bcr();
+ READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
- if (is_isa_arcompact()) {
- struct bcr_fp_arcompact sp, dp;
- struct bcr_bpu_arcompact bpu;
-
- READ_BCR(ARC_REG_FP_BCR, sp);
- READ_BCR(ARC_REG_DPFP_BCR, dp);
- cpu->extn.fpu_sp = sp.ver ? 1 : 0;
- cpu->extn.fpu_dp = dp.ver ? 1 : 0;
-
- READ_BCR(ARC_REG_BPU_BCR, bpu);
- cpu->bpu.ver = bpu.ver;
- cpu->bpu.full = bpu.fam ? 1 : 0;
- if (bpu.ent) {
- cpu->bpu.num_cache = 256 << (bpu.ent - 1);
- cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+ for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
+ if (uarch.maj == tbl->id) {
+ release = tbl->str;
+ break;
+ }
}
- } else {
- struct bcr_fp_arcv2 spdp;
- struct bcr_bpu_arcv2 bpu;
-
- READ_BCR(ARC_REG_FP_V2_BCR, spdp);
- cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
- cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
-
- READ_BCR(ARC_REG_BPU_BCR, bpu);
- cpu->bpu.ver = bpu.ver;
- cpu->bpu.full = bpu.ft;
- cpu->bpu.num_cache = 256 << bpu.bce;
- cpu->bpu.num_pred = 2048 << bpu.pte;
- cpu->bpu.ret_stk = 4 << bpu.rse;
-
- /* if dual issue hardware, is it enabled ? */
- if (cpu->extn.dual) {
+ if (uarch.prod == 4) {
unsigned int exec_ctrl;
+ cpu_nm = "HS48";
+ dual_issue = 1;
+ /* if dual issue hardware, is it enabled ? */
READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
- cpu->extn.dual_enb = !(exec_ctrl & 1);
+ dual_enb = !(exec_ctrl & 1);
}
}
- READ_BCR(ARC_REG_AP_BCR, ap);
- if (ap.ver) {
- cpu->extn.ap_num = 2 << ap.num;
- cpu->extn.ap_full = !ap.min;
- }
-
- READ_BCR(ARC_REG_SMART_BCR, bcr);
- cpu->extn.smart = bcr.ver ? 1 : 0;
-
- READ_BCR(ARC_REG_RTT_BCR, bcr);
- cpu->extn.rtt = bcr.ver ? 1 : 0;
-
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
- /* some hacks for lack of feature BCR info in old ARC700 cores */
- if (is_isa_arcompact()) {
- if (!isa.ver) /* ISA BCR absent, use Kconfig info */
- cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
- else {
- /* ARC700_BUILD only has 2 bits of isa info */
- struct bcr_generic bcr = *(struct bcr_generic *)&isa;
- cpu->isa.atomic = bcr.info & 1;
- }
-
- cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+ c, cpu_nm, release, isa_nm,
+ IS_AVAIL1(isa.be, "[Big-Endian]"),
+ IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue "));
+
+ READ_BCR(ARC_REG_MPY_BCR, mpy);
+ mpy_opt = 2; /* stock MPY/MPYH */
+ if (mpy.dsp) /* OPT 7-9 */
+ mpy_opt = mpy.dsp + 6;
+
+ scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt);
+
+ READ_BCR(ARC_REG_FP_V2_BCR, fpu);
+
+ n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n",
+ IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+ IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
+ IS_AVAIL1(mpy.ver, mpy_nm),
+ IS_AVAIL1(isa.div_rem, "div_rem "),
+ IS_AVAIL1((fpu.sp | fpu.dp), " FPU:"),
+ IS_AVAIL1(fpu.sp, " sp"),
+ IS_AVAIL1(fpu.dp, " dp"));
+
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ bpu_full = bpu.ft;
+ bpu_cache = 256 << bpu.bce;
+ bpu_pred = 2048 << bpu.pte;
+ bpu_ret_stk = 4 << bpu.rse;
+
+ READ_BCR(ARC_REG_LPB_BUILD, lpb);
+ if (lpb.ver) {
+ unsigned int ctl;
+ ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+ scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s",
+ lpb.entries, IS_DISABLED_RUN(!ctl));
+ }
- /* there's no direct way to distinguish 750 vs. 770 */
- if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
- cpu->name = "ARC750";
- } else {
- cpu->isa = isa;
+ n += scnprintf(buf + n, len - n,
+ "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n",
+ IS_AVAIL1(bpu_full, "full"),
+ IS_AVAIL1(!bpu_full, "partial"),
+ bpu_cache, bpu_pred, bpu_ret_stk,
+ lpb_nm);
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ unsigned long base;
+ info->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */
+ if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+ info->iccm.sz <<= iccm.sz01;
+ base = read_aux_reg(ARC_REG_AUX_ICCM);
+ info->iccm.base = base & 0xF0000000;
}
-}
-static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
-{
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
- struct bcr_identity *core = &cpu->core;
- char mpy_opt[16];
- int n = 0;
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ unsigned long base;
+ info->dccm.sz = 256 << dccm.sz0;
+ if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+ info->dccm.sz <<= dccm.sz1;
+ base = read_aux_reg(ARC_REG_AUX_DCCM);
+ info->dccm.base = base & 0xF0000000;
+ }
- FIX_PTR(cpu);
+ /* Error Protection: ECC/Parity */
+ READ_BCR(ARC_REG_ERP_BUILD, erp);
+ if (erp.ver) {
+ struct ctl_erp ctl;
+ READ_BCR(ARC_REG_ERP_CTRL, ctl);
+ /* inverted bits: 0 means enabled */
+ n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+ IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
+ IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
+ IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+ }
- n += scnprintf(buf + n, len - n,
- "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
- core->family, core->cpu_id, core->chip_id);
+ /* ARCv2 ISA specific sanity checks */
+ present = fpu.sp | fpu.dp | mpy.dsp; /* DSP and/or FPU */
+ CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
- n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
- cpu_id, cpu->name, cpu->release,
- is_isa_arcompact() ? "ARCompact" : "ARCv2",
- IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
- IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+ dsp_config_check();
+#endif
+ return n;
+}
- n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
- IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
- IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
- IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
- IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
+{
+ struct bcr_identity ident;
+ struct bcr_timer timer;
+ struct bcr_generic bcr;
+ struct mcip_bcr mp;
+ struct bcr_actionpoint ap;
+ unsigned long vec_base;
+ int ap_num, ap_full, smart, rtt, n;
- if (cpu->extn_mpy.ver) {
- if (is_isa_arcompact()) {
- scnprintf(mpy_opt, 16, "mpy");
- } else {
+ memset(info, 0, sizeof(struct cpuinfo_arc));
- int opt = 2; /* stock MPY/MPYH */
+ READ_BCR(AUX_IDENTITY, ident);
+ info->arcver = ident.family;
- if (cpu->extn_mpy.dsp) /* OPT 7-9 */
- opt = cpu->extn_mpy.dsp + 6;
+ n = scnprintf(buf, len,
+ "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+ ident.family, ident.cpu_id, ident.chip_id);
- scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt);
- }
+ if (is_isa_arcompact()) {
+ n += arcompact_mumbojumbo(c, info, buf + n, len - n);
+	} else if (is_isa_arcv2()) {
+ n += arcv2_mumbojumbo(c, info, buf + n, len - n);
}
- n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
- IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
- IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
- IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
- IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt),
- IS_AVAIL1(cpu->isa.div_rem, "div_rem "));
+ n += arc_mmu_mumbojumbo(c, buf + n, len - n);
+ n += arc_cache_mumbojumbo(c, buf + n, len - n);
- if (cpu->bpu.ver) {
- n += scnprintf(buf + n, len - n,
- "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
- IS_AVAIL1(cpu->bpu.full, "full"),
- IS_AVAIL1(!cpu->bpu.full, "partial"),
- cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
-
- if (is_isa_arcv2()) {
- struct bcr_lpb lpb;
-
- READ_BCR(ARC_REG_LPB_BUILD, lpb);
- if (lpb.ver) {
- unsigned int ctl;
- ctl = read_aux_reg(ARC_REG_LPB_CTRL);
-
- n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
- lpb.entries,
- IS_DISABLED_RUN(!ctl));
- }
- }
- n += scnprintf(buf + n, len - n, "\n");
- }
+ READ_BCR(ARC_REG_TIMERS_BCR, timer);
+ info->t0 = timer.t0;
+ info->t1 = timer.t1;
- return buf;
-}
+ READ_BCR(ARC_REG_MCIP_BCR, mp);
+ vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
-static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+ n += scnprintf(buf + n, len - n,
+ "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n",
+ IS_AVAIL1(timer.t0, "Timer0 "),
+ IS_AVAIL1(timer.t1, "Timer1 "),
+ IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+ IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+ vec_base);
- FIX_PTR(cpu);
+ READ_BCR(ARC_REG_AP_BCR, ap);
+ if (ap.ver) {
+ ap_num = 2 << ap.num;
+ ap_full = !ap.min;
+ }
- n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+ READ_BCR(ARC_REG_SMART_BCR, bcr);
+ smart = bcr.ver ? 1 : 0;
- if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
- n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
- IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
- IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+ READ_BCR(ARC_REG_RTT_BCR, bcr);
+ rtt = bcr.ver ? 1 : 0;
- if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
+ if (ap.ver | smart | rtt) {
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
- IS_AVAIL1(cpu->extn.smart, "smaRT "),
- IS_AVAIL1(cpu->extn.rtt, "RTT "));
- if (cpu->extn.ap_num) {
+ IS_AVAIL1(smart, "smaRT "),
+ IS_AVAIL1(rtt, "RTT "));
+ if (ap.ver) {
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
- cpu->extn.ap_num,
- cpu->extn.ap_full ? "full":"min");
+ ap_num,
+ ap_full ? "full":"min");
}
n += scnprintf(buf + n, len - n, "\n");
}
- if (cpu->dccm.sz || cpu->iccm.sz)
- n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
- cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
- cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
-
- if (is_isa_arcv2()) {
-
- /* Error Protection: ECC/Parity */
- struct bcr_erp erp;
- READ_BCR(ARC_REG_ERP_BUILD, erp);
-
- if (erp.ver) {
- struct ctl_erp ctl;
- READ_BCR(ARC_REG_ERP_CTRL, ctl);
-
- /* inverted bits: 0 means enabled */
- n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
- IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
- IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
- IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
- }
- }
+ if (info->dccm.sz || info->iccm.sz)
+ n += scnprintf(buf + n, len - n,
+ "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n",
+ info->dccm.base, TO_KB(info->dccm.sz),
+ info->iccm.base, TO_KB(info->iccm.sz));
return buf;
}
-static void arc_chk_core_config(void)
+void chk_opt_strict(char *opt_name, bool hw_exists, bool opt_ena)
+{
+ if (hw_exists && !opt_ena)
+ pr_warn(" ! Enable %s for working apps\n", opt_name);
+ else if (!hw_exists && opt_ena)
+ panic("Disable %s, hardware NOT present\n", opt_name);
+}
+
+void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena)
{
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
- int saved = 0, present = 0;
- char *opt_nm = NULL;
+ if (!hw_exists && opt_ena)
+ panic("Disable %s, hardware NOT present\n", opt_name);
+}
- if (!cpu->extn.timer0)
+/*
+ * ISA agnostic sanity checks
+ */
+static void arc_chk_core_config(struct cpuinfo_arc *info)
+{
+ if (!info->t0)
panic("Timer0 is not present!\n");
- if (!cpu->extn.timer1)
+ if (!info->t1)
panic("Timer1 is not present!\n");
#ifdef CONFIG_ARC_HAS_DCCM
@@ -406,42 +392,17 @@ static void arc_chk_core_config(void)
* DCCM can be arbit placed in hardware.
* Make sure it's placement/sz matches what Linux is built with
*/
- if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+ if ((unsigned int)__arc_dccm_base != info->dccm.base)
panic("Linux built with incorrect DCCM Base address\n");
- if (CONFIG_ARC_DCCM_SZ != cpu->dccm.sz)
+ if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz)
panic("Linux built with incorrect DCCM Size\n");
#endif
#ifdef CONFIG_ARC_HAS_ICCM
- if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz)
+ if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz)
panic("Linux built with incorrect ICCM Size\n");
#endif
-
- /*
- * FP hardware/software config sanity
- * -If hardware present, kernel needs to save/restore FPU state
- * -If not, it will crash trying to save/restore the non-existant regs
- */
-
- if (is_isa_arcompact()) {
- opt_nm = "CONFIG_ARC_FPU_SAVE_RESTORE";
- saved = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
-
- /* only DPDP checked since SP has no arch visible regs */
- present = cpu->extn.fpu_dp;
- } else {
- opt_nm = "CONFIG_ARC_HAS_ACCL_REGS";
- saved = IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS);
-
- /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
- present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
- }
-
- if (present && !saved)
- pr_warn("Enable %s for working apps\n", opt_nm);
- else if (!present && saved)
- panic("Disable %s, hardware NOT present\n", opt_nm);
}
/*
@@ -452,21 +413,19 @@ static void arc_chk_core_config(void)
void setup_processor(void)
{
+ struct cpuinfo_arc info;
+ int c = smp_processor_id();
char str[512];
- int cpu_id = smp_processor_id();
- read_arc_build_cfg_regs();
- arc_init_IRQ();
+ pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str)));
+ pr_info("%s", arc_platform_smp_cpuinfo());
- pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+ arc_chk_core_config(&info);
+ arc_init_IRQ();
arc_mmu_init();
arc_cache_init();
- pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
- pr_info("%s", arc_platform_smp_cpuinfo());
-
- arc_chk_core_config();
}
static inline bool uboot_arg_invalid(unsigned long addr)
@@ -572,10 +531,6 @@ void __init setup_arch(char **cmdline_p)
*/
root_mountflags &= ~MS_RDONLY;
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-
arc_unwind_init();
}
@@ -617,6 +572,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
char *str;
int cpu_id = ptr_to_cpu(v);
struct device *cpu_dev = get_cpu_device(cpu_id);
+ struct cpuinfo_arc info;
struct clk *cpu_clk;
unsigned long freq = 0;
@@ -629,7 +585,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (!str)
goto done;
- seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+ seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE));
cpu_clk = clk_get(cpu_dev, NULL);
if (IS_ERR(cpu_clk)) {
@@ -646,9 +602,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100);
- seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
- seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
- seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
seq_printf(m, arc_platform_smp_cpuinfo());
free_page((unsigned long)str);
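
The cpuinfo printers above lean heavily on the IS_AVAIL*() string helpers, each of which quietly expands to one or two %s arguments. A minimal compile-and-run sketch of that trick, modeled on arch/arc/include/asm/setup.h (the exact kernel definitions may differ slightly):

#include <stdio.h>

#define IS_ENABLED(x)        (x)                 /* stand-in for the Kconfig test */
#define IS_AVAIL1(v, s)      ((v) ? s : "")
#define IS_USED_RUN(v)       ((v) ? "" : "(not used) ")
#define IS_DISABLED_RUN(v)   ((v) ? "" : "(disabled) ")
/* each of these expands to TWO arguments: feature label, usage qualifier */
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_RUN(IS_ENABLED(cfg)))
#define IS_AVAIL3(v, v2, s)  IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))

int main(void)
{
	int atomic = 1, cfg_llsc = 0;
	int dual = 1, dual_enb = 0;

	/* one IS_AVAIL2()/IS_AVAIL3() consumes two %s conversions, hence
	 * the long format strings in arc_cpu_mumbojumbo() */
	printf("ISA Extn\t: %s%s\n", IS_AVAIL2(atomic, "atomic ", cfg_llsc));
	printf("processor\t: %s%s\n", IS_AVAIL3(dual, dual_enb, "Dual-Issue "));
	return 0;
}
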
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 3d57ed0d8535..8f6f4a542964 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -49,10 +49,11 @@
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/sched/task_stack.h>
#include <asm/ucontext.h>
+#include <asm/entry.h>
struct rt_sigframe {
struct siginfo info;
@@ -61,6 +62,41 @@ struct rt_sigframe {
unsigned int sigret_magic;
};
+static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
+{
+ int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+ struct user_regs_arcv2 v2abi;
+
+ v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ v2abi.r58 = regs->r58;
+ v2abi.r59 = regs->r59;
+#else
+ v2abi.r58 = v2abi.r59 = 0;
+#endif
+ err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi));
+#endif
+ return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
+{
+ int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+ struct user_regs_arcv2 v2abi;
+
+ err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+ regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ regs->r58 = v2abi.r58;
+ regs->r59 = v2abi.r59;
+#endif
+#endif
+ return err;
+}
+
static int
stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
sigset_t *set)
@@ -94,9 +130,13 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+ if (is_isa_arcv2())
+ err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
- return err;
+ return err ? -EFAULT : 0;
}
static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
@@ -109,8 +149,12 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
err |= __copy_from_user(&uregs.scratch,
&(sf->uc.uc_mcontext.regs.scratch),
sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+ if (is_isa_arcv2())
+ err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
if (err)
- return err;
+ return -EFAULT;
set_current_blocked(&set);
regs->bta = uregs.scratch.bta;
@@ -259,7 +303,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
regs->r2 = (unsigned long)&sf->uc;
/*
- * small optim to avoid unconditonally calling do_sigaltstack
+ * small optim to avoid unconditionally calling do_sigaltstack
* in sigreturn path, now that we only have rt_sigreturn
*/
magic = MAGIC_SIGALTSTK;
@@ -276,7 +320,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
regs->ret = (unsigned long)ksig->ka.sa.sa_handler;
/*
- * handler returns using sigreturn stub provided already by userpsace
+ * handler returns using sigreturn stub provided already by userspace
* If not, nuke the process right away
*/
if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
@@ -321,7 +365,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs)
regs->r0 = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
/*
@@ -362,7 +406,7 @@ void do_signal(struct pt_regs *regs)
restart_scall = in_syscall(regs) && syscall_restartable(regs);
- if (get_signal(&ksig)) {
+ if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) {
if (restart_scall) {
arc_restart_syscall(&ksig.ka, regs);
syscall_wont_restart(regs); /* No more restarts */
@@ -391,9 +435,9 @@ void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs)
{
/*
- * ASM glue gaurantees that this is only called when returning to
+ * ASM glue guarantees that this is only called when returning to
* user mode
*/
- if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
- tracehook_notify_resume(regs);
+ if (test_thread_flag(TIF_NOTIFY_RESUME))
+ resume_user_mode_work(regs);
}
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index eca35e02ce06..b2f2c59279a6 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -23,28 +23,22 @@
#include <linux/export.h>
#include <linux/of_fdt.h>
-#include <asm/processor.h>
-#include <asm/setup.h>
#include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
-EXPORT_SYMBOL_GPL(smp_bitops_lock);
#endif
struct plat_smp_ops __weak plat_smp_ops;
-/* XXX: per cpu ? Only needed once in early seconday boot */
+/* XXX: per cpu ? Only needed once in early secondary boot */
struct task_struct *secondary_idle_tsk;
-/* Called from start_kernel */
-void __init smp_prepare_boot_cpu(void)
-{
-}
-
static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
{
unsigned long dt_root = of_get_flat_dt_root();
@@ -189,7 +183,6 @@ void start_kernel_secondary(void)
pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
local_irq_enable();
- preempt_disable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
@@ -226,7 +219,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
if (!cpu_online(cpu)) {
- pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu);
+ pr_info("Timeout: CPU%u FAILED to come up !!!\n", cpu);
return -1;
}
@@ -235,14 +228,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return 0;
}
-/*
- * not supported here
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
/*****************************************************************************/
/* Inter Processor Interrupt Handling */
/*****************************************************************************/
@@ -277,14 +262,14 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
* and read back old value
*/
do {
- new = old = READ_ONCE(*ipi_data_ptr);
+ new = old = *ipi_data_ptr;
new |= 1U << msg;
} while (cmpxchg(ipi_data_ptr, old, new) != old);
/*
* Call the platform specific IPI kick function, but avoid if possible:
* Only do so if there's no pending msg from other concurrent sender(s).
- * Otherwise, recevier will see this msg as well when it takes the
+ * Otherwise, receiver will see this msg as well when it takes the
* IPI corresponding to that msg. This is true, even if it is already in
* IPI handler, because !@old means it has not yet dequeued the msg(s)
* so @new msg can be a free-loader
@@ -303,7 +288,7 @@ static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg)
ipi_send_msg_one(cpu, msg);
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
ipi_send_msg_one(cpu, IPI_RESCHEDULE);
}
@@ -362,7 +347,7 @@ static inline int __do_IPI(unsigned long msg)
* arch-common ISR to handle for inter-processor interrupts
* Has hooks for platform specific IPI
*/
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
{
unsigned long pending;
unsigned long __maybe_unused copy;
@@ -396,7 +381,7 @@ irqreturn_t do_IPI(int irq, void *dev_id)
* API called by platform code to hookup arch-common ISR to their IPI IRQ
*
* Note: If IPI is provided by platform (vs. say ARC MCIP), their intc setup/map
- * function needs to call call irq_set_percpu_devid() for IPI IRQ, otherwise
+ * function needs to call irq_set_percpu_devid() for IPI IRQ, otherwise
* request_percpu_irq() below will fail
*/
static DEFINE_PER_CPU(int, ipi_dev);
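
ipi_send_msg_one() publishes a message bit with a cmpxchg() retry loop so concurrent senders never lose each other's bits, then uses the returned prior value to decide whether a hardware kick is needed. A sketch of the same loop using C11 atomics in place of the kernel's cmpxchg():

#include <stdatomic.h>
#include <stdio.h>

/* set one message bit, retrying on races; returns the prior word so the
 * caller can skip the hardware kick when other msgs were already pending */
static unsigned long publish_msg(atomic_ulong *ipi_data, unsigned int msg)
{
	unsigned long old = atomic_load(ipi_data);
	unsigned long new;

	do {
		new = old | (1UL << msg);
		/* on failure, 'old' is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(ipi_data, &old, new));

	return old;
}

int main(void)
{
	atomic_ulong data = 0;

	publish_msg(&data, 2);
	printf("%#lx\n", atomic_load(&data));   /* prints 0x4 */
	return 0;
}
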
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 1e440bbfa876..ea99c066ef25 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -15,7 +15,7 @@
* = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc)
*
* vineetg: March 2009
- * -Implemented correct versions of thread_saved_pc() and get_wchan()
+ * -Implemented correct versions of thread_saved_pc() and __get_wchan()
*
* rajeshwarr: 2008
* -Initial implementation
@@ -29,6 +29,7 @@
#include <asm/arcregs.h>
#include <asm/unwind.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
/*-------------------------------------------------------------------------
@@ -38,15 +39,27 @@
#ifdef CONFIG_ARC_DW2_UNWIND
-static void seed_unwind_frame_info(struct task_struct *tsk,
- struct pt_regs *regs,
- struct unwind_frame_info *frame_info)
+static int
+seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
+ struct unwind_frame_info *frame_info)
{
- /*
- * synchronous unwinding (e.g. dump_stack)
- * - uses current values of SP and friends
- */
- if (tsk == NULL && regs == NULL) {
+ if (regs) {
+ /*
+ * Asynchronous unwinding of intr/exception
+ * - Just uses the pt_regs passed
+ */
+ frame_info->task = tsk;
+
+ frame_info->regs.r27 = regs->fp;
+ frame_info->regs.r28 = regs->sp;
+ frame_info->regs.r31 = regs->blink;
+ frame_info->regs.r63 = regs->ret;
+ frame_info->call_frame = 0;
+ } else if (tsk == NULL || tsk == current) {
+ /*
+ * synchronous unwinding (e.g. dump_stack)
+ * - uses current values of SP and friends
+ */
unsigned long fp, sp, blink, ret;
frame_info->task = current;
@@ -63,13 +76,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
frame_info->regs.r31 = blink;
frame_info->regs.r63 = ret;
frame_info->call_frame = 0;
- } else if (regs == NULL) {
+ } else {
/*
- * Asynchronous unwinding of sleeping task
- * - Gets SP etc from task's pt_regs (saved bottom of kernel
- * mode stack of task)
+ * Asynchronous unwinding of a likely sleeping task
+ * - first ensure it is actually sleeping
+	 * - if so, it will be in __switch_to: the task's kernel mode SP is
+	 *   safe-kept and BLINK is at a well known location in there
*/
+ if (task_is_running(tsk))
+ return -1;
+
frame_info->task = tsk;
frame_info->regs.r27 = TSK_K_FP(tsk);
@@ -90,19 +107,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
frame_info->regs.r28 += 60;
frame_info->call_frame = 0;
- } else {
- /*
- * Asynchronous unwinding of intr/exception
- * - Just uses the pt_regs passed
- */
- frame_info->task = tsk;
-
- frame_info->regs.r27 = regs->fp;
- frame_info->regs.r28 = regs->sp;
- frame_info->regs.r31 = regs->blink;
- frame_info->regs.r63 = regs->ret;
- frame_info->call_frame = 0;
}
+ return 0;
}
#endif
@@ -112,11 +118,12 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
int (*consumer_fn) (unsigned int, void *), void *arg)
{
#ifdef CONFIG_ARC_DW2_UNWIND
- int ret = 0;
+ int ret = 0, cnt = 0;
unsigned int address;
struct unwind_frame_info frame_info;
- seed_unwind_frame_info(tsk, regs, &frame_info);
+ if (seed_unwind_frame_info(tsk, regs, &frame_info))
+ return 0;
while (1) {
address = UNW_PC(&frame_info);
@@ -132,13 +139,18 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
break;
frame_info.regs.r63 = frame_info.regs.r31;
+
+ if (cnt++ > 128) {
+ printk("unwinder looping too long, aborting !\n");
+ return 0;
+ }
}
return address; /* return the last address it saw */
#else
/* On ARC, only Dward based unwinder works. fp based backtracing is
* not possible (-fno-omit-frame-pointer) because of the way function
- * prelogue is setup (callee regs saved and then fp set and not other
+ * prologue is setup (callee regs saved and then fp set and not other
* way around
*/
pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
@@ -158,9 +170,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
/* Call-back which plugs into unwinding core to dump the stack in
* case of panic/OOPs/BUG etc
*/
-static int __print_sym(unsigned int address, void *unused)
+static int __print_sym(unsigned int address, void *arg)
{
- printk(" %pS\n", (void *)address);
+ const char *loglvl = arg;
+
+ printk("%s %pS\n", loglvl, (void *)address);
return 0;
}
@@ -217,24 +231,25 @@ static int __get_first_nonsched(unsigned int address, void *unused)
*-------------------------------------------------------------------------
*/
-noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs)
+noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs,
+ const char *loglvl)
{
- pr_info("\nStack Trace:\n");
- arc_unwind_core(tsk, regs, __print_sym, NULL);
+ printk("%s\nStack Trace:\n", loglvl);
+ arc_unwind_core(tsk, regs, __print_sym, (void *)loglvl);
}
EXPORT_SYMBOL(show_stacktrace);
/* Expected by sched Code */
-void show_stack(struct task_struct *tsk, unsigned long *sp)
+void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
{
- show_stacktrace(tsk, NULL);
+ show_stacktrace(tsk, NULL, loglvl);
}
/* Another API expected by schedular, shows up in "ps" as Wait Channel
* Of course just returning schedule( ) would be pointless so unwind until
* the function is not in schedular code
*/
-unsigned int get_wchan(struct task_struct *tsk)
+unsigned int __get_wchan(struct task_struct *tsk)
{
return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL);
}
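
arc_unwind_core() drives everything above through a consumer callback: __print_sym() prints each PC, while __get_first_nonsched() stops at the first non-scheduler frame for __get_wchan(). A toy sketch of that callback shape (walk() stands in for the real DWARF frame walk; all names here are illustrative):

#include <stdio.h>

typedef int (*consumer_fn)(unsigned int pc, void *arg);

static int print_frame(unsigned int pc, void *arg)
{
	const char *loglvl = arg;

	printf("%s %#x\n", loglvl, pc);
	return 0;                      /* 0 = keep unwinding */
}

static unsigned int walk(const unsigned int *pcs, int n,
			 consumer_fn fn, void *arg)
{
	unsigned int last = 0;
	int i;

	for (i = 0; i < n; i++) {      /* stand-in for the frame walk */
		last = pcs[i];
		if (fn(last, arg))
			break;         /* consumer found what it wanted */
	}
	return last;                   /* last address seen, as above */
}

int main(void)
{
	unsigned int pcs[] = { 0x80001000, 0x80002000, 0x80003000 };

	walk(pcs, 3, print_frame, "<6>");
	return 0;
}
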
diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c
index fddecc76efb7..1069446bdc58 100644
--- a/arch/arc/kernel/sys.c
+++ b/arch/arc/kernel/sys.c
@@ -7,6 +7,7 @@
#include <asm/syscalls.h>
#define sys_clone sys_clone_wrapper
+#define sys_clone3 sys_clone3_wrapper
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
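
The sys.c hunk relies on the __SYSCALL designated-initializer trick: redefining a sys_* name before expanding the syscall list swaps the asm wrapper into that slot of the table. A self-contained sketch (all names here are illustrative, not the kernel's):

#include <stdio.h>

static long sys_read(void)          { return 1; }
static long sys_write(void)         { return 2; }
static long sys_clone_wrapper(void) { return 3; }

#define sys_clone sys_clone_wrapper           /* redirect to asm wrapper */
#define __SYSCALL(nr, call) [nr] = (call),

static long (*const call_table[])(void) = {
	__SYSCALL(0, sys_read)
	__SYSCALL(1, sys_write)
	__SYSCALL(2, sys_clone)                /* picks up the wrapper */
};

int main(void)
{
	printf("%ld\n", call_table[2]());      /* prints 3 */
	return 0;
}
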
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 57235e5c0cea..9b9570b79362 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -16,15 +16,11 @@
#include <linux/ptrace.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
+#include <asm/entry.h>
#include <asm/setup.h>
#include <asm/unaligned.h>
#include <asm/kprobes.h>
-void __init trap_init(void)
-{
- return;
-}
-
void die(const char *str, struct pt_regs *regs, unsigned long address)
{
show_kernel_fault_diag(str, regs, address);
@@ -114,9 +110,7 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
*/
void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
{
- unsigned int param = regs->ecr_param;
-
- switch (param) {
+ switch (regs->ecr.param) {
case 1:
trap_is_brkpt(address, regs);
break;
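
The regs->ecr.param accesses above assume ECR is modeled as a union of bitfields over the raw register, replacing the old discrete ecr_vec/ecr_cause/ecr_param fields. A sketch of that idea; the field widths and the example value are assumptions for illustration, not the authoritative layout (consult the ARC PRM / asm/ptrace.h, including the big-endian variant):

#include <stdio.h>

typedef union {
	struct {
		/* assumed little-endian packing: param[7:0], cause[15:8], vec[23:16] */
		unsigned int param:8, cause:8, vec:8, state:8;
	};
	unsigned int full;
} ecr_reg;

int main(void)
{
	ecr_reg ecr = { .full = 0x00250005 };   /* hypothetical: vec 0x25, param 5 */

	printf("vec=%#x cause=%#x param=%u\n", ecr.vec, ecr.cause, ecr.param);
	return 0;
}
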
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index b79886a6cec8..c380d8c30704 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -18,44 +18,37 @@
#define ARC_PATH_MAX 256
-/*
- * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
- * -Prints 3 regs per line and a CR.
- * -To continue, callee regs right after scratch, special handling of CR
- */
-static noinline void print_reg_file(long *reg_rev, int start_num)
+static noinline void print_regs_scratch(struct pt_regs *regs)
{
- unsigned int i;
- char buf[512];
- int n = 0, len = sizeof(buf);
-
- for (i = start_num; i < start_num + 13; i++) {
- n += scnprintf(buf + n, len - n, "r%02u: 0x%08lx\t",
- i, (unsigned long)*reg_rev);
-
- if (((i + 1) % 3) == 0)
- n += scnprintf(buf + n, len - n, "\n");
-
- /* because pt_regs has regs reversed: r12..r0, r25..r13 */
- if (is_isa_arcv2() && start_num == 0)
- reg_rev++;
- else
- reg_rev--;
- }
-
- if (start_num != 0)
- n += scnprintf(buf + n, len - n, "\n\n");
-
- /* To continue printing callee regs on same line as scratch regs */
- if (start_num == 0)
- pr_info("%s", buf);
- else
- pr_cont("%s\n", buf);
+ pr_cont("BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n",
+ regs->bta, regs->sp, regs->fp, (void *)regs->blink);
+ pr_cont("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
+ regs->lp_start, regs->lp_end, regs->lp_count);
+
+ pr_info("r00: 0x%08lx\tr01: 0x%08lx\tr02: 0x%08lx\n" \
+ "r03: 0x%08lx\tr04: 0x%08lx\tr05: 0x%08lx\n" \
+ "r06: 0x%08lx\tr07: 0x%08lx\tr08: 0x%08lx\n" \
+ "r09: 0x%08lx\tr10: 0x%08lx\tr11: 0x%08lx\n" \
+ "r12: 0x%08lx\t",
+ regs->r0, regs->r1, regs->r2,
+ regs->r3, regs->r4, regs->r5,
+ regs->r6, regs->r7, regs->r8,
+ regs->r9, regs->r10, regs->r11,
+ regs->r12);
}
-static void show_callee_regs(struct callee_regs *cregs)
+static void print_regs_callee(struct callee_regs *regs)
{
- print_reg_file(&(cregs->r13), 13);
+ pr_cont("r13: 0x%08lx\tr14: 0x%08lx\n" \
+ "r15: 0x%08lx\tr16: 0x%08lx\tr17: 0x%08lx\n" \
+ "r18: 0x%08lx\tr19: 0x%08lx\tr20: 0x%08lx\n" \
+ "r21: 0x%08lx\tr22: 0x%08lx\tr23: 0x%08lx\n" \
+ "r24: 0x%08lx\tr25: 0x%08lx\n",
+ regs->r13, regs->r14,
+ regs->r15, regs->r16, regs->r17,
+ regs->r18, regs->r19, regs->r20,
+ regs->r21, regs->r22, regs->r23,
+ regs->r24, regs->r25);
}
static void print_task_path_n_nm(struct task_struct *tsk)
@@ -89,30 +82,31 @@ static void show_faulting_vma(unsigned long address)
/* can't use print_vma_addr() yet as it doesn't check for
* non-inclusive vma
*/
- down_read(&active_mm->mmap_sem);
- vma = find_vma(active_mm, address);
+ mmap_read_lock(active_mm);
+ vma = vma_lookup(active_mm, address);
- /* check against the find_vma( ) behaviour which returns the next VMA
- * if the container VMA is not found
+	/* Look up the vma at the address and report if no containing VMA is
+	 * found
*/
- if (vma && (vma->vm_start <= address)) {
+ if (vma) {
char buf[ARC_PATH_MAX];
- char *nm = "?";
+ char *nm = "anon";
if (vma->vm_file) {
- nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
+ /* XXX: can we use %pD below and get rid of buf? */
+ nm = d_path(file_user_path(vma->vm_file), buf,
+ ARC_PATH_MAX-1);
if (IS_ERR(nm))
nm = "?";
}
- pr_info(" @off 0x%lx in [%s]\n"
- " VMA: 0x%08lx to 0x%08lx\n",
+ pr_info(" @off 0x%lx in [%s] VMA: 0x%08lx to 0x%08lx\n",
vma->vm_start < TASK_UNMAPPED_BASE ?
address : address - vma->vm_start,
nm, vma->vm_start, vma->vm_end);
} else
pr_info(" @No matching VMA found\n");
- up_read(&active_mm->mmap_sem);
+ mmap_read_unlock(active_mm);
}
static void show_ecr_verbose(struct pt_regs *regs)
@@ -120,20 +114,18 @@ static void show_ecr_verbose(struct pt_regs *regs)
unsigned int vec, cause_code;
unsigned long address;
- pr_info("\n[ECR ]: 0x%08lx => ", regs->event);
-
/* For Data fault, this is data address not instruction addr */
address = current->thread.fault_address;
- vec = regs->ecr_vec;
- cause_code = regs->ecr_cause;
+ vec = regs->ecr.vec;
+ cause_code = regs->ecr.cause;
/* For DTLB Miss or ProtV, display the memory involved too */
if (vec == ECR_V_DTLB_MISS) {
- pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n",
+ pr_cont("Invalid %s @ 0x%08lx by insn @ %pS\n",
(cause_code == 0x01) ? "Read" :
((cause_code == 0x02) ? "Write" : "EX"),
- address, regs->ret);
+ address, (void *)regs->ret);
} else if (vec == ECR_V_ITLB_MISS) {
pr_cont("Insn could not be fetched\n");
} else if (vec == ECR_V_MACH_CHK) {
@@ -164,7 +156,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
pr_cont("Misaligned r/w from 0x%08lx\n", address);
#endif
} else if (vec == ECR_V_TRAP) {
- if (regs->ecr_param == 5)
+ if (regs->ecr.param == 5)
pr_cont("gcc generated __builtin_trap\n");
} else {
pr_cont("Check Programmer's Manual\n");
@@ -178,7 +170,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
void show_regs(struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct callee_regs *cregs;
+ struct callee_regs *cregs = (struct callee_regs *)tsk->thread.callee_reg;
/*
* generic code calls us with preemption disabled, but some calls
@@ -191,43 +183,32 @@ void show_regs(struct pt_regs *regs)
show_ecr_verbose(regs);
- pr_info("[EFA ]: 0x%08lx\n[BLINK ]: %pS\n[ERET ]: %pS\n",
- current->thread.fault_address,
- (void *)regs->blink, (void *)regs->ret);
-
if (user_mode(regs))
show_faulting_vma(regs->ret); /* faulting code, not data */
- pr_info("[STAT32]: 0x%08lx", regs->status32);
+ pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+ regs->ecr.full, current->thread.fault_address, regs->ret);
+
+ pr_info("STAT32: 0x%08lx", regs->status32);
#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
#ifdef CONFIG_ISA_ARCOMPACT
- pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s%2s%2s%2s]",
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE),
STS_BIT(regs, A2), STS_BIT(regs, A1),
STS_BIT(regs, E2), STS_BIT(regs, E1));
#else
- pr_cont(" : %2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s] ",
STS_BIT(regs, IE),
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE));
#endif
- pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
- regs->bta, regs->sp, regs->fp);
- pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
- regs->lp_start, regs->lp_end, regs->lp_count);
-
- /* print regs->r0 thru regs->r12
- * Sequential printing was generating horrible code
- */
- print_reg_file(&(regs->r0), 0);
- /* If Callee regs were saved, display them too */
- cregs = (struct callee_regs *)current->thread.callee_reg;
+ print_regs_scratch(regs);
if (cregs)
- show_callee_regs(cregs);
+ print_regs_callee(cregs);
preempt_disable();
}
@@ -245,5 +226,5 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
/* Show stack trace if this Fatality happened in kernel mode */
if (!user_mode(regs))
- show_stacktrace(current, regs);
+ show_stacktrace(current, regs, KERN_DEFAULT);
}
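
The STS_BIT() macro in show_regs() combines token pasting (##) and stringizing (#) so one flag name yields both the mask lookup and the printable label. A standalone sketch with made-up mask values:

#include <stdio.h>

#define STATUS_DE_MASK  (1u << 6)   /* illustrative bit positions only */
#define STATUS_AE_MASK  (1u << 5)

struct regs { unsigned int status32; };

/* STATUS_##bit##_MASK builds the macro name; #bit yields the label */
#define STS_BIT(r, bit) ((r)->status32 & STATUS_##bit##_MASK ? #bit " " : "")

int main(void)
{
	struct regs r = { .status32 = STATUS_AE_MASK };

	printf("STAT32 [%s%s]\n", STS_BIT(&r, DE), STS_BIT(&r, AE));
	return 0;                   /* prints: STAT32 [AE ] */
}
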
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index d63ebd81f1c6..99a9b92ed98d 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -237,7 +237,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
if (state.fault)
goto fault;
- /* clear any remanants of delay slot */
+ /* clear any remnants of delay slot */
if (delay_mode(regs)) {
regs->ret = regs->bta & ~1U;
regs->status32 &= ~STATUS_DE_MASK;
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index dc05a63516f5..9270d0a713c3 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -42,10 +42,10 @@ do { \
#define EXTRA_INFO(f) { \
BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
- % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ % sizeof_field(struct unwind_frame_info, f)) \
+ offsetof(struct unwind_frame_info, f) \
- / FIELD_SIZEOF(struct unwind_frame_info, f), \
- FIELD_SIZEOF(struct unwind_frame_info, f) \
+ / sizeof_field(struct unwind_frame_info, f), \
+ sizeof_field(struct unwind_frame_info, f) \
}
#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
@@ -187,25 +187,26 @@ static void init_unwind_table(struct unwind_table *table, const char *name,
const void *table_start, unsigned long table_size,
const u8 *header_start, unsigned long header_size)
{
- const u8 *ptr = header_start + 4;
- const u8 *end = header_start + header_size;
-
table->core.pc = (unsigned long)core_start;
table->core.range = core_size;
table->init.pc = (unsigned long)init_start;
table->init.range = init_size;
table->address = table_start;
table->size = table_size;
-
- /* See if the linker provided table looks valid. */
- if (header_size <= 4
- || header_start[0] != 1
- || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
- || header_start[2] == DW_EH_PE_omit
- || read_pointer(&ptr, end, header_start[2]) <= 0
- || header_start[3] == DW_EH_PE_omit)
- header_start = NULL;
-
+	/* Avoid pointer arithmetic on a NULL header pointer */
+ if (header_start != NULL) {
+ const u8 *ptr = header_start + 4;
+ const u8 *end = header_start + header_size;
+ /* See if the linker provided table looks valid. */
+ if (header_size <= 4
+ || header_start[0] != 1
+ || (void *)read_pointer(&ptr, end, header_start[1])
+ != table_start
+ || header_start[2] == DW_EH_PE_omit
+ || read_pointer(&ptr, end, header_start[2]) <= 0
+ || header_start[3] == DW_EH_PE_omit)
+ header_start = NULL;
+ }
table->hdrsz = header_size;
smp_wmb();
table->header = header_start;
@@ -244,14 +245,9 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
{
struct eh_frame_hdr_table_entry *e1 = p1;
struct eh_frame_hdr_table_entry *e2 = p2;
- unsigned long v;
-
- v = e1->start;
- e1->start = e2->start;
- e2->start = v;
- v = e1->fde;
- e1->fde = e2->fde;
- e2->fde = v;
+
+ swap(e1->start, e2->start);
+ swap(e1->fde, e2->fde);
}
static void init_unwind_hdr(struct unwind_table *table,
@@ -259,7 +255,7 @@ static void init_unwind_hdr(struct unwind_table *table,
{
const u8 *ptr;
unsigned long tableSize = table->size, hdrSize;
- unsigned n;
+ unsigned int n;
const u32 *fde;
struct {
u8 version;
@@ -373,6 +369,8 @@ void *unwind_add_table(struct module *module, const void *table_start,
unsigned long table_size)
{
struct unwind_table *table;
+ struct module_memory *core_text;
+ struct module_memory *init_text;
if (table_size <= 0)
return NULL;
@@ -381,11 +379,11 @@ void *unwind_add_table(struct module *module, const void *table_start,
if (!table)
return NULL;
- init_unwind_table(table, module->name,
- module->core_layout.base, module->core_layout.size,
- module->init_layout.base, module->init_layout.size,
- table_start, table_size,
- NULL, 0);
+ core_text = &module->mem[MOD_TEXT];
+ init_text = &module->mem[MOD_INIT_TEXT];
+
+ init_unwind_table(table, module->name, core_text->base, core_text->size,
+ init_text->base, init_text->size, table_start, table_size, NULL, 0);
init_unwind_hdr(table, unw_hdr_alloc);
@@ -461,7 +459,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
{
const u8 *cur = *pcur;
uleb128_t value;
- unsigned shift;
+ unsigned int shift;
for (shift = 0, value = 0; cur < end; shift += 7) {
if (shift + 7 > 8 * sizeof(value)
@@ -482,7 +480,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
{
const u8 *cur = *pcur;
sleb128_t value;
- unsigned shift;
+ unsigned int shift;
for (shift = 0, value = 0; cur < end; shift += 7) {
if (shift + 7 > 8 * sizeof(value)
@@ -572,7 +570,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end,
#else
BUILD_BUG_ON(sizeof(u32) != sizeof(value));
#endif
- /* Fall through */
+ fallthrough;
case DW_EH_PE_native:
if (end < (const void *)(ptr.pul + 1))
return 0;
@@ -608,7 +606,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end,
static signed fde_pointer_type(const u32 *cie)
{
const u8 *ptr = (const u8 *)(cie + 2);
- unsigned version = *ptr;
+ unsigned int version = *ptr;
if (*++ptr) {
const char *aug;
@@ -827,7 +825,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc,
case DW_CFA_def_cfa:
state->cfa.reg = get_uleb128(&ptr.p8, end);
unw_debug("cfa_def_cfa: r%lu ", state->cfa.reg);
- /* fall through */
+ fallthrough;
case DW_CFA_def_cfa_offset:
state->cfa.offs = get_uleb128(&ptr.p8, end);
unw_debug("cfa_def_cfa_offset: 0x%lx ",
@@ -835,7 +833,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc,
break;
case DW_CFA_def_cfa_sf:
state->cfa.reg = get_uleb128(&ptr.p8, end);
- /* fall through */
+ fallthrough;
case DW_CFA_def_cfa_offset_sf:
state->cfa.offs = get_sleb128(&ptr.p8, end)
* state->dataAlign;
@@ -903,7 +901,7 @@ int arc_unwind(struct unwind_frame_info *frame)
const u8 *ptr = NULL, *end = NULL;
unsigned long pc = UNW_PC(frame) - frame->call_frame;
unsigned long startLoc = 0, endLoc = 0, cfa;
- unsigned i;
+ unsigned int i;
signed ptrType = -1;
uleb128_t retAddrReg = 0;
const struct unwind_table *table;
@@ -1178,11 +1176,9 @@ int arc_unwind(struct unwind_frame_info *frame)
#endif
/* update frame */
-#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
if (frame->call_frame
&& !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
frame->call_frame = 0;
-#endif
cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
startLoc = min_t(unsigned long, UNW_SP(frame), cfa);
endLoc = max_t(unsigned long, UNW_SP(frame), cfa);
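
get_uleb128()/get_sleb128() above decode DWARF's variable-length integers: 7 payload bits per byte, high bit set meaning more bytes follow. A standalone version of the unsigned decode (overflow checks elided for brevity, unlike the kernel's):

#include <stdint.h>
#include <stdio.h>

static uint64_t uleb128_decode(const uint8_t **pcur, const uint8_t *end)
{
	const uint8_t *cur = *pcur;
	uint64_t value = 0;
	unsigned int shift;

	for (shift = 0; cur < end; shift += 7) {
		uint8_t byte = *cur++;

		value |= (uint64_t)(byte & 0x7f) << shift;
		if (!(byte & 0x80))         /* high bit clear: last byte */
			break;
	}
	*pcur = cur;                        /* advance caller's cursor */
	return value;
}

int main(void)
{
	const uint8_t buf[] = { 0xe5, 0x8e, 0x26 };  /* classic DWARF example */
	const uint8_t *p = buf;

	printf("%llu\n", (unsigned long long)uleb128_decode(&p, buf + 3));
	return 0;  /* prints 624485 */
}
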
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 54139a6f469b..549c3f407918 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -57,7 +57,6 @@ SECTIONS
.init.ramfs : { INIT_RAM_FS }
. = ALIGN(PAGE_SIZE);
- _stext = .;
HEAD_TEXT_SECTION
INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,11 +82,13 @@ SECTIONS
.text : {
_text = .;
+ _stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
}
@@ -122,6 +123,7 @@ SECTIONS
_end = . ;
STABS_DEBUG
+ ELF_DETAILS
DISCARDS
.arcextmap 0 : {
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index d2e09fece5bc..d0a5cec4cdca 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -36,12 +36,13 @@
#endif
ENTRY_CFI(memset)
- PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
mov r3, r0 ; don't clobber ret val
+ PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
+
;;; if length < 8
brls.d.nt r2, 8, .Lsmallchunk
mov.f lp_count,r2
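
The memset hunk moves PREFETCHW below the zero-length early-out: a write-intent prefetch of r0 is pointless when r2 is 0, and may touch a cache line the caller never owns. A C analog of the safe ordering (assuming a compiler that provides __builtin_prefetch):

#include <stddef.h>

void *my_memset(void *s, int c, size_t n)
{
	unsigned char *p = s;

	if (n == 0)                   /* bail before touching the buffer... */
		return s;

	__builtin_prefetch(p, 1, 0);  /* ...only then hint the first write */

	while (n--)
		*p++ = (unsigned char)c;
	return s;
}
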
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index a2fbea3ee07c..9106ceac323c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -28,6 +28,10 @@ int slc_enable = 1, ioc_enable = 1;
unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
+static struct cpuinfo_arc_cache {
+ unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page);
@@ -35,78 +39,24 @@ void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc_cache *p;
-
-#define PR_CACHE(p, cfg, str) \
- if (!(p)->line_len) \
- n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
- else \
- n += scnprintf(buf + n, len - n, \
- str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \
- (p)->sz_k, (p)->assoc, (p)->line_len, \
- (p)->vipt ? "VIPT" : "PIPT", \
- (p)->alias ? " aliasing" : "", \
- IS_USED_CFG(cfg));
-
- PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
- PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
- p = &cpuinfo_arc700[c].slc;
- if (p->line_len)
- n += scnprintf(buf + n, len - n,
- "SLC\t\t: %uK, %uB Line%s\n",
- p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
-
- n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
- perip_base,
- IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
-
- return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-static void read_decode_cache_bcr_arcv2(int cpu)
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
{
- struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+ struct cpuinfo_arc_cache *p_slc = &slc_info;
+ struct bcr_identity ident;
struct bcr_generic sbcr;
-
- struct bcr_slc_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
- unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
- } slc_cfg;
-
- struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
- unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
- } cbcr;
-
- struct bcr_volatile {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int start:4, limit:4, pad:22, order:1, disable:1;
-#else
- unsigned int disable:1, order:1, pad:22, limit:4, start:4;
-#endif
- } vol;
-
+ struct bcr_clust_cfg cbcr;
+ struct bcr_volatile vol;
+ int n = 0;
READ_BCR(ARC_REG_SLC_BCR, sbcr);
if (sbcr.ver) {
+ struct bcr_slc_cfg slc_cfg;
READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
p_slc->sz_k = 128 << slc_cfg.sz;
l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ n += scnprintf(buf + n, len - n,
+ "SLC\t\t: %uK, %uB Line%s\n",
+ p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
}
READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
@@ -129,70 +79,82 @@ static void read_decode_cache_bcr_arcv2(int cpu)
ioc_enable = 0;
}
+ READ_BCR(AUX_IDENTITY, ident);
+
/* HS 2.0 didn't have AUX_VOL */
- if (cpuinfo_arc700[cpu].core.family > 0x51) {
+ if (ident.family > 0x51) {
READ_BCR(AUX_VOL, vol);
perip_base = vol.start << 28;
/* HS 3.0 has limit and strict-ordering fields */
- if (cpuinfo_arc700[cpu].core.family > 0x52)
+ if (ident.family > 0x52)
perip_end = (vol.limit << 28) - 1;
}
+
+ n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+ perip_base,
+ IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+ return n;
}
-void read_decode_cache_bcr(void)
+int arc_cache_mumbojumbo(int c, char *buf, int len)
{
- struct cpuinfo_arc_cache *p_ic, *p_dc;
- unsigned int cpu = smp_processor_id();
- struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
- unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
- } ibcr, dbcr;
+ struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+ struct bcr_cache ibcr, dbcr;
+ int vipt, assoc;
+ int n = 0;
- p_ic = &cpuinfo_arc700[cpu].icache;
READ_BCR(ARC_REG_IC_BCR, ibcr);
-
if (!ibcr.ver)
goto dc_chk;
- if (ibcr.ver <= 3) {
+ if (is_isa_arcompact() && (ibcr.ver <= 3)) {
BUG_ON(ibcr.config != 3);
- p_ic->assoc = 2; /* Fixed to 2w set assoc */
- } else if (ibcr.ver >= 4) {
- p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+ assoc = 2; /* Fixed to 2w set assoc */
+ } else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+ assoc = 1 << ibcr.config; /* 1,2,4,8 */
}
p_ic->line_len = 8 << ibcr.line_len;
p_ic->sz_k = 1 << (ibcr.sz - 1);
- p_ic->vipt = 1;
- p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+ p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+ n += scnprintf(buf + n, len - n,
+ "I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+ p_ic->sz_k, assoc, p_ic->line_len,
+ p_ic->colors > 1 ? " aliasing" : "",
+ IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
dc_chk:
- p_dc = &cpuinfo_arc700[cpu].dcache;
READ_BCR(ARC_REG_DC_BCR, dbcr);
-
if (!dbcr.ver)
goto slc_chk;
- if (dbcr.ver <= 3) {
+ if (is_isa_arcompact() && (dbcr.ver <= 3)) {
BUG_ON(dbcr.config != 2);
- p_dc->assoc = 4; /* Fixed to 4w set assoc */
- p_dc->vipt = 1;
- p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
- } else if (dbcr.ver >= 4) {
- p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
- p_dc->vipt = 0;
- p_dc->alias = 0; /* PIPT so can't VIPT alias */
+ vipt = 1;
+ assoc = 4; /* Fixed to 4w set assoc */
+ p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+ } else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+ vipt = 0;
+ assoc = 1 << dbcr.config; /* 1,2,4,8 */
+ p_dc->colors = 1; /* PIPT so can't VIPT alias */
}
p_dc->line_len = 16 << dbcr.line_len;
p_dc->sz_k = 1 << (dbcr.sz - 1);
+ n += scnprintf(buf + n, len - n,
+ "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n",
+ p_dc->sz_k, assoc, p_dc->line_len,
+ vipt ? "VIPT" : "PIPT",
+ IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
slc_chk:
if (is_isa_arcv2())
- read_decode_cache_bcr_arcv2(cpu);
+ n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+ return n;
}
/*
@@ -205,93 +167,24 @@ slc_chk:
#define OP_INV_IC 0x4
/*
- * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
- *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
+ * Cache Flush programming model
*
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
+ * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
+ * Programming model requires both paddr and vaddr irrespective of aliasing
+ * considerations:
+ * - vaddr in {I,D}C_IV?L
+ * - paddr in {I,D}C_PTAG
*
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
+ * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
+ * Programming model is different for aliasing vs. non-aliasing I$
+ * - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
+ * - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
*
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggravate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ * - If PAE40 is enabled, independent of aliasing considerations, the higher
+ *   bits need to be written into PTAG_HI
*/
static inline
-void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
- unsigned long sz, const int op, const int full_page)
-{
- unsigned int aux_cmd;
- int num_lines;
-
- if (op == OP_INV_IC) {
- aux_cmd = ARC_REG_IC_IVIL;
- } else {
- /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
- aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
- }
-
- /* Ensure we properly floor/ceil the non-line aligned/sized requests
- * and have @paddr - aligned to cache line and integral @num_lines.
- * This however can be avoided for page sized since:
- * -@paddr will be cache-line aligned already (being page aligned)
- * -@sz will be integral multiple of line size (being page sized).
- */
- if (!full_page) {
- sz += paddr & ~CACHE_LINE_MASK;
- paddr &= CACHE_LINE_MASK;
- vaddr &= CACHE_LINE_MASK;
- }
-
- num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
- /* MMUv2 and before: paddr contains stuffed vaddrs bits */
- paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-
- while (num_lines-- > 0) {
- write_aux_reg(aux_cmd, paddr);
- paddr += L1_CACHE_BYTES;
- }
-}
-
-/*
- * For ARC700 MMUv3 I-cache and D-cache flushes
- * - ARC700 programming model requires paddr and vaddr be passed in seperate
- * AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
- * caches actually alias or not.
- * - For HS38, only the aliasing I-cache configuration uses the PTAG reg
- * (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
- */
-static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page)
{
@@ -350,17 +243,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
#ifndef USE_RGN_FLSH
/*
- * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
- * Here's how cache ops are implemented
- *
- * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
- * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
- * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
- * respectively, similar to MMU v3 programming model, hence
- * __cache_line_loop_v3() is used)
- *
- * If PAE40 is enabled, independent of aliasing considerations, the higher bits
- * needs to be written into PTAG_HI
*/
static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -460,11 +342,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
#endif
-#if (CONFIG_ARC_MMU_VER < 3)
-#define __cache_line_loop __cache_line_loop_v2
-#elif (CONFIG_ARC_MMU_VER == 3)
+#ifdef CONFIG_ARC_MMU_V3
#define __cache_line_loop __cache_line_loop_v3
-#elif (CONFIG_ARC_MMU_VER > 3)
+#else
#define __cache_line_loop __cache_line_loop_v4
#endif
@@ -483,7 +363,7 @@ static inline void __before_dc_op(const int op)
{
if (op == OP_FLUSH_N_INV) {
/* Dcache provides 2 cmd: FLUSH or INV
- * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+ * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
* flush-n-inv is achieved by INV cmd but with IM=1
* So toggle INV sub-mode depending on op request and default
*/
@@ -663,7 +543,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
#endif /* CONFIG_ARC_HAS_ICACHE */
-noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
+static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
/*
@@ -726,7 +606,7 @@ noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
#endif
}
-noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
/*
@@ -822,47 +702,16 @@ static inline void arc_slc_enable(void)
* Exported APIs
*/
-/*
- * Handle cache congruency of kernel and userspace mappings of page when kernel
- * writes-to/reads-from
- *
- * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
- * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
- * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
- * -In SMP, if hardware caches are coherent
- *
- * There's a corollary case, where kernel READs from a userspace mapped page.
- * If the U-mapping is not congruent to to K-mapping, former needs flushing.
- */
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
{
- struct address_space *mapping;
-
- if (!cache_is_vipt_aliasing()) {
- clear_bit(PG_dc_clean, &page->flags);
- return;
- }
-
- /* don't handle anon pages here */
- mapping = page_mapping_file(page);
- if (!mapping)
- return;
-
- /*
- * pagecache page, file not yet mapped to userspace
- * Make a note that K-mapping is dirty
- */
- if (!mapping_mapped(mapping)) {
- clear_bit(PG_dc_clean, &page->flags);
- } else if (page_mapcount(page)) {
-
- /* kernel reading from page with U-mapping */
- phys_addr_t paddr = (unsigned long)page_address(page);
- unsigned long vaddr = page->index << PAGE_SHIFT;
+ clear_bit(PG_dc_clean, &folio->flags);
+ return;
+}
+EXPORT_SYMBOL(flush_dcache_folio);
- if (addr_not_cache_congruent(paddr, vaddr))
- __flush_dcache_page(paddr, vaddr);
- }
+void flush_dcache_page(struct page *page)
+{
+ return flush_dcache_folio(page_folio(page));
}
EXPORT_SYMBOL(flush_dcache_page);
@@ -992,7 +841,7 @@ EXPORT_SYMBOL(flush_icache_range);
* @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
* However in one instance, when called by kprobe (for a breakpt in
* builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
- * use a paddr to index the cache (despite VIPT). This is fine since since a
+ * use a paddr to index the cache (despite VIPT). This is fine since a
* builtin kernel page will not have any virtual mappings.
* kprobe on loadable module will be kernel vaddr.
*/
@@ -1003,18 +852,18 @@ void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
}
/* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
+void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
{
- __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
+ __ic_line_inv_vaddr(paddr, vaddr, nr * PAGE_SIZE);
}
/*
* wrapper to clearout kernel or userspace mappings of a page
* For kernel mappings @vaddr == @paddr
*/
-void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
+void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
{
- __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
+ __dc_line_op(paddr, vaddr & PAGE_MASK, nr * PAGE_SIZE, OP_FLUSH_N_INV);
}
noinline void flush_cache_all(void)
@@ -1030,89 +879,18 @@ noinline void flush_cache_all(void)
}
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-
-void flush_cache_mm(struct mm_struct *mm)
-{
- flush_cache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
- unsigned long pfn)
-{
- phys_addr_t paddr = pfn << PAGE_SHIFT;
-
- u_vaddr &= PAGE_MASK;
-
- __flush_dcache_page(paddr, u_vaddr);
-
- if (vma->vm_flags & VM_EXEC)
- __inv_icache_page(paddr, u_vaddr);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- flush_cache_all();
-}
-
-void flush_anon_page(struct vm_area_struct *vma, struct page *page,
- unsigned long u_vaddr)
-{
- /* TBD: do we really need to clear the kernel mapping */
- __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
- __flush_dcache_page((phys_addr_t)page_address(page),
- (phys_addr_t)page_address(page));
-
-}
-
-#endif
-
void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma)
{
+ struct folio *src = page_folio(from);
+ struct folio *dst = page_folio(to);
void *kfrom = kmap_atomic(from);
void *kto = kmap_atomic(to);
- int clean_src_k_mappings = 0;
-
- /*
- * If SRC page was already mapped in userspace AND it's U-mapping is
- * not congruent with K-mapping, sync former to physical page so that
- * K-mapping in memcpy below, sees the right data
- *
- * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
- * equally valid for SRC page as well
- *
- * For !VIPT cache, all of this gets compiled out as
- * addr_not_cache_congruent() is 0
- */
- if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
- __flush_dcache_page((unsigned long)kfrom, u_vaddr);
- clean_src_k_mappings = 1;
- }
copy_page(kto, kfrom);
- /*
- * Mark DST page K-mapping as dirty for a later finalization by
- * update_mmu_cache(). Although the finalization could have been done
- * here as well (given that both vaddr/paddr are available).
- * But update_mmu_cache() already has code to do that for other
- * non copied user pages (e.g. read faults which wire in pagecache page
- * directly).
- */
- clear_bit(PG_dc_clean, &to->flags);
-
- /*
- * if SRC was already usermapped and non-congruent to kernel mapping
- * sync the kernel mapping back to physical page
- */
- if (clean_src_k_mappings) {
- __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
- set_bit(PG_dc_clean, &from->flags);
- } else {
- clear_bit(PG_dc_clean, &from->flags);
- }
+ clear_bit(PG_dc_clean, &dst->flags);
+ clear_bit(PG_dc_clean, &src->flags);
kunmap_atomic(kto);
kunmap_atomic(kfrom);
@@ -1120,10 +898,11 @@ void copy_user_highpage(struct page *to, struct page *from,
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
{
+ struct folio *folio = page_folio(page);
clear_page(to);
- clear_bit(PG_dc_clean, &page->flags);
+ clear_bit(PG_dc_clean, &folio->flags);
}
-
+EXPORT_SYMBOL(clear_user_page);
/**********************************************************************
* Explicit Cache flush request from user space via syscall
@@ -1151,7 +930,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
* 3. All Caches need to be disabled when setting up IOC to elide any in-flight
* Coherency transactions
*/
-noinline void __init arc_ioc_setup(void)
+static noinline void __init arc_ioc_setup(void)
{
unsigned int ioc_base, mem_sz;
@@ -1213,12 +992,10 @@ noinline void __init arc_ioc_setup(void)
* one core suffices for all
* - IOC setup / dma callbacks only need to be done once
*/
-void __init arc_cache_init_master(void)
+static noinline void __init arc_cache_init_master(void)
{
- unsigned int __maybe_unused cpu = smp_processor_id();
-
if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
- struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+ struct cpuinfo_arc_cache *ic = &ic_info;
if (!ic->line_len)
panic("cache support enabled but non-existent cache\n");
@@ -1231,14 +1008,14 @@ void __init arc_cache_init_master(void)
* In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
* pair to provide vaddr/paddr respectively, just as in MMU v3
*/
- if (is_isa_arcv2() && ic->alias)
+ if (is_isa_arcv2() && ic->colors > 1)
_cache_line_loop_ic_fn = __cache_line_loop_v3;
else
_cache_line_loop_ic_fn = __cache_line_loop;
}
if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
- struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+ struct cpuinfo_arc_cache *dc = &dc_info;
if (!dc->line_len)
panic("cache support enabled but non-existent cache\n");
@@ -1248,18 +1025,8 @@ void __init arc_cache_init_master(void)
dc->line_len, L1_CACHE_BYTES);
/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
- if (is_isa_arcompact()) {
- int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
- int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE);
-
- if (dc->alias) {
- if (!handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- if (CACHE_COLORS_NUM != num_colors)
- panic("CACHE_COLORS_NUM not optimized for config\n");
- } else if (!dc->alias && handled) {
- panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- }
+ if (is_isa_arcompact() && dc->colors > 1) {
+ panic("Aliasing VIPT cache not supported\n");
}
}
@@ -1300,9 +1067,6 @@ void __init arc_cache_init_master(void)
void __ref arc_cache_init(void)
{
unsigned int __maybe_unused cpu = smp_processor_id();
- char str[256];
-
- pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
if (!cpu)
arc_cache_init_master();
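The colors test above replaces the old boolean alias flag. Its derivation follows the arithmetic in the deleted check (dc->sz_k / dc->assoc / TO_KB(PAGE_SIZE)); a minimal sketch, with the helper name assumed for illustration:

    /* colors = pages per cache way; colors > 1 means a VIPT cache
     * can alias, which ARCompact D-cache no longer supports and
     * ARCv2 I-cache handles via the vaddr/paddr loop variant.
     */
    static inline unsigned int cache_colors(unsigned int sz_k,
                                            unsigned int assoc)
    {
            return (sz_k * 1024 / assoc) / PAGE_SIZE;
    }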
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index e947572a521e..197707bc7658 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -3,7 +3,7 @@
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
-#include <linux/dma-noncoherent.h>
+#include <linux/dma-map-ops.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
@@ -32,7 +32,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
/*
* Cache operations depending on function and direction argument, inspired by
- * https://lkml.org/lkml/2018/5/18/979
+ * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk
* "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
* dma-mapping: provide a generic dma-noncoherent implementation)"
*
@@ -91,7 +91,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
* Plug in direct dma map ops.
*/
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
/*
* IOC hardware snoops all DMA traffic keeping the caches consistent
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
index b06b09ddf924..88fa3a4d4906 100644
--- a/arch/arc/mm/extable.c
+++ b/arch/arc/mm/extable.c
@@ -22,26 +22,3 @@ int fixup_exception(struct pt_regs *regs)
return 0;
}
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
-unsigned long arc_clear_user_noinline(void __user *to,
- unsigned long n)
-{
- return __arc_clear_user(to, n);
-}
-EXPORT_SYMBOL(arc_clear_user_noinline);
-
-long arc_strncpy_from_user_noinline(char *dst, const char __user *src,
- long count)
-{
- return __arc_strncpy_from_user(dst, src, count);
-}
-EXPORT_SYMBOL(arc_strncpy_from_user_noinline);
-
-long arc_strnlen_user_noinline(const char __user *src, long n)
-{
- return __arc_strnlen_user(src, n);
-}
-EXPORT_SYMBOL(arc_strnlen_user_noinline);
-#endif
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index fb86bc3e9b35..95119a5e7761 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -13,7 +13,7 @@
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/mm_types.h>
-#include <asm/pgalloc.h>
+#include <asm/entry.h>
#include <asm/mmu.h>
/*
@@ -34,28 +34,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
- pgd = pgd_offset_fast(current->active_mm, address);
+ pgd = pgd_offset(current->active_mm, address);
pgd_k = pgd_offset_k(address);
- if (!pgd_present(*pgd_k))
+ if (pgd_none(*pgd_k))
goto bad_area;
+ if (!pgd_present(*pgd))
+ set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, address);
p4d_k = p4d_offset(pgd_k, address);
- if (!p4d_present(*p4d_k))
+ if (p4d_none(*p4d_k))
goto bad_area;
+ if (!p4d_present(*p4d))
+ set_p4d(p4d, *p4d_k);
pud = pud_offset(p4d, address);
pud_k = pud_offset(p4d_k, address);
- if (!pud_present(*pud_k))
+ if (pud_none(*pud_k))
goto bad_area;
+ if (!pud_present(*pud))
+ set_pud(pud, *pud_k);
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
+ if (pmd_none(*pmd_k))
goto bad_area;
-
- set_pmd(pmd, *pmd_k);
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, *pmd_k);
/* XXX: create the TLB entry here */
return 0;
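The rewritten walk distinguishes pgd_none() on the kernel reference table (the vmalloc area is genuinely unmapped, so the access is bad) from !pgd_present() on the faulting mm's table (a stale copy that just needs syncing from swapper_pg_dir). A condensed sketch of the per-level pattern repeated four times above, helper name assumed:

    /* Sketch: one level of the lazy kernel-pagetable sync */
    static bool sync_one_level(pgd_t *mine, pgd_t *ref)
    {
            if (pgd_none(*ref))             /* kernel never mapped it */
                    return false;           /* -> bad_area */
            if (!pgd_present(*mine))        /* this mm's copy is stale */
                    set_pgd(mine, *ref);
            return true;
    }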
@@ -94,28 +100,23 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
if (faulthandler_disabled() || !mm)
goto no_context;
- if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */
+ if (regs->ecr.cause & ECR_C_PROTV_STORE) /* ST/EX */
write = 1;
- else if ((regs->ecr_vec == ECR_V_PROTV) &&
- (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+ else if ((regs->ecr.vec == ECR_V_PROTV) &&
+ (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
exec = 1;
- flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (write)
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- down_read(&mm->mmap_sem);
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (unlikely(address < vma->vm_start)) {
- if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address))
- goto bad_area;
- }
+ goto bad_area_nosemaphore;
/*
* vm_area is good, now check permissions for this memory access
@@ -131,56 +132,38 @@ retry:
goto bad_area;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ goto no_context;
+ return;
+ }
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ return;
/*
- * Fault retry nuances
+ * Fault retry nuances, mmap_lock already relinquished by core mm
*/
if (unlikely(fault & VM_FAULT_RETRY)) {
-
- /*
- * If fault needs to be retried, handle any pending signals
- * first (by returning to user mode).
- * mmap_sem already relinquished by core mm for RETRY case
- */
- if (fatal_signal_pending(current)) {
- if (!user_mode(regs))
- goto no_context;
- return;
- }
- /*
- * retry state machine
- */
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
bad_area:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
+bad_area_nosemaphore:
/*
* Major/minor page fault accounting
* (in case of retry we only land here once)
*/
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
- if (likely(!(fault & VM_FAULT_ERROR))) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
+ if (likely(!(fault & VM_FAULT_ERROR)))
/* Normal return path: fault Handled Gracefully */
return;
- }
if (!user_mode(regs))
goto no_context;
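The deleted major/minor bookkeeping is not lost: passing regs into handle_mm_fault() hands that accounting to core mm, which is also why only the plain PERF_COUNT_SW_PAGE_FAULTS event is raised up front now. Roughly what the core does on the arch's behalf (a simplified sketch, not part of this patch):

    static void account_fault_sketch(struct pt_regs *regs,
                                     unsigned long address, vm_fault_t ret)
    {
            if (ret & VM_FAULT_MAJOR) {
                    current->maj_flt++;
                    perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
            } else {
                    current->min_flt++;
                    perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
            }
    }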
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index fc8849e4f72e..c79912a6b196 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -6,8 +6,8 @@
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/highmem.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -36,9 +36,8 @@
* This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means
* 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split)
*
- * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE
- * slots across NR_CPUS would be more than sufficient (generic code defines
- * KM_TYPE_NR as 20).
+ * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per
+ * CPU. So the number of CPUs sharing a single PTE page is limited.
*
* - pkmap being preemptible, in theory could do with more than 256 concurrent
* mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse
@@ -47,80 +46,12 @@
*/
extern pte_t * pkmap_page_table;
-static pte_t * fixmap_page_table;
-
-void *kmap(struct page *page)
-{
- BUG_ON(in_interrupt());
- if (!PageHighMem(page))
- return page_address(page);
-
- return kmap_high(page);
-}
-EXPORT_SYMBOL(kmap);
-
-void *kmap_atomic(struct page *page)
-{
- int idx, cpu_idx;
- unsigned long vaddr;
-
- preempt_disable();
- pagefault_disable();
- if (!PageHighMem(page))
- return page_address(page);
-
- cpu_idx = kmap_atomic_idx_push();
- idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
- vaddr = FIXMAP_ADDR(idx);
-
- set_pte_at(&init_mm, vaddr, fixmap_page_table + idx,
- mk_pte(page, kmap_prot));
-
- return (void *)vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic);
-
-void __kunmap_atomic(void *kv)
-{
- unsigned long kvaddr = (unsigned long)kv;
-
- if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) {
-
- /*
- * Because preemption is disabled, this vaddr can be associated
- * with the current allocated index.
- * But in case of multiple live kmap_atomic(), it still relies on
- * callers to unmap in right order.
- */
- int cpu_idx = kmap_atomic_idx();
- int idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
-
- WARN_ON(kvaddr != FIXMAP_ADDR(idx));
-
- pte_clear(&init_mm, kvaddr, fixmap_page_table + idx);
- local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
-
- kmap_atomic_idx_pop();
- }
-
- pagefault_enable();
- preempt_enable();
-}
-EXPORT_SYMBOL(__kunmap_atomic);
static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
{
- pgd_t *pgd_k;
- p4d_t *p4d_k;
- pud_t *pud_k;
- pmd_t *pmd_k;
+ pmd_t *pmd_k = pmd_off_k(kvaddr);
pte_t *pte_k;
- pgd_k = pgd_offset_k(kvaddr);
- p4d_k = p4d_offset(pgd_k, kvaddr);
- pud_k = pud_offset(p4d_k, kvaddr);
- pmd_k = pmd_offset(pud_k, kvaddr);
-
pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!pte_k)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
@@ -134,10 +65,9 @@ void __init kmap_init(void)
{
/* Due to recursive include hell, we can't do this in processor.h */
BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE));
+ BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE);
+ BUILD_BUG_ON(FIX_KMAP_SLOTS > PTRS_PER_PTE);
- BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE);
pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE);
-
- BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE);
- fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE);
+ alloc_kmap_pgtable(FIXMAP_BASE);
}
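With the ARC-private kmap()/kmap_atomic() gone, callers are unchanged; the generic code in mm/highmem.c now manages the fixmap slots that alloc_kmap_pgtable(FIXMAP_BASE) wires up. Typical usage, for reference:

    void *p = kmap_local_page(page);   /* grabs a per-CPU fixmap slot */
    memcpy(p, src, PAGE_SIZE);
    kunmap_local(p);                   /* slot released, TLB entry shot down */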
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 0920c969c466..6a71b23f1383 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -14,8 +14,8 @@
#include <linux/module.h>
#include <linux/highmem.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/setup.h>
#include <asm/arcregs.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
@@ -27,13 +27,10 @@ static unsigned long low_mem_sz;
#ifdef CONFIG_HIGHMEM
static unsigned long min_high_pfn, max_high_pfn;
-static u64 high_mem_start;
-static u64 high_mem_sz;
-#endif
-
-#ifdef CONFIG_DISCONTIGMEM
-struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
+static phys_addr_t high_mem_start;
+static phys_addr_t high_mem_sz;
+unsigned long arch_pfn_offset;
+EXPORT_SYMBOL(arch_pfn_offset);
#endif
long __init arc_get_mem_sz(void)
@@ -63,11 +60,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
low_mem_sz = size;
in_use = 1;
+ memblock_add_node(base, size, 0, MEMBLOCK_NONE);
} else {
#ifdef CONFIG_HIGHMEM
high_mem_start = base;
high_mem_sz = size;
in_use = 1;
+ memblock_add_node(base, size, 1, MEMBLOCK_NONE);
+ memblock_reserve(base, size);
#endif
}
@@ -83,25 +83,16 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
*/
void __init setup_arch_memory(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zones_holes[MAX_NR_ZONES];
+ unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
- init_mm.start_code = (unsigned long)_text;
- init_mm.end_code = (unsigned long)_etext;
- init_mm.end_data = (unsigned long)_edata;
- init_mm.brk = (unsigned long)_end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
/* first page of system - kernel .vector starts here */
- min_low_pfn = ARCH_PFN_OFFSET;
+ min_low_pfn = virt_to_pfn((void *)CONFIG_LINUX_RAM_BASE);
/* Last usable page of low mem */
max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz);
-#ifdef CONFIG_FLATMEM
- /* pfn_valid() uses this */
- max_mapnr = max_low_pfn - min_low_pfn;
-#endif
-
/*------------- bootmem allocator setup -----------------------*/
/*
@@ -115,7 +106,6 @@ void __init setup_arch_memory(void)
* the crash
*/
- memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
__pa(_end) - CONFIG_LINUX_LINK_BASE);
@@ -133,54 +123,55 @@ void __init setup_arch_memory(void)
memblock_dump_all();
/*----------------- node/zones setup --------------------------*/
- memset(zones_size, 0, sizeof(zones_size));
- memset(zones_holes, 0, sizeof(zones_holes));
-
- zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
- zones_holes[ZONE_NORMAL] = 0;
-
- /*
- * We can't use the helper free_area_init(zones[]) because it uses
- * PAGE_OFFSET to compute the @min_low_pfn which would be wrong
- * when our kernel doesn't start at PAGE_OFFSET, i.e.
- * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE
- */
- free_area_init_node(0, /* node-id */
- zones_size, /* num pages per zone */
- min_low_pfn, /* first pfn of node */
- zones_holes); /* holes */
+ max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
/*
- * Populate a new node with highmem
- *
* On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based)
- * than addresses in normal ala low memory (0x8000_0000 based).
+ * than addresses in normal aka low memory (0x8000_0000 based).
* Even with PAE, the huge peripheral space hole would waste a lot of
- * mem with single mem_map[]. This warrants a mem_map per region design.
- * Thus HIGHMEM on ARC is imlemented with DISCONTIGMEM.
- *
- * DISCONTIGMEM in turns requires multiple nodes. node 0 above is
- * populated with normal memory zone while node 1 only has highmem
+ * mem with single contiguous mem_map[].
+ * Thus when HIGHMEM on ARC is enabled the memory map corresponding
+ * to the hole is freed and ARC specific version of pfn_valid()
+ * handles the hole in the memory map.
*/
- node_set_online(1);
min_high_pfn = PFN_DOWN(high_mem_start);
max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
- zones_size[ZONE_NORMAL] = 0;
- zones_holes[ZONE_NORMAL] = 0;
-
- zones_size[ZONE_HIGHMEM] = max_high_pfn - min_high_pfn;
- zones_holes[ZONE_HIGHMEM] = 0;
-
- free_area_init_node(1, /* node-id */
- zones_size, /* num pages per zone */
- min_high_pfn, /* first pfn of node */
- zones_holes); /* holes */
+ /*
+ * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+ * For HIGHMEM without PAE max_high_pfn should be less than
+ * min_low_pfn to guarantee that these two regions don't overlap.
+ * For PAE case highmem is greater than lowmem, so it is natural
+ * to use max_high_pfn.
+ *
+ * In both cases, holes should be handled by pfn_valid().
+ */
+ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
+
+ arch_pfn_offset = min(min_low_pfn, min_high_pfn);
kmap_init();
+
+#else /* CONFIG_HIGHMEM */
+ /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */
+ max_mapnr = max_low_pfn - min_low_pfn;
+
+#endif /* CONFIG_HIGHMEM */
+
+ free_area_init(max_zone_pfn);
+}
+
+static void __init highmem_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ unsigned long tmp;
+
+ memblock_phys_free(high_mem_start, high_mem_sz);
+ for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++)
+ free_highmem_page(pfn_to_page(tmp));
#endif
}
@@ -192,14 +183,20 @@ void __init setup_arch_memory(void)
*/
void __init mem_init(void)
{
-#ifdef CONFIG_HIGHMEM
- unsigned long tmp;
+ memblock_free_all();
+ highmem_init();
- reset_all_zones_managed_pages();
- for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++)
- free_highmem_page(pfn_to_page(tmp));
-#endif
+ BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
+ BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
+ BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
+ BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
+}
- memblock_free_all();
- mem_init_print_info(NULL);
+#ifdef CONFIG_HIGHMEM
+int pfn_valid(unsigned long pfn)
+{
+ return (pfn >= min_high_pfn && pfn <= max_high_pfn) ||
+ (pfn >= min_low_pfn && pfn <= max_low_pfn);
}
+EXPORT_SYMBOL(pfn_valid);
+#endif
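arch_pfn_offset exists because the 0-based highmem bank sits below the 0x8000_0000 lowmem bank, so mem_map[] must be indexed from the smaller first pfn. Elsewhere in this series it backs ARCH_PFN_OFFSET, at which point the generic FLATMEM memory model reduces to roughly:

    #define __pfn_to_page(pfn)   (mem_map + ((pfn) - ARCH_PFN_OFFSET))
    #define __page_to_pfn(page)  ((unsigned long)((page) - mem_map) + ARCH_PFN_OFFSET)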
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index fac4adc90204..b07004d53267 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/io.h>
#include <linux/mm.h>
-#include <linux/slab.h>
#include <linux/cache.h>
static inline bool arc_uncached_addr_space(phys_addr_t paddr)
@@ -25,13 +24,6 @@ static inline bool arc_uncached_addr_space(phys_addr_t paddr)
void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
{
- phys_addr_t end;
-
- /* Don't allow wraparound or zero size */
- end = paddr + size - 1;
- if (!size || (end < paddr))
- return NULL;
-
/*
* If the region is h/w uncached, MMU mapping can be elided as optim
* The cast to u32 is fine as this region can only be inside 4GB
@@ -39,7 +31,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
if (arc_uncached_addr_space(paddr))
return (void __iomem *)(u32)paddr;
- return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
+ return ioremap_prot(paddr, size,
+ pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}
EXPORT_SYMBOL(ioremap);
@@ -50,54 +43,22 @@ EXPORT_SYMBOL(ioremap);
* ARC hardware uncached region, this one still goes thru the MMU as caller
* might need finer access control (R/W/X)
*/
-void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+void __iomem *ioremap_prot(phys_addr_t paddr, size_t size,
unsigned long flags)
{
- unsigned long vaddr;
- struct vm_struct *area;
- phys_addr_t off, end;
pgprot_t prot = __pgprot(flags);
- /* Don't allow wraparound, zero size */
- end = paddr + size - 1;
- if ((!size) || (end < paddr))
- return NULL;
-
- /* An early platform driver might end up here */
- if (!slab_is_available())
- return NULL;
-
/* force uncached */
- prot = pgprot_noncached(prot);
-
- /* Mappings have to be page-aligned */
- off = paddr & ~PAGE_MASK;
- paddr &= PAGE_MASK;
- size = PAGE_ALIGN(end + 1) - paddr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
- area->phys_addr = paddr;
- vaddr = (unsigned long)area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, paddr, prot)) {
- vunmap((void __force *)vaddr);
- return NULL;
- }
- return (void __iomem *)(off + (char __iomem *)vaddr);
+ return generic_ioremap_prot(paddr, size, pgprot_noncached(prot));
}
EXPORT_SYMBOL(ioremap_prot);
-
-void iounmap(const void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
/* weird double cast to handle phys_addr_t > 32 bits */
if (arc_uncached_addr_space((phys_addr_t)(u32)addr))
return;
- vfree((void *)(PAGE_MASK & (unsigned long __force)addr));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
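Both ioremap() and iounmap() short-circuit addresses inside the hardware-uncached window, so those never touch the vmalloc area; everything else now goes through generic_ioremap_prot()/generic_iounmap(). For instance (addresses hypothetical; the window base is platform dependent, 0xF000_0000 being typical):

    /* inside the uncached window: identity "mapping", no page tables */
    void __iomem *uart = ioremap(0xf0000000, 0x100);

    /* outside it: a real MMU mapping with the non-cached pgprot bits */
    void __iomem *regs = ioremap(0xe0012000, SZ_4K);

    iounmap(regs);     /* unmaps regs; would be a no-op for uart */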
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 722d26b94307..3c1c7ae73292 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -14,10 +14,6 @@
#include <asm/cacheflush.h>
-#define COLOUR_ALIGN(addr, pgoff) \
- ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
- (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))
-
/*
* Ensure that shared mappings are correctly aligned to
* avoid aliasing issues with VIPT caches.
@@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- int do_align = 0;
- int aliasing = cache_is_vipt_aliasing();
struct vm_unmapped_area_info info;
/*
- * We only need to do colour alignment if D cache aliases.
- */
- if (aliasing)
- do_align = filp || (flags & MAP_SHARED);
-
- /*
* We enforce the MAP_FIXED case.
*/
if (flags & MAP_FIXED) {
- if (aliasing && flags & MAP_SHARED &&
+ if (flags & MAP_SHARED &&
(addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
return -EINVAL;
return addr;
@@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return -ENOMEM;
if (addr) {
- if (do_align)
- addr = COLOUR_ALIGN(addr, pgoff);
- else
- addr = PAGE_ALIGN(addr);
+ addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
@@ -70,7 +55,27 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
+
+static const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_U_NONE,
+ [VM_READ] = PAGE_U_R,
+ [VM_WRITE] = PAGE_U_R,
+ [VM_WRITE | VM_READ] = PAGE_U_R,
+ [VM_EXEC] = PAGE_U_X_R,
+ [VM_EXEC | VM_READ] = PAGE_U_X_R,
+ [VM_EXEC | VM_WRITE] = PAGE_U_X_R,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_R,
+ [VM_SHARED] = PAGE_U_NONE,
+ [VM_SHARED | VM_READ] = PAGE_U_R,
+ [VM_SHARED | VM_WRITE] = PAGE_U_W_R,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_U_W_R,
+ [VM_SHARED | VM_EXEC] = PAGE_U_X_R,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_U_X_R,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_U_X_W_R,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_W_R
+};
+DECLARE_VM_GET_PAGE_PROT
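DECLARE_VM_GET_PAGE_PROT expands to the accessor that consumes this table, roughly:

    pgprot_t vm_get_page_prot(unsigned long vm_flags)
    {
            return protection_map[vm_flags &
                                  (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
    }
    EXPORT_SYMBOL(vm_get_page_prot);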
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index c340acd989a0..ad702b49aeb3 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -1,51 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * TLB Management (flush/create/diagnostics) for ARC700
+ * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
- * vineetg: Aug 2011
- * -Reintroduce duplicate PD fixup - some customer chips still have the issue
- *
- * vineetg: May 2011
- * -No need to flush_cache_page( ) for each call to update_mmu_cache()
- * some of the LMBench tests improved amazingly
- * = page-fault thrice as fast (75 usec to 28 usec)
- * = mmap twice as fast (9.6 msec to 4.6 msec),
- * = fork (5.3 msec to 3.7 msec)
- *
- * vineetg: April 2011 :
- * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
- * helps avoid a shift when preparing PD0 from PTE
- *
- * vineetg: April 2011 : Preparing for MMU V3
- * -MMU v2/v3 BCRs decoded differently
- * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
- * -tlb_entry_erase( ) can be void
- * -local_flush_tlb_range( ):
- * = need not "ceil" @end
- * = walks MMU only if range spans < 32 entries, as opposed to 256
- *
- * Vineetg: Sept 10th 2008
- * -Changes related to MMU v2 (Rel 4.8)
- *
- * Vineetg: Aug 29th 2008
- * -In TLB Flush operations (Metal Fix MMU) there is a explict command to
- * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
- * it fails. Thus need to load it with ANY valid value before invoking
- * TLBIVUTLB cmd
- *
- * Vineetg: Aug 21th 2008:
- * -Reduced the duration of IRQ lockouts in TLB Flush routines
- * -Multiple copies of TLB erase code seperated into a "single" function
- * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
- * in interrupt-safe region.
- *
- * Vineetg: April 23rd Bug #93131
- * Problem: tlb_flush_kernel_range() doesn't do anything if the range to
- * flush is more than the size of TLB itself.
- *
- * Rahul Trivedi : Codito Technologies 2004
*/
#include <linux/module.h>
@@ -57,51 +15,12 @@
#include <asm/mmu_context.h>
#include <asm/mmu.h>
-/* Need for ARC MMU v2
- *
- * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
- * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
- * map into same set, there would be contention for the 2 ways causing severe
- * Thrashing.
- *
- * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
- * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
- * Given this, the thrasing problem should never happen because once the 3
- * J-TLB entries are created (even though 3rd will knock out one of the prev
- * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
- *
- * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
- * This is a simple design for keeping them in sync. So what do we do?
- * The solution which James came up was pretty neat. It utilised the assoc
- * of uTLBs by not invalidating always but only when absolutely necessary.
- *
- * - Existing TLB commands work as before
- * - New command (TLBWriteNI) for TLB write without clearing uTLBs
- * - New command (TLBIVUTLB) to invalidate uTLBs.
- *
- * The uTLBs need only be invalidated when pages are being removed from the
- * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
- * as a result of a miss, the removed entry is still allowed to exist in the
- * uTLBs as it is still valid and present in the OS page table. This allows the
- * full associativity of the uTLBs to hide the limited associativity of the main
- * TLB.
- *
- * During a miss handler, the new "TLBWriteNI" command is used to load
- * entries without clearing the uTLBs.
- *
- * When the OS page table is updated, TLB entries that may be associated with a
- * removed page are removed (flushed) from the TLB using TLBWrite. In this
- * circumstance, the uTLBs must also be cleared. This is done by using the
- * existing TLBWrite command. An explicit IVUTLB is also required for those
- * corner cases when TLBWrite was not executed at all because the corresp
- * J-TLB entry got evicted/replaced.
- */
-
-
/* A copy of the ASID from the PID reg is kept in asid_cache */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
-static int __read_mostly pae_exists;
+static struct cpuinfo_arc_mmu {
+ unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
+} mmuinfo;
/*
* Utility Routine to erase a J-TLB entry
@@ -120,32 +39,10 @@ static inline void __tlb_entry_erase(void)
static void utlb_invalidate(void)
{
-#if (CONFIG_ARC_MMU_VER >= 2)
-
-#if (CONFIG_ARC_MMU_VER == 2)
- /* MMU v2 introduced the uTLB Flush command.
- * There was however an obscure hardware bug, where uTLB flush would
- * fail when a prior probe for J-TLB (both totally unrelated) would
- * return lkup err - because the entry didn't exist in MMU.
- * The Workround was to set Index reg with some valid value, prior to
- * flush. This was fixed in MMU v3
- */
- unsigned int idx;
-
- /* make sure INDEX Reg is valid */
- idx = read_aux_reg(ARC_REG_TLBINDEX);
-
- /* If not write some dummy val */
- if (unlikely(idx & TLB_LKUP_ERR))
- write_aux_reg(ARC_REG_TLBINDEX, 0xa);
-#endif
-
write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
-#endif
-
}
-#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_ARC_MMU_V3
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
@@ -176,7 +73,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
}
}
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
unsigned int idx;
@@ -206,7 +103,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
-#else /* CONFIG_ARC_MMU_VER >= 4) */
+#else /* MMUv4 */
static void tlb_entry_erase(unsigned int vaddr_n_asid)
{
@@ -214,13 +111,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
}
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
write_aux_reg(ARC_REG_TLBPD0, pd0);
- write_aux_reg(ARC_REG_TLBPD1, pd1);
- if (is_pae40_enabled())
+ if (!is_pae40_enabled()) {
+ write_aux_reg(ARC_REG_TLBPD1, pd1);
+ } else {
+ write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+ }
write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
}
@@ -233,7 +133,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
noinline void local_flush_tlb_all(void)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
unsigned long flags;
unsigned int entry;
int num_tlb = mmu->sets * mmu->ways;
@@ -272,7 +172,7 @@ noinline void local_flush_tlb_all(void)
}
/*
- * Flush the entrie MM for userland. The fastest way is to move to Next ASID
+ * Flush the entire MM for userland. The fastest way is to move to Next ASID
*/
noinline void local_flush_tlb_mm(struct mm_struct *mm)
{
@@ -303,7 +203,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
* Difference between this and Kernel Range Flush is
* -Here the fastest way (if range is too large) is to move to next ASID
* without doing any explicit Shootdown
- * -In case of kernel Flush, entry has to be shot down explictly
+ * -In case of kernel Flush, entry has to be shot down explicitly
*/
void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
@@ -491,12 +391,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
/*
* Routine to create a TLB entry
*/
-void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
+static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
unsigned long flags;
unsigned int asid_or_sasid, rwx;
unsigned long pd0;
- pte_t pd1;
+ phys_addr_t pd1;
/*
* create_tlb() assumes that current->mm == vma->mm, since
@@ -505,7 +405,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
*
* Removing the assumption involves
* -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
- * -Fix the TLB paranoid debug code to not trigger false negatives.
* -More importantly it makes this handler inconsistent with fast-path
* TLB Refill handler which always deals with "current"
*
@@ -528,8 +427,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
local_irq_save(flags);
- tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
-
vaddr &= PAGE_MASK;
/* update this PTE credentials */
@@ -572,39 +469,37 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
* Note that flush (when done) involves both WBACK - so physical page is
* in sync as well as INV - so any non-congruent aliases don't remain
*/
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
- pte_t *ptep)
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long vaddr_unaligned, pte_t *ptep, unsigned int nr)
{
unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
- phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+ phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
struct page *page = pfn_to_page(pte_pfn(*ptep));
create_tlb(vma, vaddr, ptep);
- if (page == ZERO_PAGE(0)) {
+ if (page == ZERO_PAGE(0))
return;
- }
/*
- * Exec page : Independent of aliasing/page-color considerations,
- * since icache doesn't snoop dcache on ARC, any dirty
- * K-mapping of a code page needs to be wback+inv so that
- * icache fetch by userspace sees code correctly.
- * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it
- * so userspace sees the right data.
- * (Avoids the flush for Non-exec + congruent mapping case)
+ * For executable pages, since icache doesn't snoop dcache, any
+ * dirty K-mapping of a code page needs to be wback+inv so that
+ * icache fetch by userspace sees code correctly.
*/
- if ((vma->vm_flags & VM_EXEC) ||
- addr_not_cache_congruent(paddr, vaddr)) {
-
- int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
+ if (vma->vm_flags & VM_EXEC) {
+ struct folio *folio = page_folio(page);
+ int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);
if (dirty) {
+ unsigned long offset = offset_in_folio(folio, paddr);
+ nr = folio_nr_pages(folio);
+ paddr -= offset;
+ vaddr -= offset;
/* wback + inv dcache lines (K-mapping) */
- __flush_dcache_page(paddr, paddr);
+ __flush_dcache_pages(paddr, paddr, nr);
/* invalidate any existing icache lines (U-mapping) */
if (vma->vm_flags & VM_EXEC)
- __inv_icache_page(paddr, vaddr);
+ __inv_icache_pages(paddr, vaddr, nr);
}
}
}
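The folio rounding above widens a single-page fault to the whole folio. A worked example, assuming 8K pages and a 4-page (32K) folio with the fault landing on its third page:

    /* offset_in_folio() = 0x4000 -> paddr/vaddr pulled back to folio start
     * folio_nr_pages()  = 4      -> nr widened from 1 to 4
     * __flush_dcache_pages(paddr, paddr, 4) then covers all 32K, and
     * PG_dc_clean is tracked once per folio instead of once per page.
     */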
@@ -620,7 +515,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
* Super Page size is configurable in hardware (4K to 16M), but fixed once
* RTL builds.
*
- * The exact THP size a Linx configuration will support is a function of:
+ * The exact THP size a Linux configuration will support is a function of:
* - MMU page size (typical 8K, RTL fixed)
* - software page walker address split between PGD:PTE:PFN (typical
* 11:8:13, but can be changed with 1 line)
@@ -636,44 +531,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
pte_t pte = __pte(pmd_val(*pmd));
- update_mmu_cache(vma, addr, &pte);
-}
-
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- struct list_head *lh = (struct list_head *) pgtable;
-
- assert_spin_locked(&mm->page_table_lock);
-
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- struct list_head *lh;
- pgtable_t pgtable;
-
- assert_spin_locked(&mm->page_table_lock);
-
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
-
- pte_val(pgtable[0]) = 0;
- pte_val(pgtable[1]) = 0;
-
- return pgtable;
+ update_mmu_cache_range(NULL, vma, addr, &pte, HPAGE_PMD_NR);
}
void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -698,139 +556,92 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
#endif
-/* Read the Cache Build Confuration Registers, Decode them and save into
+/* Read the MMU Build Configuration Registers, decode them and save into
* the cpuinfo structure for later use.
* No Validation is done here, simply read/convert the BCRs
*/
-void read_decode_mmu_bcr(void)
+int arc_mmu_mumbojumbo(int c, char *buf, int len)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- unsigned int tmp;
- struct bcr_mmu_1_2 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
-#else
- unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
-#endif
- } *mmu2;
-
- struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
- u_itlb:4, u_dtlb:4;
-#else
- unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
- ways:4, ver:8;
-#endif
- } *mmu3;
-
- struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
- n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
- /* DTLB ITLB JES JE JA */
- unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
- pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif
- } *mmu4;
-
- tmp = read_aux_reg(ARC_REG_MMU_BCR);
- mmu->ver = (tmp >> 24);
-
- if (is_isa_arcompact()) {
- if (mmu->ver <= 2) {
- mmu2 = (struct bcr_mmu_1_2 *)&tmp;
- mmu->pg_sz_k = TO_KB(0x2000);
- mmu->sets = 1 << mmu2->sets;
- mmu->ways = 1 << mmu2->ways;
- mmu->u_dtlb = mmu2->u_dtlb;
- mmu->u_itlb = mmu2->u_itlb;
- } else {
- mmu3 = (struct bcr_mmu_3 *)&tmp;
- mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
- mmu->sets = 1 << mmu3->sets;
- mmu->ways = 1 << mmu3->ways;
- mmu->u_dtlb = mmu3->u_dtlb;
- mmu->u_itlb = mmu3->u_itlb;
- mmu->sasid = mmu3->sasid;
- }
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
+ unsigned int bcr, u_dtlb, u_itlb, sasid;
+ struct bcr_mmu_3 *mmu3;
+ struct bcr_mmu_4 *mmu4;
+ char super_pg[64] = "";
+ int n = 0;
+
+ bcr = read_aux_reg(ARC_REG_MMU_BCR);
+ mmu->ver = (bcr >> 24);
+
+ if (is_isa_arcompact() && mmu->ver == 3) {
+ mmu3 = (struct bcr_mmu_3 *)&bcr;
+ mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
+ mmu->sets = 1 << mmu3->sets;
+ mmu->ways = 1 << mmu3->ways;
+ u_dtlb = mmu3->u_dtlb;
+ u_itlb = mmu3->u_itlb;
+ sasid = mmu3->sasid;
} else {
- mmu4 = (struct bcr_mmu_4 *)&tmp;
+ mmu4 = (struct bcr_mmu_4 *)&bcr;
mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
mmu->sets = 64 << mmu4->n_entry;
mmu->ways = mmu4->n_ways * 2;
- mmu->u_dtlb = mmu4->u_dtlb * 4;
- mmu->u_itlb = mmu4->u_itlb * 4;
- mmu->sasid = mmu4->sasid;
- pae_exists = mmu->pae = mmu4->pae;
+ u_dtlb = mmu4->u_dtlb * 4;
+ u_itlb = mmu4->u_itlb * 4;
+ sasid = mmu4->sasid;
+ mmu->pae = mmu4->pae;
}
-}
-
-char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
- char super_pg[64] = "";
- if (p_mmu->s_pg_sz_m)
- scnprintf(super_pg, 64, "%dM Super Page %s",
- p_mmu->s_pg_sz_m,
- IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
+ if (mmu->s_pg_sz_m)
+ scnprintf(super_pg, 64, "/%dM%s",
+ mmu->s_pg_sz_m,
+ IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":"");
n += scnprintf(buf + n, len - n,
- "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
- p_mmu->ver, p_mmu->pg_sz_k, super_pg,
- p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
- p_mmu->u_dtlb, p_mmu->u_itlb,
- IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
+ "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
+ mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
+ mmu->sets, mmu->ways,
+ u_dtlb, u_itlb,
+ IS_AVAIL1(sasid, ", SASID"),
+ IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
- return buf;
+ return n;
}
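The MMUv4 BCR fields decoded above are log2-encoded; a worked decode for a common HS38 build (field values hypothetical):

    /* sz0 = 4      -> pg_sz_k   = 1 << (4 - 1)   = 8   (8K pages)
     * sz1 = 12     -> s_pg_sz_m = 1 << (12 - 11) = 2   (2M super page)
     * n_entry = 2  -> sets      = 64 << 2        = 256
     * n_ways = 2   -> ways      = 2 * 2          = 4   (1024-entry JTLB)
     */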
int pae40_exist_but_not_enab(void)
{
- return pae_exists && !is_pae40_enabled();
+ return mmuinfo.pae && !is_pae40_enabled();
}
void arc_mmu_init(void)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- char str[256];
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
int compat = 0;
- pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
-
/*
- * Can't be done in processor.h due to header include depenedencies
+ * Can't be done in processor.h due to header include dependencies
*/
BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));
/*
* stack top size sanity check,
- * Can't be done in processor.h due to header include depenedencies
+ * Can't be done in processor.h due to header include dependencies
*/
BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
/*
* Ensure that MMU features assumed by kernel exist in hardware.
- * For older ARC700 cpus, it has to be exact match, since the MMU
- * revisions were not backwards compatible (MMUv3 TLB layout changed
- * so even if kernel for v2 didn't use any new cmds of v3, it would
- * still not work.
- * For HS cpus, MMUv4 was baseline and v5 is backwards compatible
- * (will run older software).
+ * - For older ARC700 cpus, only v3 supported
+ * - For HS cpus, v4 was baseline and v5 is backwards compatible
+ * (will run older software).
*/
- if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
+ if (is_isa_arcompact() && mmu->ver == 3)
compat = 1;
- else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
+ else if (is_isa_arcv2() && mmu->ver >= 4)
compat = 1;
- if (!compat) {
- panic("MMU ver %d doesn't match kernel built for %d...\n",
- mmu->ver, CONFIG_ARC_MMU_VER);
- }
+ if (!compat)
+ panic("MMU ver %d doesn't match kernel built for\n", mmu->ver);
if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
@@ -843,14 +654,11 @@ void arc_mmu_init(void)
if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
panic("Hardware doesn't support PAE40\n");
- /* Enable the MMU */
- write_aux_reg(ARC_REG_PID, MMU_ENABLE);
+ /* Enable the MMU with ASID 0 */
+ mmu_setup_asid(NULL, 0);
- /* In smp we use this reg for interrupt 1 scratch */
-#ifdef ARC_USE_SCRATCH_REG
- /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
- write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
-#endif
+ /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
+ mmu_setup_pgd(NULL, swapper_pg_dir);
if (pae40_exist_but_not_enab())
write_aux_reg(ARC_REG_TLBPD1HI, 0);
@@ -881,12 +689,12 @@ void arc_mmu_init(void)
* the duplicate one.
* -Knob to be verbose abt it.(TODO: hook them up to debugfs)
*/
-volatile int dup_pd_silent; /* Be slient abt it or complain (default) */
+volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
struct pt_regs *regs)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
unsigned long flags;
int set, n_ways = mmu->ways;
@@ -945,40 +753,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
local_irq_restore(flags);
}
-
-/***********************************************************************
- * Diagnostic Routines
- * -Called from Low Level TLB Hanlders if things don;t look good
- **********************************************************************/
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
-/*
- * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
- * don't match
- */
-void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
-{
- pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
- is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
-
- __asm__ __volatile__("flag 1");
-}
-
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
-{
- unsigned int mmu_asid;
-
- mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
-
- /*
- * At the time of a TLB miss/installation
- * - HW version needs to match SW version
- * - SW needs to have a valid ASID
- */
- if (addr < 0x70000000 &&
- ((mm_asid == MM_CTXT_NO_ASID) ||
- (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
- print_asid_mismatch(mm_asid, mmu_asid, 0);
-}
-#endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 2efaf6ca0c06..e054780a8fe0 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -33,13 +33,12 @@
*/
#include <linux/linkage.h>
+#include <linux/pgtable.h>
#include <asm/entry.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/processor.h>
-#include <asm/tlb-mmu1.h>
#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
@@ -94,11 +93,6 @@ ex_saved_reg1:
st_s r1, [r0, 4]
st_s r2, [r0, 8]
st_s r3, [r0, 12]
-
- ; VERIFY if the ASID in MMU-PID Reg is same as
- ; one in Linux data structures
-
- tlb_paranoid_check_asm
.endm
.macro TLBMISS_RESTORE_REGS
@@ -148,53 +142,16 @@ ex_saved_reg1:
#endif
;============================================================================
-; Troubleshooting Stuff
+;TLB Miss handling Code
;============================================================================
-; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
-; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
-; we use the MMU PID Reg to get current ASID.
-; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
-; So we try to detect this in TLB Mis shandler
-
-.macro tlb_paranoid_check_asm
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
- GET_CURR_TASK_ON_CPU r3
- ld r0, [r3, TASK_ACT_MM]
- ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
- breq r0, 0, 55f ; Error if no ASID allocated
-
- lr r1, [ARC_REG_PID]
- and r1, r1, 0xFF
-
- and r2, r0, 0xFF ; MMU PID bits only for comparison
- breq r1, r2, 5f
-
-55:
- ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
- lr r2, [erstatus]
- bbit0 r2, STATUS_U_BIT, 5f
-
- ; We sure are in troubled waters, Flag the error, but to do so
- ; need to switch to kernel mode stack to call error routine
- GET_TSK_STACK_BASE r3, sp
-
- ; Call printk to shoutout aloud
- mov r2, 1
- j print_asid_mismatch
-
-5: ; ASIDs match so proceed normally
- nop
-
+#ifndef PMD_SHIFT
+#define PMD_SHIFT PUD_SHIFT
#endif
-.endm
-
-;============================================================================
-;TLB Miss handling Code
-;============================================================================
+#ifndef PUD_SHIFT
+#define PUD_SHIFT PGDIR_SHIFT
+#endif
;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address.
@@ -203,7 +160,7 @@ ex_saved_reg1:
lr r2, [efa]
-#ifdef ARC_USE_SCRATCH_REG
+#ifdef CONFIG_ISA_ARCV2
lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
#else
GET_CURR_TASK_ON_CPU r1
@@ -216,6 +173,24 @@ ex_saved_reg1:
tst r3, r3
bz do_slow_path_pf ; if no Page Table, do page fault
+#if CONFIG_PGTABLE_LEVELS > 3
+ lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD
+ and r0, r0, (PTRS_PER_PUD - 1)
+ ld.as r1, [r3, r0] ; PUD entry
+ tst r1, r1
+ bz do_slow_path_pf
+ mov r3, r1
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+ lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD
+ and r0, r0, (PTRS_PER_PMD - 1)
+ ld.as r1, [r3, r0] ; PMD entry
+ tst r1, r1
+ bz do_slow_path_pf
+ mov r3, r1
+#endif
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
add2.nz r1, r1, r0
@@ -279,14 +254,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
-#if (CONFIG_ARC_MMU_VER < 4)
-
-#ifdef CONFIG_EZNPS_MTM_EXT
- /* verify if entry for this vaddr+ASID already exists */
- sr TLBProbe, [ARC_REG_TLBCOMMAND]
- lr r0, [ARC_REG_TLBINDEX]
- bbit0 r0, 31, 88f
-#endif
+#ifdef CONFIG_ARC_MMU_V3
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -382,13 +350,6 @@ ENTRY(EV_TLBMissD)
CONV_PTE_TO_TLB
-#if (CONFIG_ARC_MMU_VER == 1)
- ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
- ; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
- ; But only for old MMU or one with Metal Fix
- TLB_WRITE_HEURISTICS
-#endif
-
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
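The walk added above executes one load per configured level; aliasing PMD_SHIFT/PUD_SHIFT upward lets the same code assemble for 2, 3 or 4 levels. A rough C rendering of one step (helper name assumed, not in the patch):

    /* one miss-handler walk step: index the table with the faulting
     * address, descend on a non-zero entry, else take the slow path
     */
    static inline unsigned long walk_one(unsigned long *table,
                                         unsigned long efa,
                                         unsigned int shift,
                                         unsigned int nr_ptrs)
    {
            return table[(efa >> shift) & (nr_ptrs - 1)];  /* 0 => do_slow_path_pf */
    }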
diff --git a/arch/arc/oprofile/Makefile b/arch/arc/oprofile/Makefile
deleted file mode 100644
index 698367bb41d0..000000000000
--- a/arch/arc/oprofile/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
- oprof.o cpu_buffer.o buffer_sync.o \
- event_buffer.o oprofile_files.o \
- oprofilefs.o oprofile_stats.o \
- timer_int.o )
-
-oprofile-y := $(DRIVER_OBJS) common.o
diff --git a/arch/arc/oprofile/common.c b/arch/arc/oprofile/common.c
deleted file mode 100644
index 86bf5899533b..000000000000
--- a/arch/arc/oprofile/common.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Based on orig code from @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/oprofile.h>
-#include <linux/perf_event.h>
-
-int __init oprofile_arch_init(struct oprofile_operations *ops)
-{
- /*
- * A failure here, forces oprofile core to switch to Timer based PC
- * sampling, which will happen if say perf is not enabled/available
- */
- return oprofile_perf_init(ops);
-}
-
-void oprofile_arch_exit(void)
-{
- oprofile_perf_exit();
-}
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 63ea5a606ecd..1feb990a56bc 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -6,7 +6,6 @@
*/
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/libfdt.h>
#include <asm/asm-offsets.h>
@@ -50,7 +49,7 @@ static void __init axs10x_enable_gpio_intc_wire(void)
* Current implementation of "irq-dw-apb-ictl" driver doesn't work well
* with stacked INTCs. In particular problem happens if its master INTC
* not yet instantiated. See discussion here -
- * https://lkml.org/lkml/2015/3/4/755
+ * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com
*
* So setup the first gpio block as a passive pass thru and hide it from
* DT hardware topology - connect MB intc directly to cpu intc
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
deleted file mode 100644
index a376a50d3fea..000000000000
--- a/arch/arc/plat-eznps/Kconfig
+++ /dev/null
@@ -1,57 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-#
-
-menuconfig ARC_PLAT_EZNPS
- bool "\"EZchip\" ARC dev platform"
- select CPU_BIG_ENDIAN
- select CLKSRC_NPS
- select EZNPS_GIC
- select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
- help
- Support for EZchip development platforms,
- based on ARC700 cores.
- We handle few flavors:
- - Hardware Emulator AKA HE which is FPGA based chassis
- - Simulator based on MetaWare nSIM
- - NPS400 chip based on ASIC
-
-config EZNPS_MTM_EXT
- bool "ARC-EZchip MTM Extensions"
- select CPUMASK_OFFSTACK
- depends on ARC_PLAT_EZNPS && SMP
- default y
- help
- Here we add new hierarchy for CPUs topology.
- We got:
- Core
- Thread
- At the new thread level each CPU represent one HW thread.
- At highest hierarchy each core contain 16 threads,
- any of them seem like CPU from Linux point of view.
- All threads within same core share the execution unit of the
- core and HW scheduler round robin between them.
-
-config EZNPS_MEM_ERROR_ALIGN
- bool "ARC-EZchip Memory error as an exception"
- depends on EZNPS_MTM_EXT
- default n
- help
- On the real chip of the NPS, user memory errors are handled
- as a machine check exception, which is fatal, whereas on
- simulator platform for NPS, is handled as a Level 2 interrupt
- (just a stock ARC700) which is recoverable. This option makes
- simulator behave like hardware.
-
-config EZNPS_SHARED_AUX_REGS
- bool "ARC-EZchip Shared Auxiliary Registers Per Core"
- depends on ARC_PLAT_EZNPS
- default y
- help
- On the real chip of the NPS, auxiliary registers are shared between
- all the cpus of the core, whereas on simulator platform for NPS,
- each cpu has a different set of auxiliary registers. Configuration
- should be unset if auxiliary registers are not shared between the cpus
- of the core, so there will be a need to initialize them per cpu.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
deleted file mode 100644
index ebb9723002cf..000000000000
--- a/arch/arc/plat-eznps/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the linux kernel.
-#
-
-obj-y := entry.o platform.o ctop.o
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c
deleted file mode 100644
index b398e6e838a9..000000000000
--- a/arch/arc/plat-eznps/ctop.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <plat/ctop.h>
-
-void dp_save_restore(struct task_struct *prev, struct task_struct *next)
-{
- struct eznps_dp *prev_task_dp = &prev->thread.dp;
- struct eznps_dp *next_task_dp = &next->thread.dp;
-
- /* Here we save all Data Plane related auxiliary registers */
- prev_task_dp->eflags = read_aux_reg(CTOP_AUX_EFLAGS);
- write_aux_reg(CTOP_AUX_EFLAGS, next_task_dp->eflags);
-
- prev_task_dp->gpa1 = read_aux_reg(CTOP_AUX_GPA1);
- write_aux_reg(CTOP_AUX_GPA1, next_task_dp->gpa1);
-}
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
deleted file mode 100644
index 3f18c0108e72..000000000000
--- a/arch/arc/plat-eznps/entry.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*******************************************************************************
-
- EZNPS CPU startup Code
- Copyright(c) 2012 EZchip Technologies.
-
-
-*******************************************************************************/
-#include <linux/linkage.h>
-#include <asm/entry.h>
-#include <asm/cache.h>
-#include <plat/ctop.h>
-
- .cpu A7
-
- .section .init.text, "ax",@progbits
- .align 1024 ; HW requierment for restart first PC
-
-ENTRY(res_service)
-#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
- ; There is no work for HW thread id != 0
- lr r3, [CTOP_AUX_THREAD_ID]
- cmp r3, 0
- jne stext
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
- ; With no cache coherency mechanism D$ need to be used very carefully.
- ; Address space:
- ; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
- ; 2G-3G: We disable D$ by setting this bit.
- ; 3G-4G: D$ is disabled by architecture.
- ; FMT are huge pages for user application reside at 0-2G.
- ; Only FMT left as one who can use D$ where each such page got
- ; disable/enable bit for cachability.
- ; Programmer will use FMT pages for private data so cache coherency
- ; would not be a problem.
- ; First thing we invalidate D$
- sr 1, [ARC_REG_DC_IVDC]
- sr HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
-#endif
-
-#ifdef CONFIG_SMP
- ; We set logical cpuid to be used by GET_CPUID
- ; We do not use physical cpuid since we want ids to be continious when
- ; it comes to cpus on the same quad cluster.
- ; This is useful for applications that used shared resources of a quad
- ; cluster such SRAMS.
- lr r3, [CTOP_AUX_CORE_ID]
- sr r3, [CTOP_AUX_LOGIC_CORE_ID]
- lr r3, [CTOP_AUX_CLUSTER_ID]
- ; Set logical is acheived by swap of 2 middle bits of cluster id (4 bit)
- ; r3 is used since we use short instruction and we need q-class reg
- .short CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
- .word CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
- sr r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
-#endif
-
- j stext
-END(res_service)
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
deleted file mode 100644
index a4a61531c7fb..000000000000
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_CTOP_H
-#define _PLAT_EZNPS_CTOP_H
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-#error "Incorrect ctop.h include"
-#endif
-
-#include <linux/bits.h>
-#include <linux/types.h>
-#include <soc/nps/common.h>
-
-/* core auxiliary registers */
-#ifdef __ASSEMBLY__
-#define CTOP_AUX_BASE (-0x800)
-#else
-#define CTOP_AUX_BASE 0xFFFFF800
-#endif
-
-#define CTOP_AUX_GLOBAL_ID (CTOP_AUX_BASE + 0x000)
-#define CTOP_AUX_CLUSTER_ID (CTOP_AUX_BASE + 0x004)
-#define CTOP_AUX_CORE_ID (CTOP_AUX_BASE + 0x008)
-#define CTOP_AUX_THREAD_ID (CTOP_AUX_BASE + 0x00C)
-#define CTOP_AUX_LOGIC_GLOBAL_ID (CTOP_AUX_BASE + 0x010)
-#define CTOP_AUX_LOGIC_CLUSTER_ID (CTOP_AUX_BASE + 0x014)
-#define CTOP_AUX_LOGIC_CORE_ID (CTOP_AUX_BASE + 0x018)
-#define CTOP_AUX_MT_CTRL (CTOP_AUX_BASE + 0x020)
-#define CTOP_AUX_HW_COMPLY (CTOP_AUX_BASE + 0x024)
-#define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C)
-#define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030)
-#define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080)
-#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088)
-#define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C)
-#define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300)
-
-/* EZchip core instructions */
-#define CTOP_INST_HWSCHD_WFT_IE12 0x3E6F7344
-#define CTOP_INST_HWSCHD_OFF_R4 0x3C6F00BF
-#define CTOP_INST_HWSCHD_RESTORE_R4 0x3E6F7103
-#define CTOP_INST_SCHD_RW 0x3E6F7004
-#define CTOP_INST_SCHD_RD 0x3E6F7084
-#define CTOP_INST_ASRI_0_R3 0x3B56003E
-#define CTOP_INST_XEX_DI_R2_R2_R3 0x4A664C00
-#define CTOP_INST_EXC_DI_R2_R2_R3 0x4A664C01
-#define CTOP_INST_AADD_DI_R2_R2_R3 0x4A664C02
-#define CTOP_INST_AAND_DI_R2_R2_R3 0x4A664C04
-#define CTOP_INST_AOR_DI_R2_R2_R3 0x4A664C05
-#define CTOP_INST_AXOR_DI_R2_R2_R3 0x4A664C06
-
-/* Do not use D$ for address in 2G-3G */
-#define HW_COMPLY_KRN_NOT_D_CACHED BIT(28)
-
-#define NPS_MSU_EN_CFG 0x80
-#define NPS_CRG_BLKID 0x480
-#define NPS_CRG_SYNC_BIT BIT(0)
-#define NPS_GIM_BLKID 0x5C0
-
-/* GIM registers and fields*/
-#define NPS_GIM_UART_LINE BIT(7)
-#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE BIT(10)
-#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE BIT(11)
-#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE BIT(25)
-#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE BIT(26)
-
-#ifndef __ASSEMBLY__
-/* Functional register definitions */
-struct nps_host_reg_mtm_cfg {
- union {
- struct {
- u32 gen:1, gdis:1, clk_gate_dis:1, asb:1,
- __reserved:9, nat:3, ten:16;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_mtm_cpu_cfg {
- union {
- struct {
- u32 csa:22, dmsid:6, __reserved:3, cs:1;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_thr_init {
- union {
- struct {
- u32 str:1, __reserved:27, thr_id:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_thr_init_sts {
- union {
- struct {
- u32 bsy:1, err:1, __reserved:26, thr_id:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_msu_en_cfg {
- union {
- struct {
- u32 __reserved1:11,
- rtc_en:1, ipc_en:1, gim_1_en:1,
- gim_0_en:1, ipi_en:1, buff_e_rls_bmuw:1,
- buff_e_alc_bmuw:1, buff_i_rls_bmuw:1, buff_i_alc_bmuw:1,
- buff_e_rls_bmue:1, buff_e_alc_bmue:1, buff_i_rls_bmue:1,
- buff_i_alc_bmue:1, __reserved2:1, buff_e_pre_en:1,
- buff_i_pre_en:1, pmuw_ja_en:1, pmue_ja_en:1,
- pmuw_nj_en:1, pmue_nj_en:1, msu_en:1;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_gim_p_int_dst {
- union {
- struct {
- u32 int_out_en:1, __reserved1:4,
- is:1, intm:2, __reserved2:4,
- nid:4, __reserved3:4, cid:4,
- __reserved4:4, tid:4;
- };
- u32 value;
- };
-};
-
-/* AUX register definitions */
-struct nps_host_reg_aux_dpc {
- union {
- struct {
- u32 ien:1, men:1, hen:1, reserved:29;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_udmc {
- union {
- struct {
- u32 dcp:1, cme:1, __reserved:19, nat:3,
- __reserved2:5, dcas:3;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_mt_ctrl {
- union {
- struct {
- u32 mten:1, hsen:1, scd:1, sten:1,
- st_cnt:8, __reserved:8,
- hs_cnt:8, __reserved1:4;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_hw_comply {
- union {
- struct {
- u32 me:1, le:1, te:1, knc:1, __reserved:28;
- };
- u32 value;
- };
-};
-
-struct nps_host_reg_aux_lpc {
- union {
- struct {
- u32 mep:1, __reserved:31;
- };
- u32 value;
- };
-};
-
-/* CRG registers */
-#define REG_GEN_PURP_0 nps_host_reg_non_cl(NPS_CRG_BLKID, 0x1BF)
-
-/* GIM registers */
-#define REG_GIM_P_INT_EN_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x100)
-#define REG_GIM_P_INT_POL_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x110)
-#define REG_GIM_P_INT_SENS_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x114)
-#define REG_GIM_P_INT_BLK_0 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x118)
-#define REG_GIM_P_INT_DST_10 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13A)
-#define REG_GIM_P_INT_DST_11 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13B)
-#define REG_GIM_P_INT_DST_25 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x149)
-#define REG_GIM_P_INT_DST_26 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x14A)
-
-#else
-
-.macro GET_CPU_ID reg
- lr \reg, [CTOP_AUX_LOGIC_GLOBAL_ID]
-#ifndef CONFIG_EZNPS_MTM_EXT
- lsr \reg, \reg, 4
-#endif
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _PLAT_EZNPS_CTOP_H */
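
A usage sketch of the union-of-bitfields pattern this header relies on:
read a whole auxiliary register into .value, update one named field, and
write it back, as mtm.c below does with CTOP_AUX_UDMC (the 0x40 value here
is arbitrary). Note also that -0x800 and 0xFFFFF800 are the same 32-bit
address; the signed form is presumably used so the constant fits assembly's
signed immediates.

	struct nps_host_reg_aux_mt_ctrl mt_ctrl;

	mt_ctrl.value = read_aux_reg(CTOP_AUX_MT_CTRL);	/* whole register */
	mt_ctrl.hs_cnt = 0x40;				/* update a single field */
	write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);	/* write back */
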
diff --git a/arch/arc/plat-eznps/include/plat/mtm.h b/arch/arc/plat-eznps/include/plat/mtm.h
deleted file mode 100644
index 7c55becc891b..000000000000
--- a/arch/arc/plat-eznps/include/plat/mtm.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_MTM_H
-#define _PLAT_EZNPS_MTM_H
-
-#include <plat/ctop.h>
-
-static inline void *nps_mtm_reg_addr(u32 cpu, u32 reg)
-{
- struct global_id gid;
- u32 core, blkid;
-
- gid.value = cpu;
- core = gid.core;
- blkid = (((core & 0x0C) << 2) | (core & 0x03));
-
- return nps_host_reg(cpu, blkid, reg);
-}
-
-#ifdef CONFIG_EZNPS_MTM_EXT
-#define NPS_CPU_TO_THREAD_NUM(cpu) \
- ({ struct global_id gid; gid.value = cpu; gid.thread; })
-
-/* MTM registers */
-#define MTM_CFG(cpu) nps_mtm_reg_addr(cpu, 0x81)
-#define MTM_THR_INIT(cpu) nps_mtm_reg_addr(cpu, 0x92)
-#define MTM_THR_INIT_STS(cpu) nps_mtm_reg_addr(cpu, 0x93)
-
-#define get_thread(map) map.thread
-#define eznps_max_cpus 4096
-#define eznps_cpus_per_cluster 256
-
-void mtm_enable_core(unsigned int cpu);
-int mtm_enable_thread(int cpu);
-#else /* !CONFIG_EZNPS_MTM_EXT */
-
-#define get_thread(map) 0
-#define eznps_max_cpus 256
-#define eznps_cpus_per_cluster 16
-#define mtm_enable_core(cpu)
-#define mtm_enable_thread(cpu) 1
-#define NPS_CPU_TO_THREAD_NUM(cpu) 0
-
-#endif /* CONFIG_EZNPS_MTM_EXT */
-
-#endif /* _PLAT_EZNPS_MTM_H */
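
A worked example of the blkid computation in nps_mtm_reg_addr() above: the
4-bit core id is split into two 2-bit fields with a gap left at bits [3:2]
(why the gap exists is an address-map detail not shown here):

	/*
	 * core = 5 = 0b0101:
	 *   (core & 0x0C) << 2 = 0b0100 << 2 = 0b010000
	 *   (core & 0x03)      =               0b000001
	 *   blkid (OR of both) =               0b010001 = 0x11
	 */
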
diff --git a/arch/arc/plat-eznps/include/plat/smp.h b/arch/arc/plat-eznps/include/plat/smp.h
deleted file mode 100644
index e433f118bdca..000000000000
--- a/arch/arc/plat-eznps/include/plat/smp.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef __PLAT_EZNPS_SMP_H
-#define __PLAT_EZNPS_SMP_H
-
-#ifdef CONFIG_SMP
-
-extern void res_service(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
deleted file mode 100644
index 3dcf5a9e2976..000000000000
--- a/arch/arc/plat-eznps/mtm.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/log2.h>
-#include <asm/arcregs.h>
-#include <plat/mtm.h>
-#include <plat/smp.h>
-
-#define MT_HS_CNT_MIN 0x01
-#define MT_HS_CNT_MAX 0xFF
-#define MT_CTRL_ST_CNT 0xF
-#define NPS_NUM_HW_THREADS 0x10
-
-static int mtm_hs_ctr = MT_HS_CNT_MAX;
-
-#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN
-int do_memory_error(unsigned long address, struct pt_regs *regs)
-{
- die("Invalid Mem Access", regs, address);
-
- return 1;
-}
-#endif
-
-static void mtm_init_nat(int cpu)
-{
- struct nps_host_reg_mtm_cfg mtm_cfg;
- struct nps_host_reg_aux_udmc udmc;
- int log_nat, nat = 0, i, t;
-
- /* Iterate core threads and update nat */
- for (i = 0, t = cpu; i < NPS_NUM_HW_THREADS; i++, t++)
- nat += test_bit(t, cpumask_bits(cpu_possible_mask));
-
- log_nat = ilog2(nat);
-
- udmc.value = read_aux_reg(CTOP_AUX_UDMC);
- udmc.nat = log_nat;
- write_aux_reg(CTOP_AUX_UDMC, udmc.value);
-
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.nat = log_nat;
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-}
-
-static void mtm_init_thread(int cpu)
-{
- int i, tries = 5;
- struct nps_host_reg_thr_init thr_init;
- struct nps_host_reg_thr_init_sts thr_init_sts;
-
- /* Set thread init register */
- thr_init.value = 0;
- iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
- thr_init.thr_id = NPS_CPU_TO_THREAD_NUM(cpu);
- thr_init.str = 1;
- iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
-
- /* Poll till thread init is done */
- for (i = 0; i < tries; i++) {
- thr_init_sts.value = ioread32be(MTM_THR_INIT_STS(cpu));
- if (thr_init_sts.thr_id == thr_init.thr_id) {
- if (thr_init_sts.bsy)
- continue;
- else if (thr_init_sts.err)
- pr_warn("Failed to thread init cpu %u\n", cpu);
- break;
- }
-
- pr_warn("Wrong thread id in thread init for cpu %u\n", cpu);
- break;
- }
-
- if (i == tries)
- pr_warn("Got thread init timeout for cpu %u\n", cpu);
-}
-
-int mtm_enable_thread(int cpu)
-{
- struct nps_host_reg_mtm_cfg mtm_cfg;
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) == 0)
- return 1;
-
- /* Enable thread in mtm */
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.ten |= (1 << (NPS_CPU_TO_THREAD_NUM(cpu)));
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
- return 0;
-}
-
-void mtm_enable_core(unsigned int cpu)
-{
- int i;
- struct nps_host_reg_aux_mt_ctrl mt_ctrl;
- struct nps_host_reg_mtm_cfg mtm_cfg;
- struct nps_host_reg_aux_dpc dpc;
-
-	/*
-	 * Initialize the DPC register in each CPU.
-	 * Overwrite its init value so that the CMEM and FMT
-	 * virtual address spaces are accessible and the
-	 * Data Plane HW facilities are enabled.
-	 */
- dpc.ien = 1;
- dpc.men = 1;
- write_aux_reg(CTOP_AUX_DPC, dpc.value);
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
- return;
-
- /* Initialize Number of Active Threads */
- mtm_init_nat(cpu);
-
- /* Initialize mtm_cfg */
- mtm_cfg.value = ioread32be(MTM_CFG(cpu));
- mtm_cfg.ten = 1;
- iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
- /* Initialize all other threads in core */
- for (i = 1; i < NPS_NUM_HW_THREADS; i++)
- mtm_init_thread(cpu + i);
-
- /* Enable HW schedule, stall counter, mtm */
- mt_ctrl.value = 0;
- mt_ctrl.hsen = 1;
- mt_ctrl.hs_cnt = mtm_hs_ctr;
- mt_ctrl.mten = 1;
- write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
-
-	/*
-	 * The HW scheduling mechanism will start working only
-	 * after the "schd.rw" instruction is issued; cpu_relax()
-	 * issues exactly that instruction.
-	 */
- cpu_relax();
-}
-
-/* Verify and set the value of the mtm hs counter */
-static int __init set_mtm_hs_ctr(char *ctr_str)
-{
- int hs_ctr;
- int ret;
-
- ret = kstrtoint(ctr_str, 0, &hs_ctr);
-
- if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
- pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n",
- hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX);
- return -EINVAL;
- }
-
- mtm_hs_ctr = hs_ctr;
-
- return 0;
-}
-early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr);
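
Because set_mtm_hs_ctr() is registered via early_param(), the counter can be
tuned from the kernel command line; kstrtoint(..., 0, ...) accepts decimal or
0x-prefixed hex, and the value must lie within [MT_HS_CNT_MIN, MT_HS_CNT_MAX].
An illustrative bootargs fragment:

	console=ttyS0,115200 nps_mtm_hs_ctr=0x40
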
diff --git a/arch/arc/plat-eznps/platform.c b/arch/arc/plat-eznps/platform.c
deleted file mode 100644
index 6de2fe840043..000000000000
--- a/arch/arc/plat-eznps/platform.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/mach_desc.h>
-#include <plat/mtm.h>
-
-static void __init eznps_configure_msu(void)
-{
- int cpu;
- struct nps_host_reg_msu_en_cfg msu_en_cfg = {.value = 0};
-
- msu_en_cfg.msu_en = 1;
- msu_en_cfg.ipi_en = 1;
- msu_en_cfg.gim_0_en = 1;
- msu_en_cfg.gim_1_en = 1;
-
- /* enable IPI and GIM messages on all clusters */
- for (cpu = 0 ; cpu < eznps_max_cpus; cpu += eznps_cpus_per_cluster)
- iowrite32be(msu_en_cfg.value,
- nps_host_reg(cpu, NPS_MSU_BLKID, NPS_MSU_EN_CFG));
-}
-
-static void __init eznps_configure_gim(void)
-{
- u32 reg_value;
- u32 gim_int_lines;
- struct nps_host_reg_gim_p_int_dst gim_p_int_dst = {.value = 0};
-
- gim_int_lines = NPS_GIM_UART_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
- gim_int_lines |= NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE;
-
- /*
- * IRQ polarity
- * low or high level
- * negative or positive edge
- */
- reg_value = ioread32be(REG_GIM_P_INT_POL_0);
- reg_value &= ~gim_int_lines;
- iowrite32be(reg_value, REG_GIM_P_INT_POL_0);
-
- /* IRQ type level or edge */
- reg_value = ioread32be(REG_GIM_P_INT_SENS_0);
- reg_value |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
- reg_value |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
- iowrite32be(reg_value, REG_GIM_P_INT_SENS_0);
-
- /*
- * GIM interrupt select type for
- * dbg_lan TX and RX interrupts
- * should be type 1
- * type 0 = IRQ line 6
- * type 1 = IRQ line 7
- */
- gim_p_int_dst.is = 1;
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_10);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_11);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_25);
- iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_26);
-
- /*
- * CTOP IRQ lines should be defined
- * as blocking in GIM
- */
- iowrite32be(gim_int_lines, REG_GIM_P_INT_BLK_0);
-
- /* enable CTOP IRQ lines in GIM */
- iowrite32be(gim_int_lines, REG_GIM_P_INT_EN_0);
-}
-
-static void __init eznps_early_init(void)
-{
- eznps_configure_msu();
- eznps_configure_gim();
-}
-
-static const char *eznps_compat[] __initconst = {
- "ezchip,arc-nps",
- NULL,
-};
-
-MACHINE_START(NPS, "nps")
- .dt_compat = eznps_compat,
- .init_early = eznps_early_init,
-MACHINE_END
diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c
deleted file mode 100644
index f119cb7de2ae..000000000000
--- a/arch/arc/plat-eznps/smp.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/of_fdt.h>
-#include <linux/io.h>
-#include <linux/irqdomain.h>
-#include <asm/irq.h>
-#include <plat/ctop.h>
-#include <plat/smp.h>
-#include <plat/mtm.h>
-
-#define NPS_DEFAULT_MSID 0x34
-#define NPS_MTM_CPU_CFG 0x90
-
-static char smp_cpuinfo_buf[128] = {"Extn [EZNPS-SMP]\t: On\n"};
-
-/* Get cpu map from device tree */
-static int __init eznps_get_map(const char *name, struct cpumask *cpumask)
-{
- unsigned long dt_root = of_get_flat_dt_root();
- const char *buf;
-
- buf = of_get_flat_dt_prop(dt_root, name, NULL);
- if (!buf)
- return 1;
-
- cpulist_parse(buf, cpumask);
-
- return 0;
-}
-
-/* Update board cpu maps */
-static void __init eznps_init_cpumasks(void)
-{
- struct cpumask cpumask;
-
- if (eznps_get_map("present-cpus", &cpumask)) {
- pr_err("Failed to get present-cpus from dtb");
- return;
- }
- init_cpu_present(&cpumask);
-
- if (eznps_get_map("possible-cpus", &cpumask)) {
- pr_err("Failed to get possible-cpus from dtb");
- return;
- }
- init_cpu_possible(&cpumask);
-}
-
-static void eznps_init_core(unsigned int cpu)
-{
- u32 sync_value;
- struct nps_host_reg_aux_hw_comply hw_comply;
- struct nps_host_reg_aux_lpc lpc;
-
- if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
- return;
-
- hw_comply.value = read_aux_reg(CTOP_AUX_HW_COMPLY);
- hw_comply.me = 1;
- hw_comply.le = 1;
- hw_comply.te = 1;
- write_aux_reg(CTOP_AUX_HW_COMPLY, hw_comply.value);
-
-	/* Enable MMU clock (read first so the remaining bits are preserved) */
-	lpc.value = read_aux_reg(CTOP_AUX_LPC);
-	lpc.mep = 1;
-	write_aux_reg(CTOP_AUX_LPC, lpc.value);
-
- /* Boot CPU only */
- if (!cpu) {
- /* Write to general purpose register in CRG */
- sync_value = ioread32be(REG_GEN_PURP_0);
- sync_value |= NPS_CRG_SYNC_BIT;
- iowrite32be(sync_value, REG_GEN_PURP_0);
- }
-}
-
-/*
- * Master kick-starting another CPU
- */
-static void __init eznps_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
- struct nps_host_reg_mtm_cpu_cfg cpu_cfg;
-
- if (mtm_enable_thread(cpu) == 0)
- return;
-
- /* set PC, dmsid, and start CPU */
- cpu_cfg.value = (u32)res_service;
- cpu_cfg.dmsid = NPS_DEFAULT_MSID;
- cpu_cfg.cs = 1;
- iowrite32be(cpu_cfg.value, nps_mtm_reg_addr(cpu, NPS_MTM_CPU_CFG));
-}
-
-static void eznps_ipi_send(int cpu)
-{
- struct global_id gid;
- struct {
- union {
- struct {
- u32 num:8, cluster:8, core:8, thread:8;
- };
- u32 value;
- };
- } ipi;
-
- gid.value = cpu;
- ipi.thread = get_thread(gid);
- ipi.core = gid.core;
- ipi.cluster = nps_cluster_logic_to_phys(gid.cluster);
- ipi.num = NPS_IPI_IRQ;
-
- __asm__ __volatile__(
- " mov r3, %0\n"
- " .word %1\n"
- :
- : "r"(ipi.value), "i"(CTOP_INST_ASRI_0_R3)
- : "r3");
-}
-
-static void eznps_init_per_cpu(int cpu)
-{
- smp_ipi_irq_setup(cpu, NPS_IPI_IRQ);
-
- eznps_init_core(cpu);
- mtm_enable_core(cpu);
-}
-
-struct plat_smp_ops plat_smp_ops = {
- .info = smp_cpuinfo_buf,
- .init_early_smp = eznps_init_cpumasks,
- .cpu_kick = eznps_smp_wakeup_cpu,
- .ipi_send = eznps_ipi_send,
- .init_per_cpu = eznps_init_per_cpu,
-};
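
The cpu maps parsed by eznps_get_map() above come from flat device-tree
properties in cpulist format; a hypothetical fragment (property names are
taken from the code, the values are illustrative):

	present-cpus = "0-1,16-17";
	possible-cpus = "0-4095";
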
diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
index ce8101834518..a2d10c29fbcc 100644
--- a/arch/arc/plat-hsdk/Kconfig
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -7,6 +7,8 @@ menuconfig ARC_SOC_HSDK
depends on ISA_ARCV2
select ARC_HAS_ACCL_REGS
select ARC_IRQ_NO_AUTOSAVE
+ select ARC_FPU_SAVE_RESTORE
select CLK_HSDK
+ select RESET_CONTROLLER
select RESET_HSDK
select HAVE_PCI
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index 0b961a2a10b8..c4a875b22352 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -13,26 +13,10 @@
#include <asm/io.h>
#include <asm/mach_desc.h>
-int arc_hsdk_axi_dmac_coherent __section(.data) = 0;
+int arc_hsdk_axi_dmac_coherent __section(".data") = 0;
#define ARC_CCM_UNUSED_ADDR 0x60000000
-static void __init hsdk_init_per_cpu(unsigned int cpu)
-{
- /*
- * By default ICCM is mapped to 0x7z while this area is used for
- * kernel virtual mappings, so move it to currently unused area.
- */
- if (cpuinfo_arc700[cpu].iccm.sz)
- write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR);
-
- /*
- * By default DCCM is mapped to 0x8z while this area is used by kernel,
- * so move it to currently unused area.
- */
- if (cpuinfo_arc700[cpu].dccm.sz)
- write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR);
-}
#define ARC_PERIPHERAL_BASE 0xf0000000
#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000)
@@ -68,7 +52,7 @@ static void __init hsdk_enable_gpio_intc_wire(void)
* Current implementation of "irq-dw-apb-ictl" driver doesn't work well
* with stacked INTCs. In particular, a problem arises if its master INTC
* is not yet instantiated. See the discussion here -
- * https://lkml.org/lkml/2015/3/4/755
+ * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com
*
* So set up the first gpio block as a passive pass-through and hide it
* from the DT hardware topology - connect the intc directly to the cpu intc
@@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = {
MACHINE_START(SIMULATION, "hsdk")
.dt_compat = hsdk_compat,
.init_early = hsdk_init_early,
- .init_per_cpu = hsdk_init_per_cpu,
MACHINE_END
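
On the __section() change in the hunk above: the kernel's __section
attribute now takes the section name as a quoted string literal rather than
a bare token, e.g. (with a hypothetical variable):

	static int example_flag __section(".data") = 1;
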