Diffstat (limited to 'arch')
-rw-r--r-- arch/arc/Kconfig | 5
-rw-r--r-- arch/arc/include/asm/cacheflush.h | 43
-rw-r--r-- arch/arc/include/asm/entry-arcv2.h | 32
-rw-r--r-- arch/arc/include/asm/entry-compact.h | 87
-rw-r--r-- arch/arc/include/asm/entry.h | 110
-rw-r--r-- arch/arc/include/asm/hugepage.h | 7
-rw-r--r-- arch/arc/include/asm/ptrace.h | 14
-rw-r--r-- arch/arc/kernel/setup.c | 4
-rw-r--r-- arch/arc/kernel/signal.c | 6
-rw-r--r-- arch/arc/mm/cache.c | 136
-rw-r--r-- arch/arc/mm/mmap.c | 21
-rw-r--r-- arch/arc/mm/tlb.c | 16
-rw-r--r-- arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts | 4
-rw-r--r-- arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts | 4
-rw-r--r-- arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/nxp/imx/imx7s.dtsi | 8
-rw-r--r-- arch/arm/boot/dts/nxp/mxs/imx28-xea.dts | 1
-rw-r--r-- arch/arm/boot/dts/rockchip/rk3128.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/rockchip/rk322x.dtsi | 6
-rw-r--r-- arch/arm/boot/dts/ti/omap/am33xx.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/ti/omap/dra7.dtsi | 2
-rw-r--r-- arch/arm/include/asm/kexec.h | 4
-rw-r--r-- arch/arm/kernel/Makefile | 2
-rw-r--r-- arch/arm/mach-imx/mmdc.c | 7
-rw-r--r-- arch/arm/mach-omap2/id.c | 5
-rw-r--r-- arch/arm/xen/enlighten.c | 3
-rw-r--r-- arch/arm64/Makefile | 2
-rw-r--r-- arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi | 3
-rw-r--r-- arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts | 3
-rw-r--r-- arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts | 2
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi | 5
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 8
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi | 11
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8ulp.dtsi | 6
-rw-r--r-- arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts | 2
-rw-r--r-- arch/arm64/boot/dts/freescale/imx93.dtsi | 10
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 2
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 2
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 12
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 24
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 4
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 4
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 8
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 96
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 242
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 44
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 6
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts | 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi | 3
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts | 4
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 6
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 4
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 1
-rw-r--r-- arch/arm64/include/asm/cpufeature.h | 5
-rw-r--r-- arch/arm64/include/asm/esr.h | 15
-rw-r--r-- arch/arm64/include/asm/kvm_arm.h | 63
-rw-r--r-- arch/arm64/include/asm/kvm_emulate.h | 34
-rw-r--r-- arch/arm64/include/asm/kvm_host.h | 138
-rw-r--r-- arch/arm64/include/asm/kvm_nested.h | 56
-rw-r--r-- arch/arm64/include/asm/kvm_pgtable.h | 80
-rw-r--r-- arch/arm64/include/asm/kvm_pkvm.h | 5
-rw-r--r-- arch/arm64/include/asm/pgtable-prot.h | 2
-rw-r--r-- arch/arm64/include/asm/pgtable.h | 6
-rw-r--r-- arch/arm64/include/asm/setup.h | 17
-rw-r--r-- arch/arm64/include/asm/syscall_wrapper.h | 4
-rw-r--r-- arch/arm64/include/asm/sysreg.h | 25
-rw-r--r-- arch/arm64/include/asm/tlb.h | 15
-rw-r--r-- arch/arm64/include/asm/tlbflush.h | 100
-rw-r--r-- arch/arm64/include/asm/vncr_mapping.h | 103
-rw-r--r-- arch/arm64/kernel/cpufeature.c | 45
-rw-r--r-- arch/arm64/kvm/Kconfig | 5
-rw-r--r-- arch/arm64/kvm/arch_timer.c | 3
-rw-r--r-- arch/arm64/kvm/arm.c | 14
-rw-r--r-- arch/arm64/kvm/emulate-nested.c | 63
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/fault.h | 2
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h | 93
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 22
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 6
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mm.c | 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/setup.c | 2
-rw-r--r-- arch/arm64/kvm/hyp/pgtable.c | 90
-rw-r--r-- arch/arm64/kvm/mmu.c | 49
-rw-r--r-- arch/arm64/kvm/nested.c | 22
-rw-r--r-- arch/arm64/kvm/reset.c | 9
-rw-r--r-- arch/arm64/kvm/sys_regs.c | 235
-rw-r--r-- arch/arm64/kvm/vgic/vgic-init.c | 47
-rw-r--r-- arch/arm64/kvm/vgic/vgic-its.c | 5
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 32
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio.c | 101
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v4.c | 4
-rw-r--r-- arch/arm64/kvm/vgic/vgic.h | 1
-rw-r--r-- arch/arm64/mm/pageattr.c | 7
-rw-r--r-- arch/arm64/tools/cpucaps | 1
-rw-r--r-- arch/arm64/tools/sysreg | 320
-rw-r--r-- arch/loongarch/Makefile | 5
-rw-r--r-- arch/loongarch/include/asm/asmmacro.h | 3
-rw-r--r-- arch/loongarch/include/asm/efi.h | 2
-rw-r--r-- arch/loongarch/include/asm/elf.h | 2
-rw-r--r-- arch/loongarch/include/asm/kvm_host.h | 24
-rw-r--r-- arch/loongarch/include/asm/kvm_vcpu.h | 21
-rw-r--r-- arch/loongarch/include/asm/loongarch.h | 5
-rw-r--r-- arch/loongarch/include/asm/percpu.h | 11
-rw-r--r-- arch/loongarch/include/asm/setup.h | 2
-rw-r--r-- arch/loongarch/include/uapi/asm/kvm.h | 1
-rw-r--r-- arch/loongarch/kernel/Makefile | 2
-rw-r--r-- arch/loongarch/kernel/fpu.S | 2
-rw-r--r-- arch/loongarch/kernel/relocate.c | 10
-rw-r--r-- arch/loongarch/kernel/stacktrace.c | 2
-rw-r--r-- arch/loongarch/kernel/time.c | 23
-rw-r--r-- arch/loongarch/kernel/unwind.c | 1
-rw-r--r-- arch/loongarch/kernel/unwind_prologue.c | 2
-rw-r--r-- arch/loongarch/kvm/Kconfig | 3
-rw-r--r-- arch/loongarch/kvm/exit.c | 50
-rw-r--r-- arch/loongarch/kvm/main.c | 1
-rw-r--r-- arch/loongarch/kvm/mmu.c | 124
-rw-r--r-- arch/loongarch/kvm/switch.S | 31
-rw-r--r-- arch/loongarch/kvm/timer.c | 129
-rw-r--r-- arch/loongarch/kvm/trace.h | 6
-rw-r--r-- arch/loongarch/kvm/vcpu.c | 307
-rw-r--r-- arch/loongarch/mm/pgtable.c | 4
-rw-r--r-- arch/loongarch/net/bpf_jit.c | 18
-rw-r--r-- arch/m68k/include/asm/kexec.h | 4
-rw-r--r-- arch/m68k/kernel/Makefile | 2
-rw-r--r-- arch/mips/Kconfig | 2
-rw-r--r-- arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 3
-rw-r--r-- arch/mips/boot/dts/loongson/ls7a-pch.dtsi | 3
-rw-r--r-- arch/mips/cavium-octeon/smp.c | 4
-rw-r--r-- arch/mips/include/asm/kexec.h | 2
-rw-r--r-- arch/mips/include/asm/mach-loongson64/boot_param.h | 9
-rw-r--r-- arch/mips/include/asm/smp-ops.h | 2
-rw-r--r-- arch/mips/include/asm/smp.h | 2
-rw-r--r-- arch/mips/kernel/Makefile | 2
-rw-r--r-- arch/mips/kernel/process.c | 25
-rw-r--r-- arch/mips/kernel/smp-bmips.c | 4
-rw-r--r-- arch/mips/kernel/smp-cps.c | 10
-rw-r--r-- arch/mips/kernel/smp.c | 4
-rw-r--r-- arch/mips/kvm/Kconfig | 4
-rw-r--r-- arch/mips/loongson64/env.c | 10
-rw-r--r-- arch/mips/loongson64/init.c | 47
-rw-r--r-- arch/mips/loongson64/reset.c | 4
-rw-r--r-- arch/mips/loongson64/smp.c | 2
-rw-r--r-- arch/parisc/Kconfig | 13
-rw-r--r-- arch/parisc/include/asm/alternative.h | 9
-rw-r--r-- arch/parisc/include/asm/assembly.h | 1
-rw-r--r-- arch/parisc/include/asm/bug.h | 38
-rw-r--r-- arch/parisc/include/asm/elf.h | 10
-rw-r--r-- arch/parisc/include/asm/jump_label.h | 8
-rw-r--r-- arch/parisc/include/asm/ldcw.h | 2
-rw-r--r-- arch/parisc/include/asm/processor.h | 2
-rw-r--r-- arch/parisc/include/asm/uaccess.h | 1
-rw-r--r-- arch/parisc/include/uapi/asm/errno.h | 2
-rw-r--r-- arch/parisc/kernel/processor.c | 2
-rw-r--r-- arch/parisc/kernel/sys_parisc.c | 2
-rw-r--r-- arch/parisc/kernel/vmlinux.lds.S | 1
-rw-r--r-- arch/powerpc/configs/skiroot_defconfig | 1
-rw-r--r-- arch/powerpc/kernel/fpu.S | 13
-rw-r--r-- arch/powerpc/kernel/process.c | 6
-rw-r--r-- arch/powerpc/kernel/trace/ftrace_entry.S | 4
-rw-r--r-- arch/powerpc/kernel/vector.S | 2
-rw-r--r-- arch/powerpc/kvm/Kconfig | 6
-rw-r--r-- arch/powerpc/kvm/powerpc.c | 3
-rw-r--r-- arch/powerpc/platforms/pseries/vas.c | 51
-rw-r--r-- arch/powerpc/platforms/pseries/vas.h | 2
-rw-r--r-- arch/riscv/Kconfig | 23
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 7
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts | 7
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 7
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts | 7
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts | 7
-rw-r--r-- arch/riscv/boot/dts/microchip/mpfs.dtsi | 1
-rw-r--r-- arch/riscv/boot/dts/sophgo/cv1800b.dtsi | 1
-rw-r--r-- arch/riscv/errata/andes/errata.c | 20
-rw-r--r-- arch/riscv/include/asm/kvm_host.h | 10
-rw-r--r-- arch/riscv/include/asm/kvm_vcpu_sbi.h | 20
-rw-r--r-- arch/riscv/include/asm/paravirt.h | 28
-rw-r--r-- arch/riscv/include/asm/paravirt_api_clock.h | 1
-rw-r--r-- arch/riscv/include/asm/pgtable.h | 2
-rw-r--r-- arch/riscv/include/asm/sbi.h | 17
-rw-r--r-- arch/riscv/include/asm/syscall_wrapper.h | 5
-rw-r--r-- arch/riscv/include/uapi/asm/kvm.h | 13
-rw-r--r-- arch/riscv/kernel/Makefile | 1
-rw-r--r-- arch/riscv/kernel/crash_core.c | 4
-rw-r--r-- arch/riscv/kernel/head.S | 2
-rw-r--r-- arch/riscv/kernel/module.c | 114
-rw-r--r-- arch/riscv/kernel/paravirt.c | 135
-rw-r--r-- arch/riscv/kernel/sys_riscv.c | 2
-rw-r--r-- arch/riscv/kernel/tests/module_test/test_uleb128.S | 8
-rw-r--r-- arch/riscv/kernel/time.c | 3
-rw-r--r-- arch/riscv/kernel/traps_misaligned.c | 6
-rw-r--r-- arch/riscv/kvm/Kconfig | 5
-rw-r--r-- arch/riscv/kvm/Makefile | 1
-rw-r--r-- arch/riscv/kvm/aia_imsic.c | 13
-rw-r--r-- arch/riscv/kvm/vcpu.c | 10
-rw-r--r-- arch/riscv/kvm/vcpu_onereg.c | 135
-rw-r--r-- arch/riscv/kvm/vcpu_sbi.c | 142
-rw-r--r-- arch/riscv/kvm/vcpu_sbi_replace.c | 2
-rw-r--r-- arch/riscv/kvm/vcpu_sbi_sta.c | 208
-rw-r--r-- arch/riscv/kvm/vcpu_switch.S | 32
-rw-r--r-- arch/riscv/kvm/vcpu_vector.c | 16
-rw-r--r-- arch/riscv/kvm/vm.c | 1
-rw-r--r-- arch/s390/configs/debug_defconfig | 10
-rw-r--r-- arch/s390/configs/defconfig | 9
-rw-r--r-- arch/s390/configs/zfcpdump_defconfig | 3
-rw-r--r-- arch/s390/include/asm/facility.h | 6
-rw-r--r-- arch/s390/include/asm/fpu/api.h | 2
-rw-r--r-- arch/s390/include/asm/kvm_host.h | 2
-rw-r--r-- arch/s390/include/asm/processor.h | 1
-rw-r--r-- arch/s390/include/asm/syscall_wrapper.h | 13
-rw-r--r-- arch/s390/kernel/Makefile | 2
-rw-r--r-- arch/s390/kernel/facility.c | 21
-rw-r--r-- arch/s390/kernel/ipl.c | 1
-rw-r--r-- arch/s390/kernel/perf_pai_crypto.c | 11
-rw-r--r-- arch/s390/kernel/perf_pai_ext.c | 1
-rw-r--r-- arch/s390/kvm/Kconfig | 5
-rw-r--r-- arch/s390/kvm/guestdbg.c | 4
-rw-r--r-- arch/s390/kvm/kvm-s390.c | 1
-rw-r--r-- arch/s390/kvm/vsie.c | 23
-rw-r--r-- arch/s390/mm/pgtable.c | 2
-rw-r--r-- arch/sh/include/asm/kexec.h | 4
-rw-r--r-- arch/sh/kernel/Makefile | 2
-rw-r--r-- arch/sh/kernel/reboot.c | 4
-rw-r--r-- arch/sh/kernel/setup.c | 2
-rw-r--r-- arch/x86/boot/compressed/acpi.c | 2
-rw-r--r-- arch/x86/coco/tdx/tdx.c | 1
-rw-r--r-- arch/x86/entry/common.c | 93
-rw-r--r-- arch/x86/entry/entry_64_compat.S | 77
-rw-r--r-- arch/x86/events/intel/core.c | 2
-rw-r--r-- arch/x86/hyperv/hv_init.c | 25
-rw-r--r-- arch/x86/include/asm/acpi.h | 14
-rw-r--r-- arch/x86/include/asm/ia32.h | 7
-rw-r--r-- arch/x86/include/asm/idtentry.h | 4
-rw-r--r-- arch/x86/include/asm/proto.h | 4
-rw-r--r-- arch/x86/include/asm/syscall_wrapper.h | 34
-rw-r--r-- arch/x86/include/asm/xen/hypervisor.h | 9
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 34
-rw-r--r-- arch/x86/kernel/alternative.c | 14
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 3
-rw-r--r-- arch/x86/kernel/cpu/microcode/amd.c | 39
-rw-r--r-- arch/x86/kernel/cpu/microcode/core.c | 15
-rw-r--r-- arch/x86/kernel/cpu/microcode/intel.c | 17
-rw-r--r-- arch/x86/kernel/cpu/microcode/internal.h | 14
-rw-r--r-- arch/x86/kernel/cpu/mshyperv.c | 5
-rw-r--r-- arch/x86/kernel/head_64.S | 16
-rw-r--r-- arch/x86/kernel/idt.c | 2
-rw-r--r-- arch/x86/kernel/sev.c | 11
-rw-r--r-- arch/x86/kernel/signal_64.c | 6
-rw-r--r-- arch/x86/kvm/Kconfig | 7
-rw-r--r-- arch/x86/kvm/cpuid.c | 4
-rw-r--r-- arch/x86/kvm/debugfs.c | 1
-rw-r--r-- arch/x86/kvm/svm/sev.c | 19
-rw-r--r-- arch/x86/kvm/svm/svm.c | 9
-rw-r--r-- arch/x86/kvm/svm/svm.h | 2
-rw-r--r-- arch/x86/kvm/x86.c | 9
-rw-r--r-- arch/x86/mm/mem_encrypt_amd.c | 11
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 46
-rw-r--r-- arch/x86/xen/Kconfig | 1
-rw-r--r-- arch/x86/xen/enlighten.c | 6
-rw-r--r-- arch/x86/xen/enlighten_pv.c | 2
-rw-r--r-- arch/x86/xen/xen-asm.S | 2
-rw-r--r-- arch/x86/xen/xen-ops.h | 2
272 files changed, 3945 insertions(+), 1810 deletions(-)
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 3162db540ee9..1b0483c51cc1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -49,7 +49,6 @@ config ARC
select OF
select OF_EARLY_FLATTREE
select PCI_SYSCALL if PCI
- select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select TRACE_IRQFLAGS_SUPPORT
@@ -232,10 +231,6 @@ config ARC_CACHE_PAGES
Note that Global I/D ENABLE + Per Page DISABLE works but corollary
Global DISABLE + Per Page ENABLE won't work
-config ARC_CACHE_VIPT_ALIASING
- bool "Support VIPT Aliasing D$"
- depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
-
endif #ARC_CACHE
config ARC_HAS_ICCM
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index bd5b1a9a0544..563af3e75f01 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz);
#define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-
#define flush_cache_mm(mm) /* called on munmap/exit */
#define flush_cache_range(mm, u_vstart, u_vend)
#define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */
-#else /* VIPT aliasing dcache */
-
-/* To clear out stale userspace mappings */
-void flush_cache_mm(struct mm_struct *mm);
-void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start,unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long page);
-
-/*
- * To make sure that userspace mapping is flushed to memory before
- * get_user_pages() uses a kernel mapping to access the page
- */
-#define ARCH_HAS_FLUSH_ANON_PAGE
-void flush_anon_page(struct vm_area_struct *vma,
- struct page *page, unsigned long u_vaddr);
-
-#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */
-
/*
* A new pagecache page has PG_arch_1 clear - thus dcache dirty by default
* This works around some PIO based drivers which don't call flush_dcache_page
@@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma,
*/
#define PG_dc_clean PG_arch_1
-#define CACHE_COLORS_NUM 4
-#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1)
-#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK)
-
-/*
- * Simple wrapper over config option
- * Bootup code ensures that hardware matches kernel configuration
- */
-static inline int cache_is_vipt_aliasing(void)
-{
- return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-}
-
-/*
- * checks if two addresses (after page aligning) index into same cache set
- */
-#define addr_not_cache_congruent(addr1, addr2) \
-({ \
- cache_is_vipt_aliasing() ? \
- (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \
-})
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
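For reference, the deleted helpers computed a page's cache colour from its address and compared colours to decide whether two mappings were congruent. A standalone sketch of that arithmetic, assuming a PAGE_SHIFT of 13 (8 KiB pages) purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT        13   /* assumed page size, illustration only */
#define CACHE_COLORS_NUM  4
#define CACHE_COLORS_MSK  (CACHE_COLORS_NUM - 1)
#define CACHE_COLOR(addr) (((unsigned long)(addr) >> PAGE_SHIFT) & CACHE_COLORS_MSK)

int main(void)
{
	unsigned long kvaddr = 0x70002000UL;  /* hypothetical kernel mapping */
	unsigned long uvaddr = 0x20004000UL;  /* hypothetical user mapping */

	/* two mappings alias cleanly only if they index the same cache sets */
	if (CACHE_COLOR(kvaddr) != CACHE_COLOR(uvaddr))
		printf("not congruent: an explicit flush was required\n");
	return 0;
}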
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 4d13320e0c1b..3802a2daaf86 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -291,4 +291,36 @@
/* M = 8-1 N = 8 */
.endm
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
+
#endif
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index a0e760eb35a8..92c3e9f13252 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -33,6 +33,91 @@
#include <asm/irqflags-compact.h>
#include <asm/thread_info.h> /* For THREAD_SIZE */
+/* Note on the LD/ST addr modes with addr reg wback
+ *
+ * LD.a same as LD.aw
+ *
+ * LD.a reg1, [reg2, x] => Pre Incr
+ * Eff Addr for load = [reg2 + x]
+ *
+ * LD.ab reg1, [reg2, x] => Post Incr
+ * Eff Addr for load = [reg2]
+ */
+
+.macro PUSHAX aux
+ lr r9, [\aux]
+ push r9
+.endm
+
+.macro POPAX aux
+ pop r9
+ sr r9, [\aux]
+.endm
+
+.macro SAVE_R0_TO_R12
+ push r0
+ push r1
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+.endm
+
+.macro RESTORE_R12_TO_R0
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ pop r3
+ pop r2
+ pop r1
+ pop r0
+.endm
+
+.macro SAVE_ABI_CALLEE_REGS
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+.endm
+
+.macro RESTORE_ABI_CALLEE_REGS
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+.endm
+
/*--------------------------------------------------------------
* Switch to Kernel Mode stack if SP points to User Mode stack
*
@@ -235,7 +320,7 @@
SWITCH_TO_KERNEL_STK
- PUSH 0x003\LVL\()abcd /* Dummy ECR */
+ st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */
sub sp, sp, 8 /* skip orig_r0 (not needed)
skip pt_regs->sp, already saved above */
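The PUSH/POP macros that these open-coded sequences replace (removed from entry.h below) were built on exactly the two addressing modes described in the note above: st.a with a -4 offset behaves like a pre-decrement store, and ld.ab with a +4 offset like a post-increment load. An illustrative, non-kernel C analogue:

#include <assert.h>

static unsigned long stack[64];

int main(void)
{
	unsigned long *sp = &stack[64];   /* full-descending stack */

	*--sp = 0xdead;            /* st.a r, [sp, -4]: adjust sp, then store */
	*--sp = 0xbeef;
	assert(*sp++ == 0xbeef);   /* ld.ab r, [sp, 4]: load, then adjust sp */
	assert(*sp++ == 0xdead);
	return 0;
}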
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 49c2e090cb5c..cf1ba376e992 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -21,114 +21,12 @@
#include <asm/entry-arcv2.h>
#endif
-/* Note on the LD/ST addr modes with addr reg wback
- *
- * LD.a same as LD.aw
- *
- * LD.a reg1, [reg2, x] => Pre Incr
- * Eff Addr for load = [reg2 + x]
- *
- * LD.ab reg1, [reg2, x] => Post Incr
- * Eff Addr for load = [reg2]
- */
-
-.macro PUSH reg
- st.a \reg, [sp, -4]
-.endm
-
-.macro PUSHAX aux
- lr r9, [\aux]
- PUSH r9
-.endm
-
-.macro POP reg
- ld.ab \reg, [sp, 4]
-.endm
-
-.macro POPAX aux
- POP r9
- sr r9, [\aux]
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore Scratch Regs:
- * used by Interrupt/Exception Prologue/Epilogue
- *-------------------------------------------------------------*/
-.macro SAVE_R0_TO_R12
- PUSH r0
- PUSH r1
- PUSH r2
- PUSH r3
- PUSH r4
- PUSH r5
- PUSH r6
- PUSH r7
- PUSH r8
- PUSH r9
- PUSH r10
- PUSH r11
- PUSH r12
-.endm
-
-.macro RESTORE_R12_TO_R0
- POP r12
- POP r11
- POP r10
- POP r9
- POP r8
- POP r7
- POP r6
- POP r5
- POP r4
- POP r3
- POP r2
- POP r1
- POP r0
-
-.endm
-
-/*--------------------------------------------------------------
- * Helpers to save/restore callee-saved regs:
- * used by several macros below
- *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R25
- PUSH r13
- PUSH r14
- PUSH r15
- PUSH r16
- PUSH r17
- PUSH r18
- PUSH r19
- PUSH r20
- PUSH r21
- PUSH r22
- PUSH r23
- PUSH r24
- PUSH r25
-.endm
-
-.macro RESTORE_R25_TO_R13
- POP r25
- POP r24
- POP r23
- POP r22
- POP r21
- POP r20
- POP r19
- POP r18
- POP r17
- POP r16
- POP r15
- POP r14
- POP r13
-.endm
-
/*
* save user mode callee regs as struct callee_regs
* - needed by fork/do_signal/unaligned-access-emulation.
*/
.macro SAVE_CALLEE_SAVED_USER
- SAVE_R13_TO_R25
+ SAVE_ABI_CALLEE_REGS
.endm
/*
@@ -136,18 +34,18 @@
* - could have been changed by ptrace tracer or unaligned-access fixup
*/
.macro RESTORE_CALLEE_SAVED_USER
- RESTORE_R25_TO_R13
+ RESTORE_ABI_CALLEE_REGS
.endm
/*
* save/restore kernel mode callee regs at the time of context switch
*/
.macro SAVE_CALLEE_SAVED_KERNEL
- SAVE_R13_TO_R25
+ SAVE_ABI_CALLEE_REGS
.endm
.macro RESTORE_CALLEE_SAVED_KERNEL
- RESTORE_R25_TO_R13
+ RESTORE_ABI_CALLEE_REGS
.endm
/*--------------------------------------------------------------
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index ef8d4166370c..8a2441670a8f 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -10,6 +10,13 @@
#include <linux/types.h>
#include <asm-generic/pgtable-nopmd.h>
+/*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 4a2b30fb5a98..00b9318e551e 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -54,6 +54,10 @@ struct pt_regs {
ecr_reg ecr;
};
+struct callee_regs {
+ unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
+};
+
#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
#else
@@ -92,16 +96,14 @@ struct pt_regs {
unsigned long status32;
};
-#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
-
-#endif
-
-/* Callee saved registers - need to be saved only when you are scheduled out */
-
struct callee_regs {
unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
};
+#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
+
+#endif
+
#define instruction_pointer(regs) ((regs)->ret)
#define profile_pc(regs) instruction_pointer(regs)
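The member order of struct callee_regs (r25 first, r13 last) mirrors the new SAVE_ABI_CALLEE_REGS/RESTORE_ABI_CALLEE_REGS macros: r13 is pushed first onto a descending stack and lands at the highest address, while r25, pushed last, lands at offset 0, where the struct begins. A host-side sketch of that correspondence (offsets shown for the struct layout, not real hardware):

#include <stdio.h>
#include <stddef.h>

struct callee_regs {
	unsigned long r25, r24, r23, r22, r21, r20, r19, r18,
		      r17, r16, r15, r14, r13;
};

int main(void)
{
	/* r25 (pushed last) sits at offset 0; r13 (pushed first) at the top */
	printf("r25 at %zu, r13 at %zu\n",
	       offsetof(struct callee_regs, r25),
	       offsetof(struct callee_regs, r13));
	return 0;
}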
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 4dcf8589b708..d08a5092c2b4 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
int n = 0;
#ifdef CONFIG_ISA_ARCV2
- const char *release, *cpu_nm, *isa_nm = "ARCv2";
+ const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2";
int dual_issue = 0, dual_enb = 0, mpy_opt, present;
int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
char mpy_nm[16], lpb_nm[32];
@@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
* releases only update it.
*/
- cpu_nm = "HS38";
-
if (info->arcver > 0x50 && info->arcver <= 0x53) {
release = arc_hs_rel[info->arcver - 0x51].str;
} else {
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 0b3bb529d246..8f6f4a542964 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -62,7 +62,7 @@ struct rt_sigframe {
unsigned int sigret_magic;
};
-static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
{
int err = 0;
#ifndef CONFIG_ISA_ARCOMPACT
@@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
#else
v2abi.r58 = v2abi.r59 = 0;
#endif
- err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+ err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi));
#endif
return err;
}
-static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs)
{
int err = 0;
#ifndef CONFIG_ISA_ARCOMPACT
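The retyped parameters fix a sparse address-space warning: __user is invisible to the compiler but tells sparse that the pointer may only be reached through the uaccess helpers. A reduced sketch of how such an annotation works (the kernel's real definition lives in compiler_types.h; the stand-in below is simplified):

/* empty for the compiler, an address-space attribute under sparse */
#ifdef __CHECKER__
#define __user __attribute__((noderef, address_space(1)))
#else
#define __user
#endif

struct sigcontext_sketch { unsigned long v2abi; };

/* with the __user tag, sparse flags any direct dereference of mctx
 * and any call that expects a plain kernel pointer */
static int save_regs_sketch(struct sigcontext_sketch __user *mctx)
{
	(void)mctx;   /* would be __copy_to_user(&mctx->v2abi, ...) here */
	return 0;
}

int main(void) { return save_regs_sketch(0); }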
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index f7e05c146637..9106ceac323c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -145,10 +145,9 @@ dc_chk:
p_dc->sz_k = 1 << (dbcr.sz - 1);
n += scnprintf(buf + n, len - n,
- "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+ "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n",
p_dc->sz_k, assoc, p_dc->line_len,
vipt ? "VIPT" : "PIPT",
- p_dc->colors > 1 ? " aliasing" : "",
IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
slc_chk:
@@ -703,51 +702,10 @@ static inline void arc_slc_enable(void)
* Exported APIs
*/
-/*
- * Handle cache congruency of kernel and userspace mappings of page when kernel
- * writes-to/reads-from
- *
- * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
- * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
- * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
- * -In SMP, if hardware caches are coherent
- *
- * There's a corollary case, where kernel READs from a userspace mapped page.
- * If the U-mapping is not congruent to K-mapping, former needs flushing.
- */
void flush_dcache_folio(struct folio *folio)
{
- struct address_space *mapping;
-
- if (!cache_is_vipt_aliasing()) {
- clear_bit(PG_dc_clean, &folio->flags);
- return;
- }
-
- /* don't handle anon pages here */
- mapping = folio_flush_mapping(folio);
- if (!mapping)
- return;
-
- /*
- * pagecache page, file not yet mapped to userspace
- * Make a note that K-mapping is dirty
- */
- if (!mapping_mapped(mapping)) {
- clear_bit(PG_dc_clean, &folio->flags);
- } else if (folio_mapped(folio)) {
- /* kernel reading from page with U-mapping */
- phys_addr_t paddr = (unsigned long)folio_address(folio);
- unsigned long vaddr = folio_pos(folio);
-
- /*
- * vaddr is not actually the virtual address, but is
- * congruent to every user mapping.
- */
- if (addr_not_cache_congruent(paddr, vaddr))
- __flush_dcache_pages(paddr, vaddr,
- folio_nr_pages(folio));
- }
+ clear_bit(PG_dc_clean, &folio->flags);
+ return;
}
EXPORT_SYMBOL(flush_dcache_folio);
@@ -921,44 +879,6 @@ noinline void flush_cache_all(void)
}
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-
-void flush_cache_mm(struct mm_struct *mm)
-{
- flush_cache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
- unsigned long pfn)
-{
- phys_addr_t paddr = pfn << PAGE_SHIFT;
-
- u_vaddr &= PAGE_MASK;
-
- __flush_dcache_pages(paddr, u_vaddr, 1);
-
- if (vma->vm_flags & VM_EXEC)
- __inv_icache_pages(paddr, u_vaddr, 1);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- flush_cache_all();
-}
-
-void flush_anon_page(struct vm_area_struct *vma, struct page *page,
- unsigned long u_vaddr)
-{
- /* TBD: do we really need to clear the kernel mapping */
- __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1);
- __flush_dcache_pages((phys_addr_t)page_address(page),
- (phys_addr_t)page_address(page), 1);
-
-}
-
-#endif
-
void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma)
{
@@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from,
struct folio *dst = page_folio(to);
void *kfrom = kmap_atomic(from);
void *kto = kmap_atomic(to);
- int clean_src_k_mappings = 0;
-
- /*
- * If SRC page was already mapped in userspace AND it's U-mapping is
- * not congruent with K-mapping, sync former to physical page so that
- * K-mapping in memcpy below, sees the right data
- *
- * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
- * equally valid for SRC page as well
- *
- * For !VIPT cache, all of this gets compiled out as
- * addr_not_cache_congruent() is 0
- */
- if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
- __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1);
- clean_src_k_mappings = 1;
- }
copy_page(kto, kfrom);
- /*
- * Mark DST page K-mapping as dirty for a later finalization by
- * update_mmu_cache(). Although the finalization could have been done
- * here as well (given that both vaddr/paddr are available).
- * But update_mmu_cache() already has code to do that for other
- * non copied user pages (e.g. read faults which wire in pagecache page
- * directly).
- */
clear_bit(PG_dc_clean, &dst->flags);
-
- /*
- * if SRC was already usermapped and non-congruent to kernel mapping
- * sync the kernel mapping back to physical page
- */
- if (clean_src_k_mappings) {
- __flush_dcache_pages((unsigned long)kfrom,
- (unsigned long)kfrom, 1);
- } else {
- clear_bit(PG_dc_clean, &src->flags);
- }
+ clear_bit(PG_dc_clean, &src->flags);
kunmap_atomic(kto);
kunmap_atomic(kfrom);
@@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void)
dc->line_len, L1_CACHE_BYTES);
/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
- if (is_isa_arcompact()) {
- int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-
- if (dc->colors > 1) {
- if (!handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- if (CACHE_COLORS_NUM != dc->colors)
- panic("CACHE_COLORS_NUM not optimized for config\n");
- } else if (handled && dc->colors == 1) {
- panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- }
+ if (is_isa_arcompact() && dc->colors > 1) {
+ panic("Aliasing VIPT cache not supported\n");
}
}
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index fce5fa2b4f52..3c1c7ae73292 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -14,10 +14,6 @@
#include <asm/cacheflush.h>
-#define COLOUR_ALIGN(addr, pgoff) \
- ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
- (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))
-
/*
* Ensure that shared mappings are correctly aligned to
* avoid aliasing issues with VIPT caches.
@@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- int do_align = 0;
- int aliasing = cache_is_vipt_aliasing();
struct vm_unmapped_area_info info;
/*
- * We only need to do colour alignment if D cache aliases.
- */
- if (aliasing)
- do_align = filp || (flags & MAP_SHARED);
-
- /*
* We enforce the MAP_FIXED case.
*/
if (flags & MAP_FIXED) {
- if (aliasing && flags & MAP_SHARED &&
+ if (flags & MAP_SHARED &&
(addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
return -EINVAL;
return addr;
@@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return -ENOMEM;
if (addr) {
- if (do_align)
- addr = COLOUR_ALIGN(addr, pgoff);
- else
- addr = PAGE_ALIGN(addr);
+ addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
@@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
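The removed COLOUR_ALIGN() rounded a candidate address up to an SHMLBA boundary and then added the colour implied by the file offset, so every shared mapping of a given file offset landed on the same cache colour. A standalone sketch of that arithmetic (SHMLBA assumed to span four 8 KiB pages, illustration only):

#include <stdio.h>

#define PAGE_SHIFT 13                  /* assumed for illustration */
#define SHMLBA     (4UL << PAGE_SHIFT) /* four page colours */

static unsigned long colour_align(unsigned long addr, unsigned long pgoff)
{
	/* round up to SHMLBA, then add the file offset's colour */
	return ((addr + SHMLBA - 1) & ~(SHMLBA - 1)) +
	       ((pgoff << PAGE_SHIFT) & (SHMLBA - 1));
}

int main(void)
{
	unsigned long a = colour_align(0x20001000UL, 3);

	printf("aligned %#lx, colour %lu (matches pgoff %% 4)\n",
	       a, (a >> PAGE_SHIFT) & 3);
	return 0;
}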
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index e536b2dcd4b0..ad702b49aeb3 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
create_tlb(vma, vaddr, ptep);
- if (page == ZERO_PAGE(0)) {
+ if (page == ZERO_PAGE(0))
return;
- }
/*
- * Exec page : Independent of aliasing/page-color considerations,
- * since icache doesn't snoop dcache on ARC, any dirty
- * K-mapping of a code page needs to be wback+inv so that
- * icache fetch by userspace sees code correctly.
- * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it
- * so userspace sees the right data.
- * (Avoids the flush for Non-exec + congruent mapping case)
+ * For executable pages, since icache doesn't snoop dcache, any
+ * dirty K-mapping of a code page needs to be wback+inv so that
+ * icache fetch by userspace sees code correctly.
*/
- if ((vma->vm_flags & VM_EXEC) ||
- addr_not_cache_congruent(paddr, vaddr)) {
+ if (vma->vm_flags & VM_EXEC) {
struct folio *folio = page_folio(page);
int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);
if (dirty) {
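Taken together with the flush_dcache_folio() simplification above, the remaining dcache bookkeeping is a single lazy bit: writers clear PG_dc_clean, and the fault path flushes only when it is the one to flip the bit back to set. A minimal single-threaded sketch of that protocol, with plain C stand-ins for the kernel's clear_bit()/test_and_set_bit():

#include <stdbool.h>
#include <stdio.h>

#define PG_DC_CLEAN 0x1UL

static unsigned long folio_flags;

/* writer side: mark the kernel mapping dirty */
static void mark_dirty(void)
{
	folio_flags &= ~PG_DC_CLEAN;          /* clear_bit() stand-in */
}

/* fault side: flush once per dirty->clean transition on exec pages */
static void finalize(bool exec)
{
	if (!exec)
		return;
	bool was_clean = folio_flags & PG_DC_CLEAN;
	folio_flags |= PG_DC_CLEAN;           /* test_and_set_bit() stand-in */
	if (!was_clean)
		printf("wback+inv dcache, inv icache\n");
}

int main(void)
{
	mark_dirty();
	finalize(true);   /* flushes */
	finalize(true);   /* no-op: bit already set */
	return 0;
}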
diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
index 1ab8184302db..5a2869a18bd5 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
+++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
@@ -36,9 +36,7 @@
gpios = <&gpio 42 GPIO_ACTIVE_HIGH>;
};
-&leds {
- /delete-node/ led_act;
-};
+/delete-node/ &led_act;
&pm {
/delete-property/ system-power-controller;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
index a3f247c722b4..0342a79ccd5d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
@@ -37,9 +37,9 @@
&clks {
assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
- <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+ <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>;
assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>,
- <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>;
+ <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>;
};
&hdmi {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
index 4ffe99ed55ca..07dcecbe485d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
@@ -121,6 +121,8 @@
max-speed = <100>;
interrupt-parent = <&gpio5>;
interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&clks IMX6UL_CLK_ENET_REF>;
+ clock-names = "rmii-ref";
};
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
index 29b8fd03567a..5387da8a2a0a 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
@@ -454,7 +454,7 @@
};
gpt1: timer@302d0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302d0000 0x10000>;
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
@@ -463,7 +463,7 @@
};
gpt2: timer@302e0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302e0000 0x10000>;
interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
@@ -473,7 +473,7 @@
};
gpt3: timer@302f0000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x302f0000 0x10000>;
interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
@@ -483,7 +483,7 @@
};
gpt4: timer@30300000 {
- compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+ compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
reg = <0x30300000 0x10000>;
interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
index a400c108f66a..6c5e6856648a 100644
--- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
+++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
@@ -8,6 +8,7 @@
#include "imx28-lwe.dtsi"
/ {
+ model = "Liebherr XEA board";
compatible = "lwn,imx28-xea", "fsl,imx28";
};
diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi
index 7bf557c99561..01edf244ddee 100644
--- a/arch/arm/boot/dts/rockchip/rk3128.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi
@@ -848,7 +848,7 @@
};
sdmmc_pwren: sdmmc-pwren {
- rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>;
+ rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>;
};
sdmmc_bus4: sdmmc-bus4 {
diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi
index ffc16d6b97e1..a721744cbfd1 100644
--- a/arch/arm/boot/dts/rockchip/rk322x.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi
@@ -215,9 +215,9 @@
power-domain@RK3228_PD_VOP {
reg = <RK3228_PD_VOP>;
- clocks =<&cru ACLK_VOP>,
- <&cru DCLK_VOP>,
- <&cru HCLK_VOP>;
+ clocks = <&cru ACLK_VOP>,
+ <&cru DCLK_VOP>,
+ <&cru HCLK_VOP>;
pm_qos = <&qos_vop>;
#power-domain-cells = <0>;
};
diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
index 1a2cd5baf402..5b9e01a8aa5d 100644
--- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi
@@ -359,6 +359,7 @@
<SYSC_IDLE_NO>,
<SYSC_IDLE_SMART>,
<SYSC_IDLE_SMART_WKUP>;
+ ti,sysc-delay-us = <2>;
clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi
index 3f3e52e3b375..6509c742fb58 100644
--- a/arch/arm/boot/dts/ti/omap/dra7.dtsi
+++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi
@@ -147,7 +147,7 @@
l3-noc@44000000 {
compatible = "ti,dra7-l3-noc";
- reg = <0x44000000 0x1000>,
+ reg = <0x44000000 0x1000000>,
<0x45000000 0x1000>;
interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
<&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h
index e62832dcba76..a8287e7ab9d4 100644
--- a/arch/arm/include/asm/kexec.h
+++ b/arch/arm/include/asm/kexec.h
@@ -2,8 +2,6 @@
#ifndef _ARM_KEXEC_H
#define _ARM_KEXEC_H
-#ifdef CONFIG_KEXEC
-
/* Maximum physical address we can use pages from */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
@@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn)
#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_KEXEC */
-
#endif /* _ARM_KEXEC_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index d53f56d6f840..771264d4726a 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
# Main staffs in KPROBES are in arch/arm/probes/ .
obj-$(CONFIG_KPROBES) += patch.o insn.o
obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 2157493b78a9..df69af932375 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
name = devm_kasprintf(&pdev->dev,
GFP_KERNEL, "mmdc%d", ret);
+ if (!name) {
+ ret = -ENOMEM;
+ goto pmu_release_id;
+ }
pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data;
@@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
pmu_register_err:
pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
- ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
hrtimer_cancel(&pmu_mmdc->hrtimer);
+pmu_release_id:
+ ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
pmu_free:
kfree(pmu_mmdc);
return ret;
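The reshuffled labels follow the usual error-unwind rule: each label releases exactly the resources acquired before the failing step, in reverse order, so the new devm_kasprintf() failure path can release the ida id without touching the not-yet-registered hotplug instance or timer. A generic sketch of the pattern (the acquire/release names are hypothetical stand-ins for the driver's ida id, cpuhp instance and hrtimer):

#include <stdio.h>

static int acquire(const char *what, int fail)
{
	printf("acquire %s\n", what);
	return fail ? -1 : 0;
}

static void release(const char *what)
{
	printf("release %s\n", what);
}

static int setup(int fail_late)
{
	int err;

	if ((err = acquire("id", 0)))
		return err;
	if ((err = acquire("cpuhp", 0)))
		goto err_id;               /* only the id is held here */
	if ((err = acquire("hrtimer", fail_late)))
		goto err_cpuhp;
	return 0;

err_cpuhp:
	release("cpuhp");                  /* reverse order of acquisition */
err_id:
	release("id");
	return err;
}

int main(void)
{
	setup(1);   /* exercise the failure path */
	return 0;
}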
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 98999aa8cc0c..7f387706368a 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -793,11 +793,16 @@ void __init omap_soc_device_init(void)
soc_dev_attr->machine = soc_name;
soc_dev_attr->family = omap_get_family();
+ if (!soc_dev_attr->family) {
+ kfree(soc_dev_attr);
+ return;
+ }
soc_dev_attr->revision = soc_rev;
soc_dev_attr->custom_attr_group = omap_soc_groups[0];
soc_dev = soc_device_register(soc_dev_attr);
if (IS_ERR(soc_dev)) {
+ kfree(soc_dev_attr->family);
kfree(soc_dev_attr);
return;
}
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 9afdc4c4a5dc..a395b6c0aae2 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -484,7 +484,8 @@ static int __init xen_guest_init(void)
* for secondary CPUs as they are brought up.
* For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
*/
- xen_vcpu_info = alloc_percpu(struct vcpu_info);
+ xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
+ 1 << fls(sizeof(struct vcpu_info) - 1));
if (xen_vcpu_info == NULL)
return -ENOMEM;
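The replacement allocates the per-cpu vcpu_info with its alignment rounded up to the next power of two, which (for a structure no larger than a page) keeps it from straddling a page boundary. `1 << fls(n - 1)` is the usual round-up-to-power-of-two idiom; a quick standalone check, with a portable stand-in for the kernel's fls():

#include <stdio.h>

/* stand-in for the kernel's fls(): 1-based index of the highest set
 * bit, 0 for an input of 0 */
static int fls_sketch(unsigned int n)
{
	int bit = 0;

	while (n) {
		bit++;
		n >>= 1;
	}
	return bit;
}

int main(void)
{
	/* structure sizes below are made up purely for illustration */
	unsigned int sizes[] = { 24, 40, 64 };

	for (int i = 0; i < 3; i++)
		printf("size %u -> alignment %u\n",
		       sizes[i], 1u << fls_sketch(sizes[i] - 1));
	return 0;
}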
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 4bd85cc0d32b..9a2d3723cd0f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -158,7 +158,7 @@ endif
all: $(notdir $(KBUILD_IMAGE))
-
+vmlinuz.efi: Image
Image vmlinuz.efi: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
index 15290e6892fc..fc7315b94406 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi
@@ -68,10 +68,7 @@
&emac0 {
pinctrl-names = "default";
pinctrl-0 = <&ext_rgmii_pins>;
- phy-mode = "rgmii";
phy-handle = <&ext_rgmii_phy>;
- allwinner,rx-delay-ps = <3100>;
- allwinner,tx-delay-ps = <700>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
index d83852e72f06..b5d713926a34 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts
@@ -13,6 +13,9 @@
};
&emac0 {
+ allwinner,rx-delay-ps = <3100>;
+ allwinner,tx-delay-ps = <700>;
+ phy-mode = "rgmii";
phy-supply = <&reg_dcdce>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
index 00fe28caac93..b3b1b8692125 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts
@@ -13,6 +13,8 @@
};
&emac0 {
+ allwinner,tx-delay-ps = <700>;
+ phy-mode = "rgmii-rxid";
phy-supply = <&reg_dldo1>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
index 5ce5fbf2b38e..f69b0c17560a 100644
--- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
@@ -82,12 +82,9 @@
pinctrl-0 = <&pinctrl_wifi_pdn>;
gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>;
enable-active-high;
+ regulator-always-on;
regulator-name = "wifi_pwrdn_fake_regulator";
regulator-settling-time-us = <100>;
-
- regulator-state-mem {
- regulator-off-in-suspend;
- };
};
reg_pcie_switch: regulator-pcie-switch {
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index ce66d30a4839..b0bb77150adc 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 {
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 49ad3413db94..7e510b21bbac 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 {
<&pwm0_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 {
<&pwm1_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 {
<&pwm2_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 {
<&pwm3_lpcg 1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
- #pwm-cells = <2>;
+ #pwm-cells = <3>;
interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index c9a610ba4836..1264da6012f9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -2072,6 +2072,7 @@
phys = <&usb3_phy0>, <&usb3_phy0>;
phy-names = "usb2-phy", "usb3-phy";
snps,gfladj-refclk-lpm-sel-quirk;
+ snps,parkmode-disable-ss-quirk;
};
};
@@ -2114,6 +2115,7 @@
phys = <&usb3_phy1>, <&usb3_phy1>;
phy-names = "usb2-phy", "usb3-phy";
snps,gfladj-refclk-lpm-sel-quirk;
+ snps,parkmode-disable-ss-quirk;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 4b1ce9fc1758..c6dc3ba0d43b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -1649,6 +1649,7 @@
phys = <&usb3_phy0>, <&usb3_phy0>;
phy-names = "usb2-phy", "usb3-phy";
power-domains = <&pgc_otg1>;
+ snps,parkmode-disable-ss-quirk;
status = "disabled";
};
@@ -1680,6 +1681,7 @@
phys = <&usb3_phy1>, <&usb3_phy1>;
phy-names = "usb2-phy", "usb3-phy";
power-domains = <&pgc_otg2>;
+ snps,parkmode-disable-ss-quirk;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 01539df335f8..8439dd6b3935 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -96,6 +96,17 @@
status = "okay";
};
+&edma3 {
+ power-domains = <&pd IMX_SC_R_DMA_1_CH0>,
+ <&pd IMX_SC_R_DMA_1_CH1>,
+ <&pd IMX_SC_R_DMA_1_CH2>,
+ <&pd IMX_SC_R_DMA_1_CH3>,
+ <&pd IMX_SC_R_DMA_1_CH4>,
+ <&pd IMX_SC_R_DMA_1_CH5>,
+ <&pd IMX_SC_R_DMA_1_CH6>,
+ <&pd IMX_SC_R_DMA_1_CH7>;
+};
+
&flexcan1 {
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
index f22c1ac391c9..c4a0082f30d3 100644
--- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
@@ -483,7 +483,7 @@
};
};
- gpioe: gpio@2d000080 {
+ gpioe: gpio@2d000000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2d000000 0x1000>;
gpio-controller;
@@ -498,7 +498,7 @@
gpio-ranges = <&iomuxc1 0 32 24>;
};
- gpiof: gpio@2d010080 {
+ gpiof: gpio@2d010000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2d010000 0x1000>;
gpio-controller;
@@ -534,7 +534,7 @@
};
};
- gpiod: gpio@2e200080 {
+ gpiod: gpio@2e200000 {
compatible = "fsl,imx8ulp-gpio";
reg = <0x2e200000 0x1000>;
gpio-controller;
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
index f06139bdff97..3c5c67ebee5d 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
@@ -577,7 +577,7 @@
fsl,pins = <
MX93_PAD_UART2_TXD__LPUART2_TX 0x31e
MX93_PAD_UART2_RXD__LPUART2_RX 0x31e
- MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e
+ MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index ceccf4766440..34c0540276d1 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -417,7 +417,7 @@
compatible = "fsl,imx93-src-slice";
reg = <0x44462400 0x400>, <0x44465800 0x400>;
#power-domain-cells = <0>;
- clocks = <&clk IMX93_CLK_MEDIA_AXI>,
+ clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>,
<&clk IMX93_CLK_MEDIA_APB>;
};
};
@@ -957,7 +957,7 @@
};
};
- gpio2: gpio@43810080 {
+ gpio2: gpio@43810000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43810000 0x1000>;
gpio-controller;
@@ -972,7 +972,7 @@
gpio-ranges = <&iomuxc 0 4 30>;
};
- gpio3: gpio@43820080 {
+ gpio3: gpio@43820000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43820000 0x1000>;
gpio-controller;
@@ -988,7 +988,7 @@
<&iomuxc 26 34 2>, <&iomuxc 28 0 4>;
};
- gpio4: gpio@43830080 {
+ gpio4: gpio@43830000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x43830000 0x1000>;
gpio-controller;
@@ -1003,7 +1003,7 @@
gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>;
};
- gpio1: gpio@47400080 {
+ gpio1: gpio@47400000 {
compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
reg = <0x47400000 0x1000>;
gpio-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index 3b7a176b7904..c46682150e50 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -73,7 +73,7 @@
};
};
- memory {
+ memory@40000000 {
reg = <0 0x40000000 0 0x40000000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index a885a3fbe456..2dc1bdc74e21 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -55,7 +55,7 @@
};
};
- memory {
+ memory@40000000 {
reg = <0 0x40000000 0 0x20000000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
index af4a4309bda4..b876e501216b 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
@@ -126,6 +126,7 @@
compatible = "sff,sfp";
i2c-bus = <&i2c_sfp1>;
los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>;
+ maximum-power-milliwatt = <3000>;
mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>;
tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>;
tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>;
@@ -137,6 +138,7 @@
i2c-bus = <&i2c_sfp2>;
los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <3000>;
tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>;
tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>;
};
@@ -150,16 +152,16 @@
trip = <&cpu_trip_active_high>;
};
- cpu-active-low {
+ cpu-active-med {
/* active: set fan to cooling level 1 */
cooling-device = <&fan 1 1>;
- trip = <&cpu_trip_active_low>;
+ trip = <&cpu_trip_active_med>;
};
- cpu-passive {
- /* passive: set fan to cooling level 0 */
+ cpu-active-low {
+ /* active: set fan to cooling level 0 */
cooling-device = <&fan 0 0>;
- trip = <&cpu_trip_passive>;
+ trip = <&cpu_trip_active_low>;
};
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index 24eda00e320d..fc751e049953 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -374,6 +374,10 @@
reg = <0 0x11230000 0 0x1000>,
<0 0x11c20000 0 0x1000>;
interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
+ <&topckgen CLK_TOP_EMMC_250M_SEL>;
+ assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>,
+ <&topckgen CLK_TOP_NET1PLL_D5_D2>;
clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
<&infracfg CLK_INFRA_MSDC_HCK_CK>,
<&infracfg CLK_INFRA_MSDC_CK>,
@@ -610,22 +614,34 @@
thermal-sensors = <&thermal 0>;
trips {
+ cpu_trip_crit: crit {
+ temperature = <125000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+
+ cpu_trip_hot: hot {
+ temperature = <120000>;
+ hysteresis = <2000>;
+ type = "hot";
+ };
+
cpu_trip_active_high: active-high {
temperature = <115000>;
hysteresis = <2000>;
type = "active";
};
- cpu_trip_active_low: active-low {
+ cpu_trip_active_med: active-med {
temperature = <85000>;
hysteresis = <2000>;
type = "active";
};
- cpu_trip_passive: passive {
- temperature = <40000>;
+ cpu_trip_active_low: active-low {
+ temperature = <60000>;
hysteresis = <2000>;
- type = "passive";
+ type = "active";
};
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 5122963d8743..d258c80213b2 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -44,7 +44,7 @@
id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>;
};
- usb_p1_vbus: regulator@0 {
+ usb_p1_vbus: regulator-usb-p1 {
compatible = "regulator-fixed";
regulator-name = "usb_vbus";
regulator-min-microvolt = <5000000>;
@@ -53,7 +53,7 @@
enable-active-high;
};
- usb_p0_vbus: regulator@1 {
+ usb_p0_vbus: regulator-usb-p0 {
compatible = "regulator-fixed";
regulator-name = "vbus";
regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
index ce336a48c897..77f9ab94c00b 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
@@ -31,14 +31,14 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
- scp_mem_reserved: scp_mem_region {
+ scp_mem_reserved: memory@50000000 {
compatible = "shared-dma-pool";
reg = <0 0x50000000 0 0x2900000>;
no-map;
};
};
- ntc@0 {
+ thermal-sensor {
compatible = "murata,ncp03wf104";
pullup-uv = <1800000>;
pullup-ohm = <390000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
index bf97b60ae4d1..820260348de9 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
@@ -91,6 +91,8 @@
&dsi0 {
status = "okay";
+ /delete-property/#size-cells;
+ /delete-property/#address-cells;
/delete-node/panel@0;
ports {
port {
@@ -441,20 +443,20 @@
};
touchscreen_pins: touchscreen-pins {
- touch_int_odl {
+ touch-int-odl {
pinmux = <PINMUX_GPIO155__FUNC_GPIO155>;
input-enable;
bias-pull-up;
};
- touch_rst_l {
+ touch-rst-l {
pinmux = <PINMUX_GPIO156__FUNC_GPIO156>;
output-high;
};
};
trackpad_pins: trackpad-pins {
- trackpad_int {
+ trackpad-int {
pinmux = <PINMUX_GPIO7__FUNC_GPIO7>;
input-enable;
bias-disable; /* pulled externally */
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index bf7de35ffcbc..7881a27be029 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -116,7 +116,7 @@
#size-cells = <2>;
ranges;
- scp_mem_reserved: scp_mem_region {
+ scp_mem_reserved: memory@50000000 {
compatible = "shared-dma-pool";
reg = <0 0x50000000 0 0x2900000>;
no-map;
@@ -460,7 +460,7 @@
&pio {
aud_pins_default: audiopins {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO97__FUNC_I2S2_MCK>,
<PINMUX_GPIO98__FUNC_I2S2_BCK>,
<PINMUX_GPIO101__FUNC_I2S2_LRCK>,
@@ -482,7 +482,7 @@
};
aud_pins_tdm_out_on: audiotdmouton {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO169__FUNC_TDM_BCK_2ND>,
<PINMUX_GPIO170__FUNC_TDM_LRCK_2ND>,
<PINMUX_GPIO171__FUNC_TDM_DATA0_2ND>,
@@ -494,7 +494,7 @@
};
aud_pins_tdm_out_off: audiotdmoutoff {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO169__FUNC_GPIO169>,
<PINMUX_GPIO170__FUNC_GPIO170>,
<PINMUX_GPIO171__FUNC_GPIO171>,
@@ -508,13 +508,13 @@
};
bt_pins: bt-pins {
- pins_bt_en {
+ pins-bt-en {
pinmux = <PINMUX_GPIO120__FUNC_GPIO120>;
output-low;
};
};
- ec_ap_int_odl: ec_ap_int_odl {
+ ec_ap_int_odl: ec-ap-int-odl {
pins1 {
pinmux = <PINMUX_GPIO151__FUNC_GPIO151>;
input-enable;
@@ -522,7 +522,7 @@
};
};
- h1_int_od_l: h1_int_od_l {
+ h1_int_od_l: h1-int-od-l {
pins1 {
pinmux = <PINMUX_GPIO153__FUNC_GPIO153>;
input-enable;
@@ -530,7 +530,7 @@
};
i2c0_pins: i2c0 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO82__FUNC_SDA0>,
<PINMUX_GPIO83__FUNC_SCL0>;
mediatek,pull-up-adv = <3>;
@@ -539,7 +539,7 @@
};
i2c1_pins: i2c1 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO81__FUNC_SDA1>,
<PINMUX_GPIO84__FUNC_SCL1>;
mediatek,pull-up-adv = <3>;
@@ -548,7 +548,7 @@
};
i2c2_pins: i2c2 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO103__FUNC_SCL2>,
<PINMUX_GPIO104__FUNC_SDA2>;
bias-disable;
@@ -557,7 +557,7 @@
};
i2c3_pins: i2c3 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO50__FUNC_SCL3>,
<PINMUX_GPIO51__FUNC_SDA3>;
mediatek,pull-up-adv = <3>;
@@ -566,7 +566,7 @@
};
i2c4_pins: i2c4 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO105__FUNC_SCL4>,
<PINMUX_GPIO106__FUNC_SDA4>;
bias-disable;
@@ -575,7 +575,7 @@
};
i2c5_pins: i2c5 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO48__FUNC_SCL5>,
<PINMUX_GPIO49__FUNC_SDA5>;
mediatek,pull-up-adv = <3>;
@@ -584,7 +584,7 @@
};
i2c6_pins: i2c6 {
- pins_bus {
+ pins-bus {
pinmux = <PINMUX_GPIO11__FUNC_SCL6>,
<PINMUX_GPIO12__FUNC_SDA6>;
bias-disable;
@@ -592,7 +592,7 @@
};
mmc0_pins_default: mmc0-pins-default {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
<PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
<PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -607,13 +607,13 @@
mediatek,pull-up-adv = <01>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_rst {
+ pins-rst {
pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <01>;
@@ -621,7 +621,7 @@
};
mmc0_pins_uhs: mmc0-pins-uhs {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
<PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
<PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -636,19 +636,19 @@
mediatek,pull-up-adv = <01>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_ds {
+ pins-ds {
pinmux = <PINMUX_GPIO131__FUNC_MSDC0_DSL>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-down-adv = <10>;
};
- pins_rst {
+ pins-rst {
pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
drive-strength = <MTK_DRIVE_14mA>;
mediatek,pull-up-adv = <01>;
@@ -656,7 +656,7 @@
};
mmc1_pins_default: mmc1-pins-default {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
<PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
<PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -666,7 +666,7 @@
mediatek,pull-up-adv = <10>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
input-enable;
mediatek,pull-down-adv = <10>;
@@ -674,7 +674,7 @@
};
mmc1_pins_uhs: mmc1-pins-uhs {
- pins_cmd_dat {
+ pins-cmd-dat {
pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
<PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
<PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -685,7 +685,7 @@
mediatek,pull-up-adv = <10>;
};
- pins_clk {
+ pins-clk {
pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
drive-strength = <MTK_DRIVE_8mA>;
mediatek,pull-down-adv = <10>;
@@ -693,15 +693,15 @@
};
};
- panel_pins_default: panel_pins_default {
- panel_reset {
+ panel_pins_default: panel-pins-default {
+ panel-reset {
pinmux = <PINMUX_GPIO45__FUNC_GPIO45>;
output-low;
bias-pull-up;
};
};
- pwm0_pin_default: pwm0_pin_default {
+ pwm0_pin_default: pwm0-pin-default {
pins1 {
pinmux = <PINMUX_GPIO176__FUNC_GPIO176>;
output-high;
@@ -713,14 +713,14 @@
};
scp_pins: scp {
- pins_scp_uart {
+ pins-scp-uart {
pinmux = <PINMUX_GPIO110__FUNC_TP_URXD1_AO>,
<PINMUX_GPIO112__FUNC_TP_UTXD1_AO>;
};
};
spi0_pins: spi0 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO85__FUNC_SPI0_MI>,
<PINMUX_GPIO86__FUNC_GPIO86>,
<PINMUX_GPIO87__FUNC_SPI0_MO>,
@@ -730,7 +730,7 @@
};
spi1_pins: spi1 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO161__FUNC_SPI1_A_MI>,
<PINMUX_GPIO162__FUNC_SPI1_A_CSB>,
<PINMUX_GPIO163__FUNC_SPI1_A_MO>,
@@ -740,20 +740,20 @@
};
spi2_pins: spi2 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO0__FUNC_SPI2_CSB>,
<PINMUX_GPIO1__FUNC_SPI2_MO>,
<PINMUX_GPIO2__FUNC_SPI2_CLK>;
bias-disable;
};
- pins_spi_mi {
+ pins-spi-mi {
pinmux = <PINMUX_GPIO94__FUNC_SPI2_MI>;
mediatek,pull-down-adv = <00>;
};
};
spi3_pins: spi3 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO21__FUNC_SPI3_MI>,
<PINMUX_GPIO22__FUNC_SPI3_CSB>,
<PINMUX_GPIO23__FUNC_SPI3_MO>,
@@ -763,7 +763,7 @@
};
spi4_pins: spi4 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO17__FUNC_SPI4_MI>,
<PINMUX_GPIO18__FUNC_SPI4_CSB>,
<PINMUX_GPIO19__FUNC_SPI4_MO>,
@@ -773,7 +773,7 @@
};
spi5_pins: spi5 {
- pins_spi {
+ pins-spi {
pinmux = <PINMUX_GPIO13__FUNC_SPI5_MI>,
<PINMUX_GPIO14__FUNC_SPI5_CSB>,
<PINMUX_GPIO15__FUNC_SPI5_MO>,
@@ -783,63 +783,63 @@
};
uart0_pins_default: uart0-pins-default {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO95__FUNC_URXD0>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO96__FUNC_UTXD0>;
};
};
uart1_pins_default: uart1-pins-default {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO121__FUNC_URXD1>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
};
- pins_rts {
+ pins-rts {
pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
output-enable;
};
- pins_cts {
+ pins-cts {
pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
input-enable;
};
};
uart1_pins_sleep: uart1-pins-sleep {
- pins_rx {
+ pins-rx {
pinmux = <PINMUX_GPIO121__FUNC_GPIO121>;
input-enable;
bias-pull-up;
};
- pins_tx {
+ pins-tx {
pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
};
- pins_rts {
+ pins-rts {
pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
output-enable;
};
- pins_cts {
+ pins-cts {
pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
input-enable;
};
};
wifi_pins_pwrseq: wifi-pins-pwrseq {
- pins_wifi_enable {
+ pins-wifi-enable {
pinmux = <PINMUX_GPIO119__FUNC_GPIO119>;
output-low;
};
};
wifi_pins_wakeup: wifi-pins-wakeup {
- pins_wifi_wakeup {
+ pins-wifi-wakeup {
pinmux = <PINMUX_GPIO113__FUNC_GPIO113>;
input-enable;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 5169779d01df..976dc968b3ca 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1210,127 +1210,6 @@
nvmem-cell-names = "calibration-data";
};
- thermal_zones: thermal-zones {
- cpu_thermal: cpu-thermal {
- polling-delay-passive = <100>;
- polling-delay = <500>;
- thermal-sensors = <&thermal 0>;
- sustainable-power = <5000>;
-
- trips {
- threshold: trip-point0 {
- temperature = <68000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- target: trip-point1 {
- temperature = <80000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu_crit: cpu-crit {
- temperature = <115000>;
- hysteresis = <2000>;
- type = "critical";
- };
- };
-
- cooling-maps {
- map0 {
- trip = <&target>;
- cooling-device = <&cpu0
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu1
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu2
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu3
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>;
- contribution = <3072>;
- };
- map1 {
- trip = <&target>;
- cooling-device = <&cpu4
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu5
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu6
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>,
- <&cpu7
- THERMAL_NO_LIMIT
- THERMAL_NO_LIMIT>;
- contribution = <1024>;
- };
- };
- };
-
- /* The tzts1 ~ tzts6 don't need to polling */
- /* The tzts1 ~ tzts6 don't need to thermal throttle */
-
- tzts1: tzts1 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 1>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts2: tzts2 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 2>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts3: tzts3 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 3>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts4: tzts4 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 4>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tzts5: tzts5 {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 5>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
-
- tztsABB: tztsABB {
- polling-delay-passive = <0>;
- polling-delay = <0>;
- thermal-sensors = <&thermal 6>;
- sustainable-power = <5000>;
- trips {};
- cooling-maps {};
- };
- };
-
pwm0: pwm@1100e000 {
compatible = "mediatek,mt8183-disp-pwm";
reg = <0 0x1100e000 0 0x1000>;
@@ -2105,4 +1984,125 @@
power-domains = <&spm MT8183_POWER_DOMAIN_CAM>;
};
};
+
+ thermal_zones: thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <100>;
+ polling-delay = <500>;
+ thermal-sensors = <&thermal 0>;
+ sustainable-power = <5000>;
+
+ trips {
+ threshold: trip-point0 {
+ temperature = <68000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ target: trip-point1 {
+ temperature = <80000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu_crit: cpu-crit {
+ temperature = <115000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&target>;
+ cooling-device = <&cpu0
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu1
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu2
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu3
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ contribution = <3072>;
+ };
+ map1 {
+ trip = <&target>;
+ cooling-device = <&cpu4
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu5
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu6
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>,
+ <&cpu7
+ THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ contribution = <1024>;
+ };
+ };
+ };
+
+	/* The tzts1 ~ tzts6 don't need polling */
+	/* The tzts1 ~ tzts6 don't need thermal throttling */
+
+ tzts1: tzts1 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 1>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts2: tzts2 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 2>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts3: tzts3 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 3>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts4: tzts4 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 4>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tzts5: tzts5 {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 5>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+
+ tztsABB: tztsABB {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&thermal 6>;
+ sustainable-power = <5000>;
+ trips {};
+ cooling-maps {};
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
index f04ae70c470a..df0c04f2ba1d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
@@ -924,7 +924,8 @@
reg = <MT8186_POWER_DOMAIN_CSIRX_TOP>;
clocks = <&topckgen CLK_TOP_SENINF>,
<&topckgen CLK_TOP_SENINF1>;
- clock-names = "csirx_top0", "csirx_top1";
+ clock-names = "subsys-csirx-top0",
+ "subsys-csirx-top1";
#power-domain-cells = <0>;
};
@@ -942,7 +943,8 @@
reg = <MT8186_POWER_DOMAIN_ADSP_AO>;
clocks = <&topckgen CLK_TOP_AUDIODSP>,
<&topckgen CLK_TOP_ADSP_BUS>;
- clock-names = "audioadsp", "adsp_bus";
+ clock-names = "audioadsp",
+ "subsys-adsp-bus";
#address-cells = <1>;
#size-cells = <0>;
#power-domain-cells = <1>;
@@ -975,8 +977,11 @@
<&mmsys CLK_MM_SMI_COMMON>,
<&mmsys CLK_MM_SMI_GALS>,
<&mmsys CLK_MM_SMI_IOMMU>;
- clock-names = "disp", "mdp", "smi_infra", "smi_common",
- "smi_gals", "smi_iommu";
+ clock-names = "disp", "mdp",
+ "subsys-smi-infra",
+ "subsys-smi-common",
+ "subsys-smi-gals",
+ "subsys-smi-iommu";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -993,15 +998,17 @@
power-domain@MT8186_POWER_DOMAIN_CAM {
reg = <MT8186_POWER_DOMAIN_CAM>;
- clocks = <&topckgen CLK_TOP_CAM>,
- <&topckgen CLK_TOP_SENINF>,
+ clocks = <&topckgen CLK_TOP_SENINF>,
<&topckgen CLK_TOP_SENINF1>,
<&topckgen CLK_TOP_SENINF2>,
<&topckgen CLK_TOP_SENINF3>,
+ <&camsys CLK_CAM2MM_GALS>,
<&topckgen CLK_TOP_CAMTM>,
- <&camsys CLK_CAM2MM_GALS>;
- clock-names = "cam-top", "cam0", "cam1", "cam2",
- "cam3", "cam-tm", "gals";
+ <&topckgen CLK_TOP_CAM>;
+ clock-names = "cam0", "cam1", "cam2",
+ "cam3", "gals",
+ "subsys-cam-tm",
+ "subsys-cam-top";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -1020,9 +1027,9 @@
power-domain@MT8186_POWER_DOMAIN_IMG {
reg = <MT8186_POWER_DOMAIN_IMG>;
- clocks = <&topckgen CLK_TOP_IMG1>,
- <&imgsys1 CLK_IMG1_GALS_IMG1>;
- clock-names = "img-top", "gals";
+ clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>,
+ <&topckgen CLK_TOP_IMG1>;
+ clock-names = "gals", "subsys-img-top";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
@@ -1041,8 +1048,11 @@
<&ipesys CLK_IPE_LARB20>,
<&ipesys CLK_IPE_SMI_SUBCOM>,
<&ipesys CLK_IPE_GALS_IPE>;
- clock-names = "ipe-top", "ipe-larb0", "ipe-larb1",
- "ipe-smi", "ipe-gals";
+ clock-names = "subsys-ipe-top",
+ "subsys-ipe-larb0",
+ "subsys-ipe-larb1",
+ "subsys-ipe-smi",
+ "subsys-ipe-gals";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -1061,7 +1071,9 @@
clocks = <&topckgen CLK_TOP_WPE>,
<&wpesys CLK_WPE_SMI_LARB8_CK_EN>,
<&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>;
- clock-names = "wpe0", "larb-ck", "larb-pclk";
+ clock-names = "wpe0",
+ "subsys-larb-ck",
+ "subsys-larb-pclk";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -1656,7 +1668,7 @@
#address-cells = <1>;
#size-cells = <1>;
- gpu_speedbin: gpu-speed-bin@59c {
+ gpu_speedbin: gpu-speedbin@59c {
reg = <0x59c 0x4>;
bits = <0 3>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index dd5b89b73190..5a7cab489ff3 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -389,7 +389,7 @@
pinctrl-0 = <&i2c7_pins>;
pmic@34 {
- #interrupt-cells = <1>;
+ #interrupt-cells = <2>;
compatible = "mediatek,mt6360";
reg = <0x34>;
interrupt-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index 54c674c45b49..e0ac2e9f5b72 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -627,6 +627,8 @@
power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 {
reg = <MT8195_POWER_DOMAIN_VENC_CORE1>;
+ clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>;
+ clock-names = "venc1-larb";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -689,6 +691,8 @@
power-domain@MT8195_POWER_DOMAIN_VENC {
reg = <MT8195_POWER_DOMAIN_VENC>;
+ clocks = <&vencsys CLK_VENC_LARB>;
+ clock-names = "venc0-larb";
mediatek,infracfg = <&infracfg_ao>;
#power-domain-cells = <0>;
};
@@ -2665,7 +2669,7 @@
reg = <0 0x1b010000 0 0x1000>;
mediatek,larb-id = <20>;
mediatek,smi = <&smi_common_vpp>;
- clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>,
+ clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>,
<&vencsys_core1 CLK_VENC_CORE1_GALS>,
<&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>;
clock-names = "apb", "smi", "gals";
diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
index 70b465f7c6a7..00ac59a873e8 100644
--- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts
@@ -238,6 +238,7 @@
mt6360: pmic@34 {
compatible = "mediatek,mt6360";
reg = <0x34>;
+ interrupt-parent = <&pio>;
interrupts = <128 IRQ_TYPE_EDGE_FALLING>;
interrupt-names = "IRQB";
interrupt-controller;
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
index de0a1f2af983..7d4c5324c61b 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
@@ -86,7 +86,7 @@
sgtl5000_clk: sgtl5000-oscillator {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <24576000>;
+ clock-frequency = <24576000>;
};
dc_12v: dc-12v-regulator {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index e729e7a22b23..cc8209795c3e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -668,7 +668,7 @@
vdec: video-codec@ff360000 {
compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec";
- reg = <0x0 0xff360000 0x0 0x400>;
+ reg = <0x0 0xff360000 0x0 0x480>;
interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>,
<&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
index 5c1929d41cc0..cacbad35cfc8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
@@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 {
&pci_rootport {
mvl_wifi: wifi@0,0 {
compatible = "pci1b4b,2b42";
- reg = <0x83010000 0x0 0x00000000 0x0 0x00100000
- 0x83010000 0x0 0x00100000 0x0 0x00100000>;
+ reg = <0x0000 0x0 0x0 0x0 0x0>;
interrupt-parent = <&gpio0>;
interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
index 853e88455e75..9e4b12ed62cb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
@@ -34,8 +34,8 @@
&pci_rootport {
wifi@0,0 {
compatible = "qcom,ath10k";
- reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>,
- <0x03010010 0x0 0x00000000 0x0 0x00200000>;
+ reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>,
+ <0x03000010 0x0 0x00000000 0x0 0x00200000>;
qcom,ath10k-calibration-variant = "GO_DUMO";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index c9bf1d5c3a42..789fd0dcc88b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 {
#address-cells = <3>;
#size-cells = <2>;
ranges;
+ device_type = "pci";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index faf02e59d6c7..da0dfb237f85 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1109,7 +1109,9 @@
power-domain@RK3399_PD_VDU {
reg = <RK3399_PD_VDU>;
clocks = <&cru ACLK_VDU>,
- <&cru HCLK_VDU>;
+ <&cru HCLK_VDU>,
+ <&cru SCLK_VDU_CA>,
+ <&cru SCLK_VDU_CORE>;
pm_qos = <&qos_video_m1_r>,
<&qos_video_m1_w>;
#power-domain-cells = <0>;
@@ -1384,7 +1386,7 @@
vdec: video-codec@ff660000 {
compatible = "rockchip,rk3399-vdec";
- reg = <0x0 0xff660000 0x0 0x400>;
+ reg = <0x0 0xff660000 0x0 0x480>;
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>,
<&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 0964761e3ce9..c19c0f1b3778 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -977,7 +977,7 @@
<GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "sys", "pmc", "msi", "legacy", "err";
+ interrupt-names = "sys", "pmc", "msg", "legacy", "err";
bus-range = <0x0 0xf>;
clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>,
<&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
index 9570b34aca2e..d88c0e852356 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
@@ -235,13 +235,13 @@
&pinctrl {
fan {
fan_int: fan-int {
- rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>;
+ rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
hym8563 {
hym8563_int: hym8563-int {
- rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
index 8f399c4317bd..e3a839a12dc6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
@@ -38,7 +38,7 @@
leds {
compatible = "gpio-leds";
pinctrl-names = "default";
- pinctrl-0 =<&leds_gpio>;
+ pinctrl-0 = <&leds_gpio>;
led-1 {
gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
index 63151d9d2377..30db12c4fc82 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
@@ -369,7 +369,7 @@
emmc_data_strobe: emmc-data-strobe {
rockchip,pins =
/* emmc_data_strobe */
- <2 RK_PA2 1 &pcfg_pull_none>;
+ <2 RK_PA2 1 &pcfg_pull_down>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 7064c0e9179f..8aa0499f9b03 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -1362,7 +1362,6 @@
<GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH 0>,
<GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH 0>,
<GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
- interrupt-names = "ch0", "ch1", "ch2", "ch3";
rockchip,pmu = <&pmu1grf>;
};
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f6d416fe49b0..acf109581ac0 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -819,6 +819,11 @@ static inline bool system_supports_tlb_range(void)
return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
}
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index ae35939f395b..353fe08546cf 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
+static inline bool esr_fsc_is_translation_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
+}
+
+static inline bool esr_fsc_is_permission_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
+}
+
+static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
+}
+
const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b85f46a73e21..3c6f8ba1e479 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -108,6 +108,7 @@
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
/* TCR_EL2 Registers bits */
+#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
@@ -122,6 +123,7 @@
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_DS TCR_EL2_DS
#define VTCR_EL2_RES1 (1U << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
@@ -344,36 +346,47 @@
* Once we get to a point where the two describe the same thing, we'll
* merge the definitions. One day.
*/
-#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
-#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
+#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK)
-#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
- BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
- GENMASK(26, 25) | BIT(21) | BIT(18) | \
+/*
+ * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any
+ * future additions, define __HFGWTR* macros relative to __HFGRTR* ones.
+ */
+#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+ GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
-#define __HFGWTR_EL2_MASK GENMASK(49, 0)
-#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
-
-#define __HFGITR_EL2_RES0 GENMASK(63, 57)
-#define __HFGITR_EL2_MASK GENMASK(54, 0)
-#define __HFGITR_EL2_nMASK GENMASK(56, 55)
-
-#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
- GENMASK(21, 20) | BIT(8))
-#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
-#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
-
-#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
- BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
- BIT(22) | BIT(9) | BIT(6))
-#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
-#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
+#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK)
+
+#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0
+#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0))
+#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK)
+
+#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0
+#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \
+ GENMASK(41, 40) | GENMASK(37, 22) | \
+ GENMASK(19, 9) | GENMASK(7, 0))
+#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK)
+
+#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0
+#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \
+ GENMASK(46, 44) | GENMASK(42, 41) | \
+ GENMASK(37, 35) | GENMASK(33, 31) | \
+ GENMASK(29, 23) | GENMASK(21, 10) | \
+ GENMASK(8, 7) | GENMASK(5, 0))
+#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK)
+
+#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0
+#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0))
+#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK)
/* Similar definitions for HCRX_EL2 */
-#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
-#define __HCRX_EL2_MASK (0)
-#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
+#define __HCRX_EL2_RES0 HCRX_EL2_RES0
+#define __HCRX_EL2_MASK (BIT(6))
+#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
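Deriving the write-side masks from the read-side ones keeps the three bit classes (RES0, positive traps, negative traps) disjoint and exhaustive by construction. A minimal userspace sketch with hypothetical stand-in values (not the real HFGRTR_EL2 layout):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Hypothetical stand-ins, not the real register layout. */
		uint64_t hfgrtr_res0 = 0xff00000000000000ULL;
		uint64_t hfgrtr_mask = 0x0000ffffffffffffULL;
		uint64_t rtr_only    = 0x0000f0f0f0f0f0f0ULL; /* read-side-only traps */

		/* Write side derived from the read side, as in the macros above. */
		uint64_t hfgwtr_res0  = hfgrtr_res0 | rtr_only;
		uint64_t hfgwtr_mask  = hfgrtr_mask & ~rtr_only;
		uint64_t hfgwtr_nmask = ~(hfgwtr_res0 | hfgwtr_mask);

		/* The three classes stay disjoint and cover all 64 bits. */
		assert((hfgwtr_res0 & hfgwtr_mask) == 0);
		assert((hfgwtr_res0 | hfgwtr_mask | hfgwtr_nmask) == ~0ULL);
		return 0;
	}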
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 78a550537b67..b804fe832184 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -17,6 +17,7 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
@@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
-static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature)
-{
- return test_bit(feature, vcpu->kvm->arch.vcpu_features);
-}
-
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
- return __is_hyp_ctxt(&vcpu->arch.ctxt);
+ return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
}
/*
@@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
+{
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
}
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
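The granule returned above is simply the block size implied by the fault level encoded in ESR_ELx. A 4K-granule sketch, assuming ARM64_HW_PGTABLE_LEVEL_SHIFT(n) expands to (PAGE_SHIFT - 3) * (4 - n) + 3:

	#include <assert.h>

	/* 4K-granule sketch; formula assumed from pgtable-hwdef.h. */
	#define PAGE_SHIFT 12
	#define HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)

	int main(void)
	{
		assert((1UL << HW_PGTABLE_LEVEL_SHIFT(3)) == 4096);       /* page  */
		assert((1UL << HW_PGTABLE_LEVEL_SHIFT(2)) == 2UL << 20);  /* 2 MiB */
		assert((1UL << HW_PGTABLE_LEVEL_SHIFT(1)) == 1UL << 30);  /* 1 GiB */
		return 0;
	}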
@@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
* first), then a permission fault to allow the flags
* to be set.
*/
- switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
- case ESR_ELx_FSC_PERM:
- return true;
- default:
- return false;
- }
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
}
if (kvm_vcpu_trap_is_iabt(vcpu))
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 567bbc417e4b..21c57b812569 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
+#include <asm/vncr_mapping.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -306,6 +307,7 @@ struct kvm_arch {
* Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
*/
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask)
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
u64 id_regs[KVM_ARM_ID_REG_NUM];
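IDX_IDREG() is the inverse of the existing IDREG_IDX(). A quick userspace round-trip check; the sysreg field shifts below are local assumptions intended to mirror the kernel's encoding:

	#include <assert.h>

	/* Userspace sketch: field shifts assumed to match the kernel's
	 * sysreg encoding (Op0=19, Op1=16, CRn=12, CRm=8, Op2=5). */
	#define SYS_REG(op0, op1, crn, crm, op2) \
		(((op0) << 19) | ((op1) << 16) | ((crn) << 12) | ((crm) << 8) | ((op2) << 5))
	#define SYS_REG_CRM(id) (((id) >> 8) & 0xf)
	#define SYS_REG_OP2(id) (((id) >> 5) & 0x7)

	#define IDREG_IDX(id)  (((SYS_REG_CRM(id) - 1) << 3) | SYS_REG_OP2(id))
	#define IDX_IDREG(idx) SYS_REG(3, 0, 0, ((idx) >> 3) + 1, (idx) & 0x7)

	int main(void)
	{
		/* ID_AA64MMFR0_EL1 is op0=3, op1=0, crn=0, crm=7, op2=0. */
		unsigned int id = SYS_REG(3, 0, 0, 7, 0);
		unsigned int idx = IDREG_IDX(id);

		assert(idx == 48);              /* (7 - 1) << 3 | 0 */
		assert(IDX_IDREG(idx) == id);   /* round-trip holds */
		return 0;
	}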
@@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info {
u64 disr_el1; /* Deferred [SError] Status Register */
};
+/*
+ * VNCR() just places the VNCR_capable registers in the enum after
+ * __VNCR_START__, and makes the value (after correction) an 8-byte offset
+ * from the VNCR base. As we don't require the enum to be otherwise ordered,
+ * we need the terrible hack below to ensure that we correctly size the
+ * sys_regs array, no matter what.
+ *
+ * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
+ * treasure trove of bit hacks:
+ * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
+#define VNCR(r) \
+ __before_##r, \
+ r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
+ __after_##r = __MAX__(__before_##r - 1, r)
+
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
- SCTLR_EL1, /* System Control Register */
- ACTLR_EL1, /* Auxiliary Control Register */
- CPACR_EL1, /* Coprocessor Access Control */
- ZCR_EL1, /* SVE Control */
- TTBR0_EL1, /* Translation Table Base Register 0 */
- TTBR1_EL1, /* Translation Table Base Register 1 */
- TCR_EL1, /* Translation Control Register */
- TCR2_EL1, /* Extended Translation Control Register */
- ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
- AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
- FAR_EL1, /* Fault Address Register */
- MAIR_EL1, /* Memory Attribute Indirection Register */
- VBAR_EL1, /* Vector Base Address Register */
- CONTEXTIDR_EL1, /* Context ID Register */
TPIDR_EL0, /* Thread ID, User R/W */
TPIDRRO_EL0, /* Thread ID, User R/O */
TPIDR_EL1, /* Thread ID, Privileged */
- AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
CNTKCTL_EL1, /* Timer Control Register (EL1) */
PAR_EL1, /* Physical Address Register */
- MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
@@ -381,26 +383,11 @@ enum vcpu_sysreg {
APGAKEYLO_EL1,
APGAKEYHI_EL1,
- ELR_EL1,
- SP_EL1,
- SPSR_EL1,
-
- CNTVOFF_EL2,
- CNTV_CVAL_EL0,
- CNTV_CTL_EL0,
- CNTP_CVAL_EL0,
- CNTP_CTL_EL0,
-
/* Memory Tagging Extension registers */
RGSR_EL1, /* Random Allocation Tag Seed Register */
GCR_EL1, /* Tag Control Register */
- TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
- /* Permission Indirection Extension registers */
- PIR_EL1, /* Permission Indirection Register 1 (EL1) */
- PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */
-
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -408,21 +395,14 @@ enum vcpu_sysreg {
DBGVCR32_EL2, /* Debug Vector Catch Register */
/* EL2 registers */
- VPIDR_EL2, /* Virtualization Processor ID Register */
- VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */
SCTLR_EL2, /* System Control Register (EL2) */
ACTLR_EL2, /* Auxiliary Control Register (EL2) */
- HCR_EL2, /* Hypervisor Configuration Register */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
- HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
- HCRX_EL2, /* Extended Hypervisor Configuration Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
- VTTBR_EL2, /* Virtualization Translation Table Base Register */
- VTCR_EL2, /* Virtualization Translation Control Register */
SPSR_EL2, /* EL2 saved program status register */
ELR_EL2, /* EL2 exception link register */
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
@@ -435,19 +415,62 @@ enum vcpu_sysreg {
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
- TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
- HFGRTR_EL2,
- HFGWTR_EL2,
- HFGITR_EL2,
- HDFGRTR_EL2,
- HDFGWTR_EL2,
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
CNTHV_CTL_EL2,
CNTHV_CVAL_EL2,
+ __VNCR_START__, /* Any VNCR-capable reg goes after this point */
+
+ VNCR(SCTLR_EL1),/* System Control Register */
+ VNCR(ACTLR_EL1),/* Auxiliary Control Register */
+ VNCR(CPACR_EL1),/* Coprocessor Access Control */
+ VNCR(ZCR_EL1), /* SVE Control */
+ VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
+ VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
+ VNCR(TCR_EL1), /* Translation Control Register */
+ VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(ESR_EL1), /* Exception Syndrome Register */
+ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
+ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
+ VNCR(FAR_EL1), /* Fault Address Register */
+ VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
+ VNCR(VBAR_EL1), /* Vector Base Address Register */
+ VNCR(CONTEXTIDR_EL1), /* Context ID Register */
+ VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
+ VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
+ VNCR(ELR_EL1),
+ VNCR(SP_EL1),
+ VNCR(SPSR_EL1),
+ VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
+ VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
+ VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
+ VNCR(HCR_EL2), /* Hypervisor Configuration Register */
+ VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
+ VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
+ VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
+ VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
+ VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
+
+ /* Permission Indirection Extension registers */
+ VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
+ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+
+ VNCR(HFGRTR_EL2),
+ VNCR(HFGWTR_EL2),
+ VNCR(HFGITR_EL2),
+ VNCR(HDFGRTR_EL2),
+ VNCR(HDFGWTR_EL2),
+ VNCR(HAFGRTR_EL2),
+
+ VNCR(CNTVOFF_EL2),
+ VNCR(CNTV_CVAL_EL0),
+ VNCR(CNTV_CTL_EL0),
+ VNCR(CNTP_CVAL_EL0),
+ VNCR(CNTP_CTL_EL0),
+
NR_SYS_REGS /* Nothing after this line! */
};
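The effect of the __MAX__ hack is easiest to see with toy offsets: even when a VNCR-capable register lands at a lower offset than one declared before it, the enum's running counter never moves backwards, so the terminator still sizes past the highest slot seen. A standalone sketch with hypothetical offsets:

	#include <assert.h>

	/* Branchless max from the bit-hacks page; fine for these small
	 * non-negative enum values. */
	#define __MAX__(x, y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))

	/* Toy VNCR offsets, hypothetical values for illustration only. */
	#define VNCR_REG_A 0x100	/* -> slot __VNCR_START__ + 32 */
	#define VNCR_REG_B 0x020	/* -> slot __VNCR_START__ + 4  */

	#define VNCR(r) \
		__before_##r, \
		r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
		__after_##r = __MAX__(__before_##r - 1, r)

	enum toy_sysreg {
		SOME_REG,
		__VNCR_START__,
		VNCR(REG_A),
		VNCR(REG_B),	/* lower offset than REG_A ...         */
		NR_TOY_REGS	/* ... yet this still lands past REG_A */
	};

	int main(void)
	{
		assert(REG_A == __VNCR_START__ + 32);
		assert(REG_B == __VNCR_START__ + 4);
		assert(NR_TOY_REGS == REG_A + 1);  /* sized by the high-water mark */
		return 0;
	}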
@@ -464,6 +487,9 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
struct kvm_vcpu *__hyp_running_vcpu;
+
+ /* This pointer has to be 4kB aligned. */
+ u64 *vncr_array;
};
struct kvm_host_data {
@@ -826,8 +852,19 @@ struct kvm_vcpu_arch {
* accessed by a running VCPU. For example, for userspace access or
* for system registers that are never context switched, but only
* emulated.
+ *
+ * Don't bother with VNCR-based accesses in the nVHE code; it has no
+ * business dealing with NV.
*/
-#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)])
+static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+{
+#if !defined (__KVM_NVHE_HYPERVISOR__)
+ if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ r >= __VNCR_START__ && ctxt->vncr_array))
+ return &ctxt->vncr_array[r - __VNCR_START__];
+#endif
+ return (u64 *)&ctxt->sys_regs[r];
+}
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
@@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
+ case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break;
case PAR_EL1: *val = read_sysreg_par(); break;
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
@@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
+ case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
@@ -1175,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_vm_has_ran_once(kvm) \
(test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+{
+ return test_bit(feature, ka->vcpu_features);
+}
+
+#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 6cec8e9c6c91..4882905357f4 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -2,8 +2,9 @@
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
-#include <asm/kvm_emulate.h>
+#include <linux/bitfield.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
@@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
}
-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+/* Translation helpers from non-VHE EL2 to EL1 */
+static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
+{
+ return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
+}
+
+static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
+{
+ return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+ tcr_el2_ps_to_tcr_el1_ips(tcr) |
+ (tcr & TCR_EL2_TG0_MASK) |
+ (tcr & TCR_EL2_ORGN0_MASK) |
+ (tcr & TCR_EL2_IRGN0_MASK) |
+ (tcr & TCR_EL2_T0SZ_MASK);
+}
+
+static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
+{
+ u64 cpacr_el1 = 0;
+
+ if (cptr_el2 & CPTR_EL2_TTA)
+ cpacr_el1 |= CPACR_ELx_TTA;
+ if (!(cptr_el2 & CPTR_EL2_TFP))
+ cpacr_el1 |= CPACR_ELx_FPEN;
+ if (!(cptr_el2 & CPTR_EL2_TZ))
+ cpacr_el1 |= CPACR_ELx_ZEN;
-struct sys_reg_params;
-struct sys_reg_desc;
+ return cpacr_el1;
+}
+
+static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
+{
+ /* Only preserve the minimal set of bits we support */
+ val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
+ SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
+ val |= SCTLR_EL1_RES1;
+
+ return val;
+}
+
+static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
+{
+ /* Clear the ASID field */
+ return ttbr0 & ~GENMASK_ULL(63, 48);
+}
+
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
-void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
- const struct sys_reg_desc *r);
+int kvm_init_nv_sysregs(struct kvm *kvm);
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d3e354bb8351..cfdf40f734b1 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -11,7 +11,8 @@
#include <linux/kvm_host.h>
#include <linux/types.h>
-#define KVM_PGTABLE_MAX_LEVELS 4U
+#define KVM_PGTABLE_FIRST_LEVEL -1
+#define KVM_PGTABLE_LAST_LEVEL 3
/*
* The largest supported block sizes for KVM (no 52-bit PA support):
@@ -20,17 +21,29 @@
* - 64K (level 2): 512MB
*/
#ifdef CONFIG_ARM64_4K_PAGES
-#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
#else
-#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
#endif
+#define kvm_lpa2_is_enabled() system_supports_lpa2()
+
+static inline u64 kvm_get_parange_max(void)
+{
+ if (kvm_lpa2_is_enabled() ||
+ (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
+ return ID_AA64MMFR0_EL1_PARANGE_52;
+ else
+ return ID_AA64MMFR0_EL1_PARANGE_48;
+}
+
static inline u64 kvm_get_parange(u64 mmfr0)
{
+ u64 parange_max = kvm_get_parange_max();
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
- if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
- parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
+ if (parange > parange_max)
+ parange = parange_max;
return parange;
}
@@ -41,6 +54,8 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
#define KVM_PHYS_INVALID (-1ULL)
@@ -51,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte)
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
- u64 pa = pte & KVM_PTE_ADDR_MASK;
-
- if (PAGE_SHIFT == 16)
- pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ u64 pa;
+
+ if (kvm_lpa2_is_enabled()) {
+ pa = pte & KVM_PTE_ADDR_MASK_LPA2;
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16)
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ }
return pa;
}
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
{
- kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
-
- if (PAGE_SHIFT == 16) {
- pa &= GENMASK(51, 48);
- pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ kvm_pte_t pte;
+
+ if (kvm_lpa2_is_enabled()) {
+ pte = pa & KVM_PTE_ADDR_MASK_LPA2;
+ pa &= GENMASK(51, 50);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
+ } else {
+ pte = pa & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
}
return pte;
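With LPA2 the PTE output-address field widens to bits [49:12] and the top two PA bits move down to pte[9:8]. A round-trip sketch for a 4K-page configuration, using a simplified mask helper and a hypothetical 52-bit address:

	#include <assert.h>
	#include <stdint.h>

	/* 4K-page sketch (PAGE_SHIFT = 12); GENMASK64 stands in for GENMASK. */
	#define GENMASK64(h, l) (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
	#define PTE_ADDR_MASK_LPA2 GENMASK64(49, 12)

	static uint64_t phys_to_pte_lpa2(uint64_t pa)
	{
		/* Bits [49:12] stay in place; PA bits [51:50] move to pte[9:8]. */
		return (pa & PTE_ADDR_MASK_LPA2) | (((pa >> 50) & 0x3) << 8);
	}

	static uint64_t pte_to_phys_lpa2(uint64_t pte)
	{
		return (pte & PTE_ADDR_MASK_LPA2) | (((pte >> 8) & 0x3) << 50);
	}

	int main(void)
	{
		/* Hypothetical page-aligned 52-bit physical address. */
		uint64_t pa = (3ULL << 50) | 0x123456000ULL;

		assert(pte_to_phys_lpa2(phys_to_pte_lpa2(pa)) == pa);
		return 0;
	}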
@@ -76,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
return __phys_to_pfn(kvm_pte_to_phys(pte));
}
-static inline u64 kvm_granule_shift(u32 level)
+static inline u64 kvm_granule_shift(s8 level)
{
- /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+ /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}
-static inline u64 kvm_granule_size(u32 level)
+static inline u64 kvm_granule_size(s8 level)
{
return BIT(kvm_granule_shift(level));
}
-static inline bool kvm_level_supports_block_mapping(u32 level)
+static inline bool kvm_level_supports_block_mapping(s8 level)
{
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
static inline u32 kvm_supported_block_sizes(void)
{
- u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
u32 r = 0;
- for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
r |= BIT(kvm_granule_shift(level));
return r;
@@ -142,7 +170,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
- void (*free_unlinked_table)(void *addr, u32 level);
+ void (*free_unlinked_table)(void *addr, s8 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -238,7 +266,7 @@ struct kvm_pgtable_visit_ctx {
u64 start;
u64 addr;
u64 end;
- u32 level;
+ s8 level;
enum kvm_pgtable_walk_flags flags;
};
@@ -341,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void)
*/
struct kvm_pgtable {
u32 ia_bits;
- u32 start_level;
+ s8 start_level;
kvm_pteref_t pgd;
struct kvm_pgtable_mm_ops *mm_ops;
@@ -475,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* The page-table is assumed to be unreachable by any hardware walkers prior to
* freeing and therefore no TLB invalidation is performed.
*/
-void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
/**
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
@@ -499,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
* an ERR_PTR(error) on failure.
*/
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
- u64 phys, u32 level,
+ u64 phys, s8 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte);
@@ -725,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level);
+ kvm_pte_t *ptep, s8 *level);
/**
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index e46250a02017..ad9cfb5c1ff4 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void)
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
- unsigned long total = 0, i;
+ unsigned long total = 0;
+ int i;
/* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
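The worst case is one table page per PTRS_PER_PTE entries at each level, now iterated from level -1 through 3. A worked sketch assuming a 4K granule (512 entries per table):

	#include <stdio.h>

	/* Assumed 4K-granule values: 512 entries per table, levels -1..3. */
	#define PTRS_PER_PTE 512
	#define FIRST_LEVEL (-1)
	#define LAST_LEVEL 3
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	static unsigned long pgtable_max_pages(unsigned long nr_pages)
	{
		unsigned long total = 0;
		int i;

		/* One table page covers PTRS_PER_PTE entries of the level below. */
		for (i = FIRST_LEVEL; i <= LAST_LEVEL; i++) {
			nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
			total += nr_pages;
		}
		return total;
	}

	int main(void)
	{
		/* Mapping 1 GiB as 4K pages (262144 PTEs): 512 + 1 + 1 + 1 + 1. */
		printf("%lu\n", pgtable_max_pages(262144));	/* prints 516 */
		return 0;
	}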
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index e9624f6326dd..483dbfa39c4c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+#define lpa2_is_enabled() false
+
/*
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b19a8aee684c..79ce70fbb751 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ /*
+ * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
+ * dirtiness again.
+ */
+ if (pte_sw_dirty(pte))
+ pte = pte_mkdirty(pte);
return pte;
}
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index f4af547ef54c..2e4d7da74fb8 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg)
extern bool rodata_enabled;
extern bool rodata_full;
- if (arg && !strcmp(arg, "full")) {
+ if (!arg)
+ return false;
+
+ if (!strcmp(arg, "full")) {
+ rodata_enabled = rodata_full = true;
+ return true;
+ }
+
+ if (!strcmp(arg, "off")) {
+ rodata_enabled = rodata_full = false;
+ return true;
+ }
+
+ if (!strcmp(arg, "on")) {
rodata_enabled = true;
- rodata_full = true;
+ rodata_full = false;
return true;
}
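For reference, the three accepted spellings now map explicitly; anything else returns false, which presumably defers to the generic rodata= handling:

	rodata=full  ->  rodata_enabled = true,  rodata_full = true
	rodata=on    ->  rodata_enabled = true,  rodata_full = false
	rodata=off   ->  rodata_enabled = false, rodata_full = false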
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index d977713ec0ba..abb57bc54305 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -44,9 +44,6 @@
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -81,6 +78,5 @@
}
asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
-#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 5e65f51c10d2..c3b19b376c86 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -645,6 +645,7 @@
#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
@@ -781,10 +782,16 @@
#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
/* Common SCTLR_ELx flags. */
@@ -871,10 +878,12 @@
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
@@ -882,6 +891,7 @@
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -892,11 +902,13 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -1039,6 +1051,19 @@
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 846c563689a8..0150deb332af 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
- * get the tlbi levels in arm64. Default value is 0 if more than one
- * of cleared_* is set or neither is set.
- * Arm64 doesn't support p4ds now.
+ * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
/* The TTL field is only valid for the leaf entry. */
if (tlb->freed_tables)
- return 0;
+ return TLBI_TTL_UNKNOWN;
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
@@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
tlb->cleared_p4ds))
return 1;
- return 0;
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
}
static inline void tlb_flush(struct mmu_gather *tlb)
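The hint is only usable when exactly one class of leaf entries was cleared; every ambiguous combination now degrades to TLBI_TTL_UNKNOWN instead of 0. A small userspace model (the pte/pmd branches mirror the unchanged middle of the function):

	#include <assert.h>
	#include <limits.h>
	#include <stdbool.h>

	#define TLBI_TTL_UNKNOWN INT_MAX

	/* Minimal model of the mmu_gather bits consulted above. */
	struct gather {
		bool freed_tables;
		bool cleared_ptes, cleared_pmds, cleared_puds, cleared_p4ds;
	};

	static int get_level(const struct gather *g)
	{
		if (g->freed_tables)
			return TLBI_TTL_UNKNOWN;
		if (g->cleared_ptes && !(g->cleared_pmds || g->cleared_puds ||
					 g->cleared_p4ds))
			return 3;
		if (g->cleared_pmds && !(g->cleared_ptes || g->cleared_puds ||
					 g->cleared_p4ds))
			return 2;
		if (g->cleared_puds && !(g->cleared_ptes || g->cleared_pmds ||
					 g->cleared_p4ds))
			return 1;
		if (g->cleared_p4ds && !(g->cleared_ptes || g->cleared_pmds ||
					 g->cleared_puds))
			return 0;
		return TLBI_TTL_UNKNOWN;
	}

	int main(void)
	{
		struct gather only_ptes = { .cleared_ptes = true };
		struct gather mixed = { .cleared_ptes = true, .cleared_pmds = true };

		assert(get_level(&only_ptes) == 3);		/* last-level entries */
		assert(get_level(&mixed) == TLBI_TTL_UNKNOWN);	/* ambiguous: no hint */
		return 0;
	}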
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bb2c2833a987..1deb5d789c2e 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
- * cannot be easily determined, a 0 value for the level parameter will
- * perform a non-hinted invalidation.
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+#define TLBI_TTL_UNKNOWN INT_MAX
+
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
- level) { \
+ level >= 0 && level <= 3) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
@@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void)
} while (0)
/*
- * This macro creates a properly formatted VA operand for the TLB RANGE.
- * The value bit assignments are:
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* | ASID | TG | SCALE | NUM | TTL | BADDR |
* +-----------------+-------+-------+-------+----------------------+
* |63 48|47 46|45 44|43 39|38 37|36 0|
*
- * The address range is determined by below formula:
- * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ * The address range is determined by the formula below: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; if LPA2 is in use, BADDR
+ * holds addr[52:16], else BADDR holds the page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
*
*/
-#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
- ({ \
- unsigned long __ta = (addr) >> PAGE_SHIFT; \
- __ta &= GENMASK_ULL(36, 0); \
- __ta |= (unsigned long)(ttl) << 37; \
- __ta |= (unsigned long)(num) << 39; \
- __ta |= (unsigned long)(scale) << 44; \
- __ta |= get_trans_granule() << 46; \
- __ta |= (unsigned long)(asid) << 48; \
- __ta; \
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = (baddr); \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta &= GENMASK_ULL(36, 0); \
+ __ta |= __ttl << 37; \
+ __ta |= (unsigned long)(num) << 39; \
+ __ta |= (unsigned long)(scale) << 44; \
+ __ta |= get_trans_granule() << 46; \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
})
/* These macros are used by the TLBI RANGE feature. */
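A standalone sketch of the operand packing documented above, with the translation granule hard-wired to the 4K encoding (assumed to be 0b01) and an out-of-range TTL eliding the hint, as in the macro:

	#include <assert.h>
	#include <stdint.h>

	static uint64_t tlbi_vaddr_range(uint64_t baddr, uint64_t asid,
					 uint64_t scale, uint64_t num, int ttl)
	{
		uint64_t ta = baddr & ((1ULL << 37) - 1);		/* BADDR[36:0]  */
		uint64_t hint = (ttl >= 1 && ttl <= 3) ? (uint64_t)ttl : 0;

		ta |= hint << 37;					/* TTL[38:37]   */
		ta |= num << 39;					/* NUM[43:39]   */
		ta |= scale << 44;					/* SCALE[45:44] */
		ta |= 1ULL << 46;					/* TG[47:46]    */
		ta |= asid << 48;					/* ASID[63:48]  */
		return ta;
	}

	int main(void)
	{
		/* A TTL outside 1..3 (e.g. TLBI_TTL_UNKNOWN) encodes as 0: no hint. */
		uint64_t op = tlbi_vaddr_range(0x1234, 42, 3, 5, 0x7fffffff);

		assert(((op >> 37) & 0x3) == 0);	/* hint elided    */
		assert(((op >> 39) & 0x1f) == 5);	/* NUM preserved  */
		assert(((op >> 48) & 0xffff) == 42);	/* ASID preserved */
		return 0;
	}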
@@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void)
* CPUs, ensuring that any walk-cache entries associated with the
* translation are also invalidated.
*
- * __flush_tlb_range(vma, start, end, stride, last_level)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
* Invalidate the virtual-address range '[start, end)' on all
* CPUs for the user address space corresponding to 'vma->mm'.
* The invalidation operations are issued at a granularity
* determined by 'stride' and only affect any walk-cache entries
- * if 'last_level' is equal to false.
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* @tlb_level: Translation Table level hint, if known
* @tlbi_user: If 'true', call an additional __tlbi_user()
* (typically for user ASIDs). 'false' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one until the alignment is reached
+ * using the non-range operations. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
*
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
- asid, tlb_level, tlbi_user) \
+ asid, tlb_level, tlbi_user, lpa2) \
do { \
int num = 0; \
- int scale = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
while (pages > 0) { \
if (!system_supports_tlb_range() || \
- pages % 2 == 1) { \
+ pages == 1 || \
+ (lpa2 && start != ALIGN(start, SZ_64K))) { \
addr = __TLBI_VADDR(start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
@@ -384,20 +407,20 @@ do { \
\
num = __TLBI_RANGE_NUM(pages, scale); \
if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(start, asid, scale, \
- num, tlb_level); \
+ addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
} \
- scale++; \
+ scale--; \
} \
} while (0)
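
The decomposition the loop performs can be replayed in user space. This sketch re-derives the helpers from the formula in the comment (num capped at 30, as noted above), assumes 4KB pages, and merely prints the operations a given request would generate; the extra scale < 0 fallback is defensive and not needed by the kernel, which bounds 'pages' before entering the loop:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RANGE_PAGES(num, scale) (((num) + 1ULL) << (5 * (scale) + 1))

    /* Largest num <= 30 whose range fits in 'pages' at this scale, else -1 */
    static int range_num(uint64_t pages, int scale)
    {
        int num = (int)(pages >> (5 * scale + 1)) - 1;

        return num > 30 ? 30 : num;
    }

    static void decompose(uint64_t start, uint64_t pages, bool lpa2)
    {
        int scale = 3;

        while (pages > 0) {
            /* Step 1 (LPA2 alignment) and step 3 (single trailing page) */
            if (pages == 1 || scale < 0 || (lpa2 && (start & 0xffff))) {
                printf("single op at %#llx\n", (unsigned long long)start);
                start += 4096;
                pages--;
                continue;
            }

            /* Step 2: largest range op that fits at the current scale */
            int num = range_num(pages, scale);

            if (num >= 0) {
                printf("range op: scale=%d num=%d (%llu pages)\n", scale,
                       num, (unsigned long long)RANGE_PAGES(num, scale));
                start += RANGE_PAGES(num, scale) * 4096;
                pages -= RANGE_PAGES(num, scale);
            }
            scale--;
        }
    }

    int main(void)
    {
        decompose(0, 519, true); /* 512 + 6 pages via range ops, then 1 single op */
        return 0;
    }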
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
asid = ASID(vma->vm_mm);
if (last_level)
- __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
else
- __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
dsb(ish);
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
@@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
- * Set the tlb_level to 0 because we can not get enough information here.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we cannot get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
new file mode 100644
index 000000000000..df2c47c55972
--- /dev/null
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System register offsets in the VNCR page
+ * All offsets are *byte* displacements!
+ */
+
+#ifndef __ARM64_VNCR_MAPPING_H__
+#define __ARM64_VNCR_MAPPING_H__
+
+#define VNCR_VTTBR_EL2 0x020
+#define VNCR_VTCR_EL2 0x040
+#define VNCR_VMPIDR_EL2 0x050
+#define VNCR_CNTVOFF_EL2 0x060
+#define VNCR_HCR_EL2 0x078
+#define VNCR_HSTR_EL2 0x080
+#define VNCR_VPIDR_EL2 0x088
+#define VNCR_TPIDR_EL2 0x090
+#define VNCR_HCRX_EL2 0x0A0
+#define VNCR_VNCR_EL2 0x0B0
+#define VNCR_CPACR_EL1 0x100
+#define VNCR_CONTEXTIDR_EL1 0x108
+#define VNCR_SCTLR_EL1 0x110
+#define VNCR_ACTLR_EL1 0x118
+#define VNCR_TCR_EL1 0x120
+#define VNCR_AFSR0_EL1 0x128
+#define VNCR_AFSR1_EL1 0x130
+#define VNCR_ESR_EL1 0x138
+#define VNCR_MAIR_EL1 0x140
+#define VNCR_AMAIR_EL1 0x148
+#define VNCR_MDSCR_EL1 0x158
+#define VNCR_SPSR_EL1 0x160
+#define VNCR_CNTV_CVAL_EL0 0x168
+#define VNCR_CNTV_CTL_EL0 0x170
+#define VNCR_CNTP_CVAL_EL0 0x178
+#define VNCR_CNTP_CTL_EL0 0x180
+#define VNCR_SCXTNUM_EL1 0x188
+#define VNCR_TFSR_EL1 0x190
+#define VNCR_HFGRTR_EL2 0x1B8
+#define VNCR_HFGWTR_EL2 0x1C0
+#define VNCR_HFGITR_EL2 0x1C8
+#define VNCR_HDFGRTR_EL2 0x1D0
+#define VNCR_HDFGWTR_EL2 0x1D8
+#define VNCR_ZCR_EL1 0x1E0
+#define VNCR_HAFGRTR_EL2 0x1E8
+#define VNCR_TTBR0_EL1 0x200
+#define VNCR_TTBR1_EL1 0x210
+#define VNCR_FAR_EL1 0x220
+#define VNCR_ELR_EL1 0x230
+#define VNCR_SP_EL1 0x240
+#define VNCR_VBAR_EL1 0x250
+#define VNCR_TCR2_EL1 0x270
+#define VNCR_PIRE0_EL1 0x290
+#define VNCR_PIRE0_EL2 0x298
+#define VNCR_PIR_EL1 0x2A0
+#define VNCR_ICH_LR0_EL2 0x400
+#define VNCR_ICH_LR1_EL2 0x408
+#define VNCR_ICH_LR2_EL2 0x410
+#define VNCR_ICH_LR3_EL2 0x418
+#define VNCR_ICH_LR4_EL2 0x420
+#define VNCR_ICH_LR5_EL2 0x428
+#define VNCR_ICH_LR6_EL2 0x430
+#define VNCR_ICH_LR7_EL2 0x438
+#define VNCR_ICH_LR8_EL2 0x440
+#define VNCR_ICH_LR9_EL2 0x448
+#define VNCR_ICH_LR10_EL2 0x450
+#define VNCR_ICH_LR11_EL2 0x458
+#define VNCR_ICH_LR12_EL2 0x460
+#define VNCR_ICH_LR13_EL2 0x468
+#define VNCR_ICH_LR14_EL2 0x470
+#define VNCR_ICH_LR15_EL2 0x478
+#define VNCR_ICH_AP0R0_EL2 0x480
+#define VNCR_ICH_AP0R1_EL2 0x488
+#define VNCR_ICH_AP0R2_EL2 0x490
+#define VNCR_ICH_AP0R3_EL2 0x498
+#define VNCR_ICH_AP1R0_EL2 0x4A0
+#define VNCR_ICH_AP1R1_EL2 0x4A8
+#define VNCR_ICH_AP1R2_EL2 0x4B0
+#define VNCR_ICH_AP1R3_EL2 0x4B8
+#define VNCR_ICH_HCR_EL2 0x4C0
+#define VNCR_ICH_VMCR_EL2 0x4C8
+#define VNCR_VDISR_EL2 0x500
+#define VNCR_PMBLIMITR_EL1 0x800
+#define VNCR_PMBPTR_EL1 0x810
+#define VNCR_PMBSR_EL1 0x820
+#define VNCR_PMSCR_EL1 0x828
+#define VNCR_PMSEVFR_EL1 0x830
+#define VNCR_PMSICR_EL1 0x838
+#define VNCR_PMSIRR_EL1 0x840
+#define VNCR_PMSLATFR_EL1 0x848
+#define VNCR_TRFCR_EL1 0x880
+#define VNCR_MPAM1_EL1 0x900
+#define VNCR_MPAMHCR_EL2 0x930
+#define VNCR_MPAMVPMV_EL2 0x938
+#define VNCR_MPAMVPM0_EL2 0x940
+#define VNCR_MPAMVPM1_EL2 0x948
+#define VNCR_MPAMVPM2_EL2 0x950
+#define VNCR_MPAMVPM3_EL2 0x958
+#define VNCR_MPAMVPM4_EL2 0x960
+#define VNCR_MPAMVPM5_EL2 0x968
+#define VNCR_MPAMVPM6_EL2 0x970
+#define VNCR_MPAMVPM7_EL2 0x978
+
+#endif /* __ARM64_VNCR_MAPPING_H__ */
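
Every entry is a byte offset into a single backing page, so reading a VNCR-backed register reduces to pointer arithmetic. A hedged illustration (the page pointer and helper are for exposition only, not a kernel API):

    #include <stdint.h>

    #define VNCR_HCR_EL2 0x078 /* from the table above */

    /* Read a VNCR-backed register, given the base of the VNCR page */
    static inline uint64_t vncr_read(const void *vncr_page, unsigned int offset)
    {
        return *(const volatile uint64_t *)((const char *)vncr_page + offset);
    }

    /* usage: uint64_t hcr = vncr_read(page, VNCR_HCR_EL2); */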
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646591c67e7a..1322652a22de 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1768,6 +1768,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
+#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
+static bool has_lpa2_at_stage1(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
+}
+
+static bool has_lpa2_at_stage2(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
+}
+
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 mmfr0;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
+}
+#else
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return false;
+}
+#endif
+
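
Both helpers lean on cpuid_feature_extract_unsigned_field(), which pulls a 4-bit field out of an ID register. A stand-alone equivalent, assuming the kernel helper's default 4-bit field width:

    #include <stdint.h>

    /* ID register fields are 4 bits wide by default */
    static inline unsigned int extract_unsigned_field(uint64_t reg, int shift)
    {
        return (unsigned int)(reg >> shift) & 0xf;
    }

    /* e.g. with 4K pages, TGRAN sits at bits [31:28], i.e. shift 28 */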
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
@@ -1839,6 +1872,10 @@ static int __init __kpti_install_ng_mappings(void *__unused)
static void __init kpti_install_ng_mappings(void)
{
+ /* Check whether KPTI is going to be used */
+ if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ return;
+
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
@@ -2339,7 +2376,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
- ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP)
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
@@ -2731,6 +2768,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
+ {
+ .desc = "52-bit Virtual Addressing for KVM (LPA2)",
+ .capability = ARM64_HAS_LPA2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
{},
};
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 1a777715199f..6c3c8ca73e7f 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,16 +21,14 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
+ select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_MMU_NOTIFIER
- select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
select KVM_VFIO
- select HAVE_KVM_EVENTFD
- select HAVE_KVM_IRQFD
select HAVE_KVM_DIRTY_RING_ACQ_REL
select NEED_KVM_DIRTY_RING_WITH_BITMAP
select HAVE_KVM_MSI
@@ -41,7 +39,6 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
- select INTERVAL_TREE
select XARRAY_MULTI
help
Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 13ba691b848f..9dec8c419bf4 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -295,8 +295,7 @@ static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
struct arch_timer_context *ctx;
- ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
- : vcpu_vtimer(vcpu);
+ ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
return kvm_counter_compute_delta(ctx, val);
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e5f75f1f1085..a25265aca432 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -221,7 +221,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = vgic_present;
break;
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -410,7 +409,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
-
+ kvm_vgic_vcpu_destroy(vcpu);
kvm_arm_vcpu_destroy(vcpu);
}
@@ -669,6 +668,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
+ if (vcpu_has_nv(vcpu)) {
+ ret = kvm_init_nv_sysregs(vcpu->kvm);
+ if (ret)
+ return ret;
+ }
+
ret = kvm_timer_enable(vcpu);
if (ret)
return ret;
@@ -1837,6 +1842,7 @@ static int kvm_init_vector_slots(void)
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
{
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
+ u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
unsigned long tcr;
/*
@@ -1859,6 +1865,10 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
}
tcr &= ~TCR_T0SZ_MASK;
tcr |= TCR_T0SZ(hyp_va_bits);
+ tcr &= ~TCR_EL2_PS_MASK;
+ tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0));
+ if (kvm_lpa2_is_enabled())
+ tcr |= TCR_EL2_DS;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr();
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 06185216a297..431fd429932d 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -1012,6 +1012,7 @@ enum fgt_group_id {
HDFGRTR_GROUP,
HDFGWTR_GROUP,
HFGITR_GROUP,
+ HAFGRTR_GROUP,
/* Must be last */
__NR_FGT_GROUP_IDS__
@@ -1042,10 +1043,20 @@ enum fg_filter_id {
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
+ SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0),
+ SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0),
+ SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0),
+ SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0),
+ SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0),
SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
+ SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
+ SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0),
+ SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1),
SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1),
@@ -1107,6 +1118,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1),
SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1),
/* HFGITR_EL2 */
+ SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1),
+ SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1),
+ SR_FGT(OP_GCSPUSHX, HFGITR, nGCSEPP, 0),
+ SR_FGT(OP_GCSPOPX, HFGITR, nGCSEPP, 0),
+ SR_FGT(OP_GCSPUSHM, HFGITR, nGCSPUSHM_EL1, 0),
SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0),
SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0),
SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1),
@@ -1674,6 +1690,49 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1),
SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1),
SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1),
+ /*
+ * HAFGRTR_EL2
+ */
+ SR_FGT(SYS_AMEVTYPER1_EL0(15), HAFGRTR, AMEVTYPER115_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(14), HAFGRTR, AMEVTYPER114_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(13), HAFGRTR, AMEVTYPER113_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(12), HAFGRTR, AMEVTYPER112_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(11), HAFGRTR, AMEVTYPER111_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(10), HAFGRTR, AMEVTYPER110_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(9), HAFGRTR, AMEVTYPER19_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(8), HAFGRTR, AMEVTYPER18_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(7), HAFGRTR, AMEVTYPER17_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(6), HAFGRTR, AMEVTYPER16_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(5), HAFGRTR, AMEVTYPER15_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(4), HAFGRTR, AMEVTYPER14_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(3), HAFGRTR, AMEVTYPER13_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(2), HAFGRTR, AMEVTYPER12_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(1), HAFGRTR, AMEVTYPER11_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(0), HAFGRTR, AMEVTYPER10_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(15), HAFGRTR, AMEVCNTR115_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(14), HAFGRTR, AMEVCNTR114_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(13), HAFGRTR, AMEVCNTR113_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(12), HAFGRTR, AMEVCNTR112_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(11), HAFGRTR, AMEVCNTR111_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(10), HAFGRTR, AMEVCNTR110_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(9), HAFGRTR, AMEVCNTR19_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(8), HAFGRTR, AMEVCNTR18_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(7), HAFGRTR, AMEVCNTR17_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(6), HAFGRTR, AMEVCNTR16_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(5), HAFGRTR, AMEVCNTR15_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(4), HAFGRTR, AMEVCNTR14_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(3), HAFGRTR, AMEVCNTR13_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(2), HAFGRTR, AMEVCNTR12_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(1), HAFGRTR, AMEVCNTR11_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(0), HAFGRTR, AMEVCNTR10_EL0, 1),
+ SR_FGT(SYS_AMCNTENCLR1_EL0, HAFGRTR, AMCNTEN1, 1),
+ SR_FGT(SYS_AMCNTENSET1_EL0, HAFGRTR, AMCNTEN1, 1),
+ SR_FGT(SYS_AMCNTENCLR0_EL0, HAFGRTR, AMCNTEN0, 1),
+ SR_FGT(SYS_AMCNTENSET0_EL0, HAFGRTR, AMCNTEN0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(3), HAFGRTR, AMEVCNTR03_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
};
static union trap_config get_trap_config(u32 sysreg)
@@ -1894,6 +1953,10 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
break;
+ case HAFGRTR_GROUP:
+ val = sanitised_sys_reg(vcpu, HAFGRTR_EL2);
+ break;
+
case HFGITR_GROUP:
val = sanitised_sys_reg(vcpu, HFGITR_EL2);
switch (tc.fgf) {
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9ddcfe2c3e57..9e13c1bc2ad5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
+ esr_fsc_is_permission_fault(esr))) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index f99d8af0b9af..a038320cdb08 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -79,6 +79,45 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
clr |= ~hfg & __ ## reg ## _nMASK; \
} while(0)
+#define update_fgt_traps_cs(vcpu, reg, clr, set) \
+ do { \
+ struct kvm_cpu_context *hctxt = \
+ &this_cpu_ptr(&kvm_host_data)->host_ctxt; \
+ u64 c = 0, s = 0; \
+ \
+ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
+ compute_clr_set(vcpu, reg, c, s); \
+ s |= set; \
+ c |= clr; \
+ if (c || s) { \
+ u64 val = __ ## reg ## _nMASK; \
+ val |= s; \
+ val &= ~c; \
+ write_sysreg_s(val, SYS_ ## reg); \
+ } \
+ } while(0)
+
+#define update_fgt_traps(vcpu, reg) \
+ update_fgt_traps_cs(vcpu, reg, 0, 0)
+
+/*
+ * Validate the fine grain trap masks.
+ * Check that the masks do not overlap and that all bits are accounted for.
+ */
+#define CHECK_FGT_MASKS(reg) \
+ do { \
+ BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \
+ BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \
+ (__ ## reg ## _nMASK))); \
+ } while(0)
+
+static inline bool cpu_has_amu(void)
+{
+ u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+
+ return cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_EL1_AMU_SHIFT);
+}
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
@@ -86,6 +125,14 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
u64 r_val, w_val;
+ CHECK_FGT_MASKS(HFGRTR_EL2);
+ CHECK_FGT_MASKS(HFGWTR_EL2);
+ CHECK_FGT_MASKS(HFGITR_EL2);
+ CHECK_FGT_MASKS(HDFGRTR_EL2);
+ CHECK_FGT_MASKS(HDFGWTR_EL2);
+ CHECK_FGT_MASKS(HAFGRTR_EL2);
+ CHECK_FGT_MASKS(HCRX_EL2);
+
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
@@ -110,12 +157,15 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
}
- /* The default is not to trap anything but ACCDATA_EL1 */
- r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+ /* The default is to trap everything not handled or supported in KVM. */
+ tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
+ HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
+
+ r_val = __HFGRTR_EL2_nMASK & ~tmp;
r_val |= r_set;
r_val &= ~r_clr;
- w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+ w_val = __HFGWTR_EL2_nMASK & ~tmp;
w_val |= w_set;
w_val &= ~w_clr;
@@ -125,34 +175,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return;
- ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+ update_fgt_traps(vcpu, HFGITR_EL2);
+ update_fgt_traps(vcpu, HDFGRTR_EL2);
+ update_fgt_traps(vcpu, HDFGWTR_EL2);
- r_set = r_clr = 0;
- compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
- r_val = __HFGITR_EL2_nMASK;
- r_val |= r_set;
- r_val &= ~r_clr;
-
- write_sysreg_s(r_val, SYS_HFGITR_EL2);
-
- ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
- ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
-
- r_clr = r_set = w_clr = w_set = 0;
-
- compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
- compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
-
- r_val = __HDFGRTR_EL2_nMASK;
- r_val |= r_set;
- r_val &= ~r_clr;
-
- w_val = __HDFGWTR_EL2_nMASK;
- w_val |= w_set;
- w_val &= ~w_clr;
-
- write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
- write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
+ if (cpu_has_amu())
+ update_fgt_traps(vcpu, HAFGRTR_EL2);
}
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
@@ -171,6 +199,9 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+
+ if (cpu_has_amu())
+ write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -591,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
- valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
+ valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index e91922daa8ca..51f043649146 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -69,6 +69,8 @@
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
+#define PVM_ID_AA64PFR2_ALLOW 0ULL
+
/*
* Allow for protected VMs:
* - Mixed-endian
@@ -101,6 +103,7 @@
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
+ * - Control for cache maintenance permission
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
@@ -108,7 +111,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
)
/*
@@ -133,6 +137,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
+#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
+
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
@@ -178,10 +184,18 @@
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
+/* Restrict pointer authentication to the basic version. */
+#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
+ )
+
+#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
+ )
+
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
@@ -196,8 +210,8 @@
)
#define PVM_ID_AA64ISAR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 1cc06e6797bd..2994878d68ea 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -122,11 +122,7 @@ alternative_if ARM64_HAS_CNP
alternative_else_nop_endif
msr ttbr0_el2, x2
- /*
- * Set the PS bits in TCR_EL2.
- */
ldr x0, [x0, #NVHE_INIT_TCR_EL2]
- tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
msr tcr_el2, x0
isb
@@ -292,6 +288,8 @@ alternative_else_nop_endif
mov sp, x0
/* And turn the MMU back on! */
+ dsb nsh
+ isb
set_sctlr_el2 x2
ret x1
SYM_FUNC_END(__pkvm_init_switch_pgd)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 8d0a5834e883..861c76021a25 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr)
hyp_put_page(&host_s2_pool, addr);
}
-static void host_s2_free_unlinked_table(void *addr, u32 level)
+static void host_s2_free_unlinked_table(void *addr, s8 level)
{
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}
@@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
- u32 level;
+ s8 level;
int ret;
hyp_assert_lock_held(&host_mmu.lock);
@@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
level++;
- } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
+ } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
!(kvm_level_supports_block_mapping(level) &&
range_included(&cur, range)));
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 65a7a186d7b2..b01a3d1078a8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -260,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
- __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
dsb(ish);
isb();
}
@@ -275,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
{
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
- if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
+ if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
slot->addr = ctx->addr;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 9d23a51d7f75..84b5c3f387d8 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
cptr_set |= CPTR_EL2_TTA;
}
+ /* Trap External Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
+ mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
+
vcpu->arch.mdcr_el2 |= mdcr_set;
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
vcpu->arch.cptr_el2 |= cptr_set;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0d5e0a89ddce..bc58d1b515af 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -181,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (!kvm_pte_valid(ctx->old))
return 0;
- if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
+ if (ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
phys = kvm_pte_to_phys(ctx->old);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 1966fdee740e..c651df904fe3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
static bool kvm_phys_is_valid(u64 phys)
{
- return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
+ u64 parange_max = kvm_get_parange_max();
+ u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
+
+ return phys < BIT(shift);
}
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
@@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
return IS_ALIGNED(ctx->addr, granule);
}
-static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
+static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
{
u64 shift = kvm_granule_shift(level);
u64 mask = BIT(PAGE_SHIFT - 3) - 1;
@@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
return (addr & mask) >> shift;
}
-static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
+static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
{
struct kvm_pgtable pgt = {
.ia_bits = ia_bits,
@@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
-static bool kvm_pte_table(kvm_pte_t pte, u32 level)
+static bool kvm_pte_table(kvm_pte_t pte, s8 level)
{
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return false;
if (!kvm_pte_valid(pte))
@@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops
return pte;
}
-static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
+static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
{
kvm_pte_t pte = kvm_phys_to_pte(pa);
- u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
- KVM_PTE_TYPE_BLOCK;
+ u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
+ KVM_PTE_TYPE_BLOCK;
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
@@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
struct kvm_pgtable_mm_ops *mm_ops,
- kvm_pteref_t pteref, u32 level)
+ kvm_pteref_t pteref, s8 level)
{
enum kvm_pgtable_walk_flags flags = data->walker->flags;
kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
@@ -272,12 +275,13 @@ out:
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
{
u32 idx;
int ret = 0;
- if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
+ if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
+ level > KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
@@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct leaf_walk_data {
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
}
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level)
+ kvm_pte_t *ptep, s8 *level)
{
struct leaf_walk_data data;
struct kvm_pgtable_walker walker = {
@@ -408,7 +412,8 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
}
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (hyp_map_walker_try_leaf(ctx, data))
return 0;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
@@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
- u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
+ s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
+ ARM64_HW_PGTABLE_LEVELS(va_bits);
+
+ if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
+ start_level > KVM_PGTABLE_LAST_LEVEL)
+ return -EINVAL;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = va_bits;
- pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
+ pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
pgt->force_pte_cb = NULL;
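
The start_level arithmetic is easy to sanity-check numerically: ARM64_HW_PGTABLE_LEVELS(va_bits) expands to ((va_bits) - 4) / (PAGE_SHIFT - 3). The sketch below assumes 4K pages (PAGE_SHIFT = 12, KVM_PGTABLE_LAST_LEVEL = 3):

    #include <stdio.h>

    #define PAGE_SHIFT             12 /* assuming 4K pages */
    #define KVM_PGTABLE_LAST_LEVEL  3

    /* Mirrors ARM64_HW_PGTABLE_LEVELS() */
    static int hw_pgtable_levels(int va_bits)
    {
        return (va_bits - 4) / (PAGE_SHIFT - 3);
    }

    int main(void)
    {
        /* 48-bit VA needs 4 levels -> start_level 0; 39-bit VA needs 3 -> 1 */
        printf("%d %d\n",
               KVM_PGTABLE_LAST_LEVEL + 1 - hw_pgtable_levels(48),
               KVM_PGTABLE_LAST_LEVEL + 1 - hw_pgtable_levels(39));
        return 0;
    }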
@@ -624,7 +634,7 @@ struct stage2_map_data {
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
u64 vtcr = VTCR_EL2_FLAGS;
- u8 lvls;
+ s8 lvls;
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
vtcr |= VTCR_EL2_T0SZ(phys_shift);
@@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
lvls = stage2_pgtable_levels(phys_shift);
if (lvls < 2)
lvls = 2;
+
+ /*
+ * When LPA2 is enabled, the HW supports an extra level of translation
+ * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
+ * as an addition to SL0 to enable encoding this extra start level.
+ * However, since we always use concatenated pages for the first level
+ * lookup, we will never need this extra level and therefore do not need
+ * to touch SL2.
+ */
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
#ifdef CONFIG_ARM64_HW_AFDBM
@@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
vtcr |= VTCR_EL2_HA;
#endif /* CONFIG_ARM64_HW_AFDBM */
+ if (kvm_lpa2_is_enabled())
+ vtcr |= VTCR_EL2_DS;
+
/* Set the vmid bits */
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
VTCR_EL2_VS_16BIT :
@@ -711,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
if (prot & KVM_PGTABLE_PROT_W)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -902,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
{
u64 phys = stage2_map_walker_phys_addr(ctx, data);
- if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+ if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
return false;
return kvm_block_mapping_supported(ctx, phys);
@@ -981,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (ret != -E2BIG)
return ret;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
if (!data->memcache)
@@ -1151,7 +1175,7 @@ struct stage2_attr_data {
kvm_pte_t attr_set;
kvm_pte_t attr_clr;
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -1194,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
u64 size, kvm_pte_t attr_set,
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
- u32 *level, enum kvm_pgtable_walk_flags flags)
+ s8 *level, enum kvm_pgtable_walk_flags flags)
{
int ret;
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1296,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot)
{
int ret;
- u32 level;
+ s8 level;
kvm_pte_t set = 0, clr = 0;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
@@ -1349,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
- u64 phys, u32 level,
+ u64 phys, s8 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte)
{
@@ -1407,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
* fully populated tree up to the PTE entries. Note that @level is
* interpreted as in "level @level entry".
*/
-static int stage2_block_get_nr_page_tables(u32 level)
+static int stage2_block_get_nr_page_tables(s8 level)
{
switch (level) {
case 1:
@@ -1418,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level)
return 0;
default:
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
- level >= KVM_PGTABLE_MAX_LEVELS);
+ level > KVM_PGTABLE_LAST_LEVEL);
return -EINVAL;
};
}
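
The constants above fall out of the table geometry: replacing a block at level N with a fully populated subtree needs one table per entry at each intermediate level, so on 4K pages a level-1 block needs 1 + 512 = 513 tables and a level-2 block needs just 1. A minimal sketch of the count:

    #include <stdio.h>

    #define PTRS_PER_PTE 512 /* 4K pages: 512 entries per table */

    static int nr_tables_for_block(int level, int last_level)
    {
        int total = 0, per_level = 1;

        for (int l = level; l < last_level; l++) {
            total += per_level;        /* tables needed at level l + 1 */
            per_level *= PTRS_PER_PTE;
        }
        return total;
    }

    int main(void)
    {
        /* prints: level 1 -> 513, level 2 -> 1, level 3 -> 0 */
        for (int l = 1; l <= 3; l++)
            printf("level %d -> %d\n", l, nr_tables_for_block(l, 3));
        return 0;
    }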
@@ -1431,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_s2_mmu *mmu;
kvm_pte_t pte = ctx->old, new, *childp;
enum kvm_pgtable_prot prot;
- u32 level = ctx->level;
+ s8 level = ctx->level;
bool force_pte;
int nr_pages;
u64 phys;
/* No huge-pages exist at the last level */
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return 0;
/* We only split valid block mappings */
@@ -1514,7 +1538,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
u64 vtcr = mmu->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
@@ -1537,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
@@ -1573,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->pgd = NULL;
}
-void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
struct kvm_pgtable_walker walker = {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d87c8fcc4c24..d14504821b79 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -223,12 +223,12 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
{
struct page *page = container_of(head, struct page, rcu_head);
void *pgtable = page_to_virt(page);
- u32 level = page_private(page);
+ s8 level = page_private(page);
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
}
-static void stage2_free_unlinked_table(void *addr, u32 level)
+static void stage2_free_unlinked_table(void *addr, s8 level)
{
struct page *page = virt_to_page(addr);
@@ -804,13 +804,13 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
struct kvm_pgtable pgt = {
.pgd = (kvm_pteref_t)kvm->mm->pgd,
.ia_bits = vabits_actual,
- .start_level = (KVM_PGTABLE_MAX_LEVELS -
- CONFIG_PGTABLE_LEVELS),
+ .start_level = (KVM_PGTABLE_LAST_LEVEL -
+ CONFIG_PGTABLE_LEVELS + 1),
.mm_ops = &kvm_user_mm_ops,
};
unsigned long flags;
kvm_pte_t pte = 0; /* Keep GCC quiet... */
- u32 level = ~0;
+ s8 level = S8_MAX;
int ret;
/*
@@ -829,7 +829,9 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
* Not seeing an error, but not updating level? Something went
* deeply wrong...
*/
- if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
+ if (WARN_ON(level > KVM_PGTABLE_LAST_LEVEL))
+ return -EFAULT;
+ if (WARN_ON(level < KVM_PGTABLE_FIRST_LEVEL))
return -EFAULT;
/* Oops, the userspace PTs are gone... Replay the fault */
@@ -1374,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
- unsigned long fault_status)
+ bool fault_is_perm)
{
int ret = 0;
bool write_fault, writable, force_pte = false;
@@ -1388,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
- unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
- fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
+ if (fault_is_perm)
+ fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
- if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
+ if (fault_is_perm && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1409,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
- if (fault_status != ESR_ELx_FSC_PERM ||
- (logging_active && write_fault)) {
+ if (!fault_is_perm || (logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
if (ret)
@@ -1527,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
- if (fault_status == ESR_ELx_FSC_PERM &&
- fault_granule > PAGE_SIZE)
+ if (fault_is_perm && fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1541,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
}
- if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
+ if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1567,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
+ if (fault_is_perm && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1618,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
*/
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
- unsigned long fault_status;
+ unsigned long esr;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
unsigned long hva;
@@ -1626,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;
- fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+ esr = kvm_vcpu_get_esr(vcpu);
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (fault_status == ESR_ELx_FSC_FAULT) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
@@ -1666,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- if (fault_status != ESR_ELx_FSC_FAULT &&
- fault_status != ESR_ELx_FSC_PERM &&
- fault_status != ESR_ELx_FSC_ACCESS) {
+ if (!esr_fsc_is_translation_fault(esr) &&
+ !esr_fsc_is_permission_fault(esr) &&
+ !esr_fsc_is_access_flag_fault(esr)) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1730,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
- if (fault_status == ESR_ELx_FSC_ACCESS) {
+ if (esr_fsc_is_access_flag_fault(esr)) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;
}
- ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
+ esr_fsc_is_permission_fault(esr));
if (ret == 0)
ret = 1;
out:
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 042695a210ce..ba95d044bc98 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -23,13 +23,9 @@
* This list should get updated as new features get added to the NV
* support, and new extensions to the architecture.
*/
-void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static u64 limit_nv_id_reg(u32 id, u64 val)
{
- u32 id = reg_to_encoding(r);
- u64 val, tmp;
-
- val = p->regval;
+ u64 tmp;
switch (id) {
case SYS_ID_AA64ISAR0_EL1:
@@ -158,5 +154,17 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
break;
}
- p->regval = val;
+ return val;
+}
+int kvm_init_nv_sysregs(struct kvm *kvm)
+{
+ mutex_lock(&kvm->arch.config_lock);
+
+ for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++)
+ kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i),
+ kvm->arch.id_regs[i]);
+
+ mutex_unlock(&kvm->arch.config_lock);
+
+ return 0;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5bb4de162cab..68d1d05672bd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -280,12 +280,11 @@ int __init kvm_set_ipa_limit(void)
parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
/*
- * IPA size beyond 48 bits could not be supported
- * on either 4K or 16K page size. Hence let's cap
- * it to 48 bits, in case it's reported as larger
- * on the system.
+ * IPA size beyond 48 bits for 4K and 16K page size is only supported
+ * when LPA2 is available. So if we have LPA2, enable it, else cap to 48
+ * bits, in case it's reported as larger on the system.
*/
- if (PAGE_SIZE != SZ_64K)
+ if (!kvm_lpa2_is_enabled() && PAGE_SIZE != SZ_64K)
parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4735e1b37fb3..b4d3486d66ff 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -45,44 +45,170 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);
-static bool read_from_write_only(struct kvm_vcpu *vcpu,
- struct sys_reg_params *params,
- const struct sys_reg_desc *r)
+static bool bad_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r,
+ const char *msg)
{
- WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
+ WARN_ONCE(1, "Unexpected %s\n", msg);
print_sys_reg_instr(params);
kvm_inject_undefined(vcpu);
return false;
}
+static bool read_from_write_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ return bad_trap(vcpu, params, r,
+ "sys_reg read to write-only register");
+}
+
static bool write_to_read_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
{
- WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
- print_sys_reg_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
+ return bad_trap(vcpu, params, r,
+ "sys_reg write to read-only register");
+}
+
+#define PURE_EL2_SYSREG(el2) \
+ case el2: { \
+ *el1r = el2; \
+ return true; \
+ }
+
+#define MAPPED_EL2_SYSREG(el2, el1, fn) \
+ case el2: { \
+ *xlate = fn; \
+ *el1r = el1; \
+ return true; \
+ }
+
+static bool get_el2_to_el1_mapping(unsigned int reg,
+ unsigned int *el1r, u64 (**xlate)(u64))
+{
+ switch (reg) {
+ PURE_EL2_SYSREG( VPIDR_EL2 );
+ PURE_EL2_SYSREG( VMPIDR_EL2 );
+ PURE_EL2_SYSREG( ACTLR_EL2 );
+ PURE_EL2_SYSREG( HCR_EL2 );
+ PURE_EL2_SYSREG( MDCR_EL2 );
+ PURE_EL2_SYSREG( HSTR_EL2 );
+ PURE_EL2_SYSREG( HACR_EL2 );
+ PURE_EL2_SYSREG( VTTBR_EL2 );
+ PURE_EL2_SYSREG( VTCR_EL2 );
+ PURE_EL2_SYSREG( RVBAR_EL2 );
+ PURE_EL2_SYSREG( TPIDR_EL2 );
+ PURE_EL2_SYSREG( HPFAR_EL2 );
+ PURE_EL2_SYSREG( CNTHCTL_EL2 );
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
+ translate_sctlr_el2_to_sctlr_el1 );
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1,
+ translate_cptr_el2_to_cpacr_el1 );
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1,
+ translate_ttbr0_el2_to_ttbr0_el1 );
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL );
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1,
+ translate_tcr_el2_to_tcr_el1 );
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL );
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL );
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL );
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
+ default:
+ return false;
+ }
}
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val = 0x8badf00d8badf00d;
+ u64 (*xlate)(u64) = NULL;
+ unsigned int el1r;
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ goto memory_read;
- if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
- __vcpu_read_sys_reg_from_cpu(reg, &val))
+ if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
+ if (!is_hyp_ctxt(vcpu))
+ goto memory_read;
+
+ /*
+ * If this register does not have an EL1 counterpart,
+ * then read the stored EL2 version.
+ */
+ if (reg == el1r)
+ goto memory_read;
+
+ /*
+ * If we have a non-VHE guest and the sysreg
+ * requires translation to be used at EL1, use the
+ * in-memory copy instead.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
+ goto memory_read;
+
+ /* Get the current version of the EL1 counterpart. */
+ WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
return val;
+ }
+ /* EL1 register can't be on the CPU if the guest is in vEL2. */
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ goto memory_read;
+
+ if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ return val;
+
+memory_read:
return __vcpu_sys_reg(vcpu, reg);
}
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
- if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
- __vcpu_write_sys_reg_to_cpu(val, reg))
+ u64 (*xlate)(u64) = NULL;
+ unsigned int el1r;
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ goto memory_write;
+
+ if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
+ if (!is_hyp_ctxt(vcpu))
+ goto memory_write;
+
+ /*
+ * Always store a copy of the write to memory to avoid having
+ * to reverse-translate virtual EL2 system registers for a
+ * non-VHE guest hypervisor.
+ */
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ /* No EL1 counterpart? We're done here. */
+ if (reg == el1r)
+ return;
+
+ if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
+ val = xlate(val);
+
+ /* Redirect this to the EL1 version of the register. */
+ WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r));
+ return;
+ }
+
+ /* EL1 register can't be on the CPU if the guest is in vEL2. */
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ goto memory_write;
+
+ if (__vcpu_write_sys_reg_to_cpu(val, reg))
return;
- __vcpu_sys_reg(vcpu, reg) = val;
+memory_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
}
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
@@ -1505,8 +1631,6 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
return write_to_read_only(vcpu, p, r);
p->regval = read_id_reg(vcpu, r);
- if (vcpu_has_nv(vcpu))
- access_nested_id_reg(vcpu, p, r);
return true;
}
@@ -1885,6 +2009,32 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static bool bad_vncr_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * We really shouldn't be here, and this is likely the result
+ * of a misconfigured trap, as this register should target the
+ * VNCR page, and nothing else.
+ */
+ return bad_trap(vcpu, p, r,
+ "trap of VNCR-backed register");
+}
+
+static bool bad_redir_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * We really shouldn't be here, and this is likely the result
+ * of a misconfigured trap, as this register should target the
+ * corresponding EL1, and nothing else.
+ */
+ return bad_trap(vcpu, p, r,
+ "trap of EL2 register redirected to EL1");
+}
+
#define EL2_REG(name, acc, rst, v) { \
SYS_DESC(SYS_##name), \
.access = acc, \
@@ -1894,6 +2044,9 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
.val = v, \
}
+#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
+#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
+
/*
* EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
@@ -2508,32 +2661,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
- EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
- EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
+ EL2_REG_VNCR(VPIDR_EL2, reset_unknown, 0),
+ EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
- EL2_REG(HCR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HCR_EL2, reset_val, 0),
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
- EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
- EL2_REG(HACR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HACR_EL2, reset_val, 0),
- EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HCRX_EL2, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
- EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
- EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
+ EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
- EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
- EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
- EL2_REG(ELR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
+ EL2_REG_REDIR(SPSR_EL2, reset_val, 0),
+ EL2_REG_REDIR(ELR_EL2, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
@@ -2549,10 +2703,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
- EL2_REG(ESR_EL2, access_rw, reset_val, 0),
+ EL2_REG_REDIR(ESR_EL2, reset_val, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
- EL2_REG(FAR_EL2, access_rw, reset_val, 0),
+ EL2_REG_REDIR(FAR_EL2, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
@@ -2565,24 +2719,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
- EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
- EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
- EL12_REG(CPACR, access_rw, reset_val, 0),
- EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
- EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
- EL12_REG(TCR, access_vm_reg, reset_val, 0),
- { SYS_DESC(SYS_SPSR_EL12), access_spsr},
- { SYS_DESC(SYS_ELR_EL12), access_elr},
- EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
- EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
- EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
- EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
- EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
- EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
- EL12_REG(VBAR, access_rw, reset_val, 0),
- EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
EL12_REG(CNTKCTL, access_rw, reset_val, 0),
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index c8c3cb812783..e949e1d0fd9f 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
vgic_v4_teardown(kvm);
}
-void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -379,29 +379,39 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ vgic_unregister_redist_iodev(vcpu);
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ }
}
-static void __kvm_vgic_destroy(struct kvm *kvm)
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&kvm->slots_lock);
+ __kvm_vgic_vcpu_destroy(vcpu);
+ mutex_unlock(&kvm->slots_lock);
+}
+
+void kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
- lockdep_assert_held(&kvm->arch.config_lock);
+ mutex_lock(&kvm->slots_lock);
vgic_debug_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vgic_vcpu_destroy(vcpu);
+ __kvm_vgic_vcpu_destroy(vcpu);
+
+ mutex_lock(&kvm->arch.config_lock);
kvm_vgic_dist_destroy(kvm);
-}
-void kvm_vgic_destroy(struct kvm *kvm)
-{
- mutex_lock(&kvm->arch.config_lock);
- __kvm_vgic_destroy(kvm);
mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->slots_lock);
}
/**
@@ -469,25 +479,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
type = VGIC_V3;
}
- if (ret) {
- __kvm_vgic_destroy(kvm);
+ if (ret)
goto out;
- }
+
dist->ready = true;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
ret = vgic_register_dist_iodev(kvm, dist_base, type);
- if (ret) {
+ if (ret)
kvm_err("Unable to register VGIC dist MMIO regions\n");
- kvm_vgic_destroy(kvm);
- }
- mutex_unlock(&kvm->slots_lock);
- return ret;
+ goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);
+out_slots:
mutex_unlock(&kvm->slots_lock);
+
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
return ret;
}
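The teardown rework above fixes the lock ordering rather than just moving code: kvm->slots_lock (needed to unregister the redistributor iodevs) is now always taken first, and kvm->arch.config_lock only ever nests inside it. A minimal sketch of the ordering kvm_vgic_destroy() now follows (hypothetical helper name, bodies elided):

static void vgic_teardown_order_sketch(struct kvm *kvm)
{
	mutex_lock(&kvm->slots_lock);		/* outer: iodev registration state */
	/* per-vcpu teardown, may unregister MMIO devices */
	mutex_lock(&kvm->arch.config_lock);	/* inner: distributor/irq state */
	/* free distributor state */
	mutex_unlock(&kvm->arch.config_lock);
	mutex_unlock(&kvm->slots_lock);
}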
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2dad2d095160..e2764d0ffa9f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -590,7 +590,11 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
unsigned long flags;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+
irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ if (irq)
+ vgic_get_irq_kref(irq);
+
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
return irq;
@@ -769,6 +773,7 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, irq, flags);
+ vgic_put_irq(kvm, irq);
return 0;
}
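After this hunk, the cache lookup returns its result with a reference already taken under lpi_list_lock, and the caller owns dropping it; the added vgic_put_irq() in vgic_its_inject_cached_translation() balances the new vgic_get_irq_kref(). A sketch of the resulting caller contract, using the names from this file:

	irq = vgic_its_check_cache(kvm, db, devid, eventid);
	if (!irq)
		return -EWOULDBLOCK;
	/* ... queue the LPI under irq->irq_lock ... */
	vgic_put_irq(kvm, irq);	/* balance the kref taken by the lookup */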
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 89117ba2528a..c15ee1df036a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -357,31 +357,13 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (test_bit(i, &val)) {
- /*
- * pending_latch is set irrespective of irq type
- * (level or edge) to avoid dependency that VM should
- * restore irq config before pending info.
- */
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
- } else {
- irq->pending_latch = false;
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ int ret;
- vgic_put_irq(vcpu->kvm, irq);
- }
+ ret = vgic_uaccess_write_spending(vcpu, addr, len, val);
+ if (ret)
+ return ret;
- return 0;
+ return vgic_uaccess_write_cpending(vcpu, addr, len, ~val);
}
/* We want to avoid outer shareable. */
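The userspace pending write above is now composed from the two existing helpers: bits set in val become pending, and the complement clears the rest, so the full pending state is restored in one call. A worked example, assuming a 1-byte access covering INTIDs 0-7:

	unsigned long val = 0x06;	/* user wants INTIDs 1 and 2 pending */

	ret = vgic_uaccess_write_spending(vcpu, addr, 1, val);		/* sets 1 and 2 */
	if (!ret)
		ret = vgic_uaccess_write_cpending(vcpu, addr, 1, ~val);	/* clears 0 and 3-7 */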
@@ -820,7 +802,7 @@ out_unlock:
return ret;
}
-static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
+void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
{
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
@@ -833,6 +815,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
unsigned long c;
int ret = 0;
+ lockdep_assert_held(&kvm->slots_lock);
+
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
if (ret)
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index ff558c05e990..cf76523a2194 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -301,9 +301,8 @@ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
}
-void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
+ unsigned long val, bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
@@ -312,14 +311,22 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- /* GICD_ISPENDR0 SGI bits are WI */
- if (is_vgic_v2_sgi(vcpu, irq)) {
+ /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */
+ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
vgic_put_irq(vcpu->kvm, irq);
continue;
}
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ /*
+ * GICv2 SGIs are terribly broken. We can't restore
+ * the source of the interrupt, so just pick the vcpu
+ * itself as the source...
+ */
+ if (is_vgic_v2_sgi(vcpu, irq))
+ irq->source |= BIT(vcpu->vcpu_id);
+
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
/* HW SGI? Ask the GIC to inject it */
int err;
@@ -335,7 +342,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
}
irq->pending_latch = true;
- if (irq->hw)
+ if (irq->hw && !is_user)
vgic_irq_set_phys_active(irq, true);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
@@ -343,33 +350,18 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
}
}
+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __set_pending(vcpu, addr, len, val, false);
+}
+
int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = true;
-
- /*
- * GICv2 SGIs are terribly broken. We can't restore
- * the source of the interrupt, so just pick the vcpu
- * itself as the source...
- */
- if (is_vgic_v2_sgi(vcpu, irq))
- irq->source |= BIT(vcpu->vcpu_id);
-
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
+ __set_pending(vcpu, addr, len, val, true);
return 0;
}
@@ -394,9 +386,9 @@ static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
vgic_irq_set_phys_active(irq, false);
}
-void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __clear_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val, bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
@@ -405,14 +397,22 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- /* GICD_ICPENDR0 SGI bits are WI */
- if (is_vgic_v2_sgi(vcpu, irq)) {
+ /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */
+ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
vgic_put_irq(vcpu->kvm, irq);
continue;
}
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ /*
+ * More fun with GICv2 SGIs! If we're clearing one of them
+ * from userspace, which source vcpu to clear? Let's not
+ * even think of it, and blow the whole set.
+ */
+ if (is_vgic_v2_sgi(vcpu, irq))
+ irq->source = 0;
+
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
/* HW SGI? Ask the GIC to clear its pending bit */
int err;
@@ -427,7 +427,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
continue;
}
- if (irq->hw)
+ if (irq->hw && !is_user)
vgic_hw_irq_cpending(vcpu, irq);
else
irq->pending_latch = false;
@@ -437,33 +437,18 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
}
}
+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __clear_pending(vcpu, addr, len, val, false);
+}
+
int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- /*
- * More fun with GICv2 SGIs! If we're clearing one of them
- * from userspace, which source vcpu to clear? Let's not
- * even think of it, and blow the whole set.
- */
- if (is_vgic_v2_sgi(vcpu, irq))
- irq->source = 0;
-
- irq->pending_latch = false;
-
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
+ __clear_pending(vcpu, addr, len, val, true);
return 0;
}
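Both pending accessors now funnel through one worker carrying an is_user flag, so the guest MMIO path and the userspace save/restore path differ only in the SGI and hardware-state special cases instead of duplicating the loop. The shape of the pattern, reduced to a sketch with hypothetical names:

static void __worker(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
		     unsigned long val, bool is_user)
{
	/* shared walk over the INTIDs; is_user gates the SGI/HW quirks */
}

void mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
		unsigned long val)
{
	__worker(vcpu, addr, len, val, false);	/* guest: SGI bits WI, HW state poked */
}

int uaccess_write(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
		  unsigned long val)
{
	__worker(vcpu, addr, len, val, true);	/* user: restore SGI sources, skip HW */
	return 0;
}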
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 339a55194b2c..74a67ad87f29 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;
+ /* Silently exit if the vLPI is already mapped */
+ if (irq->hw)
+ goto out;
+
/*
* Emit the mapping request. If it fails, the ITS probably
* isn't v4 compatible, so let's silently bail out. Holding
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0ab09b0d4440..8d134569d0a1 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
int vgic_v3_save_pending_tables(struct kvm *kvm);
int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count);
int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
+void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu);
bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 8e2017ba5f1b..924843f1f661 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -29,8 +29,8 @@ bool can_set_direct_map(void)
*
* KFENCE pool requires page-granular mapping if initialized late.
*/
- return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
- arm64_kfence_can_set_direct_map();
+ return rodata_full || debug_pagealloc_enabled() ||
+ arm64_kfence_can_set_direct_map();
}
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
@@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
* If we are manipulating read-only permissions, apply the same
* change to the linear mapping of the pages that back this VM area.
*/
- if (rodata_enabled &&
- rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
+ if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
pgprot_val(clear_mask) == PTE_RDONLY)) {
for (i = 0; i < area->nr_pages; i++) {
__change_memory_common((u64)page_address(area->pages[i]),
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index b98c38288a9d..919eceb0b3da 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -37,6 +37,7 @@ HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCX
HAS_LDAPR
+HAS_LPA2
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 96cbeeab4eec..5ceaa1d3630e 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT
EndEnum
EndSysreg
+Sysreg ID_AA64PFR2_EL1 3 0 0 4 2
+Res0 63:36
+UnsignedEnum 35:32 FPMR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 31:12
+UnsignedEnum 11:8 MTEFAR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 MTESTOREONLY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 MTEPERM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
UnsignedEnum 59:56 F64MM
@@ -1058,7 +1079,11 @@ UnsignedEnum 63 FA64
0b0 NI
0b1 IMP
EndEnum
-Res0 62:60
+Res0 62:61
+UnsignedEnum 60 LUTv2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 59:56 SMEver
0b0000 SME
0b0001 SME2
@@ -1086,7 +1111,14 @@ UnsignedEnum 42 F16F16
0b0 NI
0b1 IMP
EndEnum
-Res0 41:40
+UnsignedEnum 41 F8F16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 40 F8F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 39:36 I8I32
0b0000 NI
0b1111 IMP
@@ -1107,7 +1139,49 @@ UnsignedEnum 32 F32F32
0b0 NI
0b1 IMP
EndEnum
-Res0 31:0
+Res0 31
+UnsignedEnum 30 SF8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 SF8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 SF8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:0
+EndSysreg
+
+Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7
+Res0 63:32
+UnsignedEnum 31 F8CVT
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 30 F8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 F8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 F8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:2
+UnsignedEnum 1 F8E4M3
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 0 F8E5M2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
EndSysreg
Sysreg ID_AA64DFR0_EL1 3 0 0 5 0
@@ -1115,7 +1189,10 @@ Enum 63:60 HPMN0
0b0000 UNPREDICTABLE
0b0001 DEF
EndEnum
-Res0 59:56
+UnsignedEnum 59:56 ExtTrcBuff
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 BRBE
0b0000 NI
0b0001 IMP
@@ -1327,6 +1404,7 @@ UnsignedEnum 11:8 API
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 7:4 APA
0b0000 NI
@@ -1335,6 +1413,7 @@ UnsignedEnum 7:4 APA
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 3:0 DPB
0b0000 NI
@@ -1344,7 +1423,14 @@ EndEnum
EndSysreg
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
-Res0 63:56
+UnsignedEnum 63:60 ATS1A
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 59:56 LUT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 CSSC
0b0000 NI
0b0001 IMP
@@ -1353,7 +1439,19 @@ UnsignedEnum 51:48 RPRFM
0b0000 NI
0b0001 IMP
EndEnum
-Res0 47:32
+Res0 47:44
+UnsignedEnum 43:40 PRFMSLC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 39:36 SYSINSTR_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 35:32 SYSREG_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 31:28 CLRBHB
0b0000 NI
0b0001 IMP
@@ -1377,6 +1475,7 @@ UnsignedEnum 15:12 APA3
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 11:8 GPA3
0b0000 NI
@@ -1392,6 +1491,23 @@ UnsignedEnum 3:0 WFxT
EndEnum
EndSysreg
+Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3
+Res0 63:12
+UnsignedEnum 11:8 TLBIW
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 FAMINMAX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 CPA
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 CPA2
+EndEnum
+EndSysreg
+
Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
UnsignedEnum 63:60 ECV
0b0000 NI
@@ -1680,7 +1796,8 @@ Field 63 TIDCP
Field 62 SPINTMASK
Field 61 NMI
Field 60 EnTP2
-Res0 59:58
+Field 59 TCSO
+Field 58 TCSO0
Field 57 EPAN
Field 56 EnALS
Field 55 EnAS0
@@ -1709,7 +1826,7 @@ EndEnum
Field 37 ITFSB
Field 36 BT1
Field 35 BT0
-Res0 34
+Field 34 EnFPM
Field 33 MSCEn
Field 32 CMOW
Field 31 EnIA
@@ -1747,7 +1864,8 @@ Field 0 M
EndSysreg
SysregFields CPACR_ELx
-Res0 63:29
+Res0 63:30
+Field 29 E0POE
Field 28 TTA
Res0 27:26
Field 25:24 SMEN
@@ -1790,6 +1908,41 @@ Sysreg SMCR_EL1 3 0 1 2 6
Fields SMCR_ELx
EndSysreg
+SysregFields GCSCR_ELx
+Res0 63:10
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7
+Field 6 EXLOCKEN
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysregFields
+
+Sysreg GCSCR_EL1 3 0 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+SysregFields GCSPR_ELx
+Field 63:3 PTR
+Res0 2:0
+EndSysregFields
+
+Sysreg GCSPR_EL1 3 0 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
+Sysreg GCSCRE0_EL1 3 0 2 5 2
+Res0 63:11
+Field 10 nTR
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7:6
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysreg
+
Sysreg ALLINT 3 0 4 3 0
Res0 63:14
Field 13 ALLINT
@@ -1933,10 +2086,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg RCWSMASK_EL1 3 0 13 0 3
+Field 63:0 RCWSMASK
+EndSysreg
+
Sysreg TPIDR_EL1 3 0 13 0 4
Field 63:0 ThreadID
EndSysreg
+Sysreg RCWMASK_EL1 3 0 13 0 6
+Field 63:0 RCWMASK
+EndSysreg
+
Sysreg SCXTNUM_EL1 3 0 13 0 7
Field 63:0 SoftwareContextNumber
EndSysreg
@@ -2020,12 +2181,39 @@ Field 4 DZP
Field 3:0 BS
EndSysreg
+Sysreg GCSPR_EL0 3 3 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg SVCR 3 3 4 2 2
Res0 63:2
Field 1 ZA
Field 0 SM
EndSysreg
+Sysreg FPMR 3 3 4 4 2
+Res0 63:38
+Field 37:32 LSCALE2
+Field 31:24 NSCALE
+Res0 23
+Field 22:16 LSCALE
+Field 15 OSC
+Field 14 OSM
+Res0 13:9
+UnsignedEnum 8:6 F8D
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 5:3 F8S2
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 2:0 F8S1
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+EndSysreg
+
SysregFields HFGxTR_EL2
Field 63 nAMAIR2_EL1
Field 62 nMAIR2_EL1
@@ -2102,7 +2290,9 @@ Fields HFGxTR_EL2
EndSysreg
Sysreg HFGITR_EL2 3 4 1 1 6
-Res0 63:61
+Res0 63
+Field 62 ATS1E1A
+Res0 61
Field 60 COSPRCTX
Field 59 nGCSEPP
Field 58 nGCSSTR_EL1
@@ -2295,12 +2485,57 @@ Field 1 DBGBVRn_EL1
Field 0 DBGBCRn_EL1
EndSysreg
+Sysreg HAFGRTR_EL2 3 4 3 1 6
+Res0 63:50
+Field 49 AMEVTYPER115_EL0
+Field 48 AMEVCNTR115_EL0
+Field 47 AMEVTYPER114_EL0
+Field 46 AMEVCNTR114_EL0
+Field 45 AMEVTYPER113_EL0
+Field 44 AMEVCNTR113_EL0
+Field 43 AMEVTYPER112_EL0
+Field 42 AMEVCNTR112_EL0
+Field 41 AMEVTYPER111_EL0
+Field 40 AMEVCNTR111_EL0
+Field 39 AMEVTYPER110_EL0
+Field 38 AMEVCNTR110_EL0
+Field 37 AMEVTYPER19_EL0
+Field 36 AMEVCNTR19_EL0
+Field 35 AMEVTYPER18_EL0
+Field 34 AMEVCNTR18_EL0
+Field 33 AMEVTYPER17_EL0
+Field 32 AMEVCNTR17_EL0
+Field 31 AMEVTYPER16_EL0
+Field 30 AMEVCNTR16_EL0
+Field 29 AMEVTYPER15_EL0
+Field 28 AMEVCNTR15_EL0
+Field 27 AMEVTYPER14_EL0
+Field 26 AMEVCNTR14_EL0
+Field 25 AMEVTYPER13_EL0
+Field 24 AMEVCNTR13_EL0
+Field 23 AMEVTYPER12_EL0
+Field 22 AMEVCNTR12_EL0
+Field 21 AMEVTYPER11_EL0
+Field 20 AMEVCNTR11_EL0
+Field 19 AMEVTYPER10_EL0
+Field 18 AMEVCNTR10_EL0
+Field 17 AMCNTEN1
+Res0 16:5
+Field 4 AMEVCNTR03_EL0
+Field 3 AMEVCNTR02_EL0
+Field 2 AMEVCNTR01_EL0
+Field 1 AMEVCNTR00_EL0
+Field 0 AMCNTEN0
+EndSysreg
+
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
Sysreg HCRX_EL2 3 4 1 2 2
-Res0 63:23
+Res0 63:25
+Field 24 PACMEn
+Field 23 EnFPM
Field 22 GCSEn
Field 21 EnIDCP128
Field 20 EnSDERR
@@ -2348,6 +2583,14 @@ Sysreg SMCR_EL2 3 4 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL2 3 4 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL2 3 4 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg DACR32_EL2 3 4 3 0 0
Res0 63:32
Field 31:30 D15
@@ -2407,6 +2650,14 @@ Sysreg SMCR_EL12 3 5 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL12 3 5 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL12 3 5 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg FAR_EL12 3 5 6 0 0
Field 63:0 ADDR
EndSysreg
@@ -2471,6 +2722,33 @@ Field 1 PIE
Field 0 PnCH
EndSysreg
+SysregFields MAIR2_ELx
+Field 63:56 Attr7
+Field 55:48 Attr6
+Field 47:40 Attr5
+Field 39:32 Attr4
+Field 31:24 Attr3
+Field 23:16 Attr2
+Field 15:8 Attr1
+Field 7:0 Attr0
+EndSysregFields
+
+Sysreg MAIR2_EL1 3 0 10 2 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg MAIR2_EL2 3 4 10 1 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg AMAIR2_EL1 3 0 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
+Sysreg AMAIR2_EL2 3 4 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
SysregFields PIRx_ELx
Field 63:60 Perm15
Field 59:56 Perm14
@@ -2510,6 +2788,26 @@ Sysreg PIR_EL2 3 4 10 2 3
Fields PIRx_ELx
EndSysreg
+Sysreg POR_EL0 3 3 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL1 3 0 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL12 3 5 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2POR_EL1 3 0 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2PIR_EL2 3 4 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
Sysreg LORSA_EL1 3 0 10 4 0
Res0 63:52
Field 51:16 SA
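Each Sysreg/SysregFields block above is compiled by arch/arm64/tools/gen-sysreg.awk into register encodings plus per-field shift, mask and enum macros. A hedged sketch of how one of the new fields would typically be consumed in cpufeature code, assuming the generated ID_AA64ISAR3_EL1_FAMINMAX_* names:

	u64 isar3 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR3_EL1);

	if (SYS_FIELD_GET(ID_AA64ISAR3_EL1, FAMINMAX, isar3) >=
	    ID_AA64ISAR3_EL1_FAMINMAX_IMP)
		;	/* FEAT_FAMINMAX is implemented */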
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 9eeb0c05f3f4..4ba8d67ddb09 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -68,6 +68,7 @@ LDFLAGS_vmlinux += -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += $(call cc-option,-mexplicit-relocs)
KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data)
KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
@@ -82,7 +83,7 @@ endif
ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS_KERNEL += -fPIE
-LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext
+LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
endif
cflags-y += $(call cc-option, -mno-check-zero-division)
@@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
all: $(notdir $(KBUILD_IMAGE))
+vmlinuz.efi: vmlinux.efi
+
vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index c9544f358c33..655db7d7a427 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -609,8 +609,7 @@
lu32i.d \reg, 0
lu52i.d \reg, \reg, 0
.pushsection ".la_abs", "aw", %progbits
- 768:
- .dword 768b-766b
+ .dword 766b
.dword \sym
.popsection
#endif
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index 091897d40b03..91d81f9730ab 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void)
#define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS)
-unsigned long kernel_entry_address(void);
+unsigned long kernel_entry_address(unsigned long kernel_addr);
#endif /* _ASM_LOONGARCH_EFI_H */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index b9a4ab54285c..9b16a3b8e706 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -293,7 +293,7 @@ extern const char *__elf_platform;
#define ELF_PLAT_INIT(_r, load_addr) do { \
_r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \
_r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \
- _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \
+ _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \
_r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \
_r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \
_r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index b108301c2e5a..2d62f7b0d377 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -45,7 +45,10 @@ struct kvm_vcpu_stat {
u64 signal_exits;
};
+#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
+#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1)
struct kvm_arch_memory_slot {
+ unsigned long flags;
};
struct kvm_context {
@@ -92,8 +95,10 @@ enum emulation_result {
};
#define KVM_LARCH_FPU (0x1 << 0)
-#define KVM_LARCH_SWCSR_LATEST (0x1 << 1)
-#define KVM_LARCH_HWCSR_USABLE (0x1 << 2)
+#define KVM_LARCH_LSX (0x1 << 1)
+#define KVM_LARCH_LASX (0x1 << 2)
+#define KVM_LARCH_SWCSR_LATEST (0x1 << 3)
+#define KVM_LARCH_HWCSR_USABLE (0x1 << 4)
struct kvm_vcpu_arch {
/*
@@ -175,6 +180,21 @@ static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned
csr->csrs[reg] = val;
}
+static inline bool kvm_guest_has_fpu(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_FP;
+}
+
+static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_LSX;
+}
+
+static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_LASX;
+}
+
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 553cfa2b2b1c..e71ceb88f29e 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -55,7 +55,26 @@ void kvm_save_fpu(struct loongarch_fpu *fpu);
void kvm_restore_fpu(struct loongarch_fpu *fpu);
void kvm_restore_fcsr(struct loongarch_fpu *fpu);
-void kvm_acquire_timer(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_CPU_HAS_LSX
+int kvm_own_lsx(struct kvm_vcpu *vcpu);
+void kvm_save_lsx(struct loongarch_fpu *fpu);
+void kvm_restore_lsx(struct loongarch_fpu *fpu);
+#else
+static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; }
+static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { }
+static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { }
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+int kvm_own_lasx(struct kvm_vcpu *vcpu);
+void kvm_save_lasx(struct loongarch_fpu *fpu);
+void kvm_restore_lasx(struct loongarch_fpu *fpu);
+#else
+static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; }
+static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
+static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
+#endif
+
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
void kvm_reset_timer(struct kvm_vcpu *vcpu);
void kvm_save_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 9b4957cefa8a..46366e783c84 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1098,12 +1098,11 @@
static __always_inline u64 drdtime(void)
{
- int rID = 0;
u64 val = 0;
__asm__ __volatile__(
- "rdtime.d %0, %1 \n\t"
- : "=r"(val), "=r"(rID)
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
:
);
return val;
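rdtime.d produces both the counter value and a counter ID; directing the ID at $zero discards it, so the asm no longer reserves a dead output register. Minimal usage sketch:

	u64 t0, cycles;

	t0 = drdtime();
	/* ... code under measurement ... */
	cycles = drdtime() - t0;	/* elapsed stable-counter ticks */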
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index ed5da02b1cf6..9b36ac003f89 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \
switch (size) { \
case 4: \
__asm__ __volatile__( \
- "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \
: [val] "r" (val)); \
break; \
case 8: \
__asm__ __volatile__( \
- "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
+ "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \
: [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \
: [val] "r" (val)); \
break; \
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
PERCPU_OP(or, or, |)
#undef PERCPU_OP
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
{
unsigned long ret;
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
return ret;
}
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
}
}
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
- int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
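The __percpu annotation on these accessors lets sparse flag plain pointers being passed where per-cpu ones are expected; callers normally arrive here via the this_cpu_*() wrappers rather than calling directly. A sketch of the usual entry point, assuming a per-cpu variable:

	DEFINE_PER_CPU(unsigned long, demo_counter);

	unsigned long v = this_cpu_read(demo_counter);	/* routes to __percpu_read() */
	this_cpu_write(demo_counter, v + 1);		/* routes to __percpu_write() */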
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index a0bc159ce8bd..ee52fb1e9963 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len
#ifdef CONFIG_RELOCATABLE
struct rela_la_abs {
- long offset;
+ long pc;
long symvalue;
};
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index c6ad2ee6106c..923d0bd38294 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -79,6 +79,7 @@ struct kvm_fpu {
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+#define KVM_LOONGARCH_VCPU_CPUCFG 0
struct kvm_debug_exit_arch {
};
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 4fcc168f0732..3c808c680370 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index d53ab10f4644..4382e36ae3d4 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -349,6 +349,7 @@ SYM_FUNC_START(_restore_lsx_upper)
lsx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lsx_upper)
+EXPORT_SYMBOL(_restore_lsx_upper)
SYM_FUNC_START(_init_lsx_upper)
lsx_init_all_upper t1
@@ -384,6 +385,7 @@ SYM_FUNC_START(_restore_lasx_upper)
lasx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lasx_upper)
+EXPORT_SYMBOL(_restore_lasx_upper)
SYM_FUNC_START(_init_lasx_upper)
lasx_init_all_upper t1
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 6c3eff9af9fb..1acfa704c8d0 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset)
for (p = begin; (void *)p < end; p++) {
long v = p->symvalue;
uint32_t lu12iw, ori, lu32id, lu52id;
- union loongarch_instruction *insn = (void *)p - p->offset;
+ union loongarch_instruction *insn = (void *)p->pc;
lu12iw = (v >> 12) & 0xfffff;
ori = v & 0xfff;
@@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void)
return hash;
}
+static int __init nokaslr(char *p)
+{
+ pr_info("KASLR is disabled.\n");
+
+ return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
static inline __init bool kaslr_disabled(void)
{
char *str;
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 92270f14db94..f623feb2129f 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
}
for (unwind_start(&state, task, regs);
- !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
+ !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || !consume_entry(cookie, addr))
break;
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 3064af94db9c..e7015f7b70e3 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
{
+ unsigned long period;
unsigned long timer_config;
raw_spin_lock(&state_lock);
- timer_config = csr_read64(LOONGARCH_CSR_TCFG);
- timer_config &= ~CSR_TCFG_EN;
+ period = const_clock_freq / HZ;
+ timer_config = period & CSR_TCFG_VAL;
+ timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
csr_write64(timer_config, LOONGARCH_CSR_TCFG);
raw_spin_unlock(&state_lock);
@@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
{
- unsigned long period;
unsigned long timer_config;
raw_spin_lock(&state_lock);
- period = const_clock_freq / HZ;
- timer_config = period & CSR_TCFG_VAL;
- timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+ timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+ timer_config &= ~CSR_TCFG_EN;
csr_write64(timer_config, LOONGARCH_CSR_TCFG);
raw_spin_unlock(&state_lock);
@@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
return 0;
}
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
- return 0;
-}
-
static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
{
unsigned long timer_config;
@@ -161,7 +156,7 @@ int constant_clockevent_init(void)
cd->rating = 320;
cd->cpumask = cpumask_of(cpu);
cd->set_state_oneshot = constant_set_state_oneshot;
- cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+ cd->set_state_oneshot_stopped = constant_set_state_shutdown;
cd->set_state_periodic = constant_set_state_periodic;
cd->set_state_shutdown = constant_set_state_shutdown;
cd->set_next_event = constant_timer_next_event;
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
index ba324ba76fa1..a463d6961344 100644
--- a/arch/loongarch/kernel/unwind.c
+++ b/arch/loongarch/kernel/unwind.c
@@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
- state->error = true;
return false;
}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 55afc27320e1..929ae240280a 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state)
} while (!get_stack_info(state->sp, state->task, info));
out:
- state->error = true;
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
}
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index f22bae89b07d..61f7e33b1f95 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -22,14 +22,13 @@ config KVM
depends on AS_HAS_LVZ_EXTENSION
depends on HAVE_KVM
select HAVE_KVM_DIRTY_RING_ACQ_REL
- select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMIO
select KVM_XFER_TO_GUEST_WORK
- select PREEMPT_NOTIFIERS
help
Support hosting virtualized guest machines using
hardware virtualization extensions. You will need
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ce8de3fa472c..ed1d89d53e2e 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -200,17 +200,8 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
++vcpu->stat.idle_exits;
trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE);
- if (!kvm_arch_vcpu_runnable(vcpu)) {
- /*
- * Switch to the software timer before halt-polling/blocking as
- * the guest's timer may be a break event for the vCPU, and the
- * hypervisor timer runs only when the CPU is in guest mode.
- * Switch before halt-polling so that KVM recognizes an expired
- * timer before blocking.
- */
- kvm_save_timer(vcpu);
- kvm_vcpu_block(vcpu);
- }
+ if (!kvm_arch_vcpu_runnable(vcpu))
+ kvm_vcpu_halt(vcpu);
return EMULATE_DONE;
}
@@ -643,6 +634,11 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
+ if (!kvm_guest_has_fpu(&vcpu->arch)) {
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+ return RESUME_GUEST;
+ }
+
/*
* If guest FPU not present, the FPU operation should have been
* treated as a reserved instruction!
@@ -660,6 +656,36 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
}
/*
+ * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LSX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (kvm_own_lsx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+/*
+ * kvm_handle_lasx_disabled() - Guest used LASX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LASX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (kvm_own_lasx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+/*
* LoongArch KVM callback handling for unimplemented guest exiting
*/
static int kvm_fault_ni(struct kvm_vcpu *vcpu)
@@ -687,6 +713,8 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_TLBS] = kvm_handle_write_fault,
[EXCCODE_TLBM] = kvm_handle_write_fault,
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
+ [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
+ [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
};
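The two new handlers slot into the EXCCODE-indexed dispatch table above: a guest's first LSX/LASX instruction traps, kvm_own_lsx()/kvm_own_lasx() lazily enable and restore the unit, and only guests whose CPUCFG2 lacks the feature get an INE exception. A sketch of the dispatch step this table serves, with hypothetical local names:

	unsigned long ecode = ...;	/* exception code decoded from guest state */
	exit_handle_fn fn = kvm_fault_tables[ecode];

	ret = fn(vcpu);	/* e.g. kvm_handle_lsx_disabled() -> kvm_own_lsx() */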
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index 1c1d5199500e..86a2f2d0cb27 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -287,7 +287,6 @@ int kvm_arch_hardware_enable(void)
if (env & CSR_GCFG_MATC_ROOT)
gcfg |= CSR_GCFG_MATC_ROOT;
- gcfg |= CSR_GCFG_TIT;
write_csr_gcfg(gcfg);
kvm_flush_tlb_all();
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index 80480df5f550..915f17527893 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -13,6 +13,16 @@
#include <asm/tlb.h>
#include <asm/kvm_mmu.h>
+static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE;
+}
+
+static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE;
+}
+
static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
{
ctx->level = kvm->arch.root_level;
@@ -365,6 +375,69 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
}
+int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, enum kvm_mr_change change)
+{
+ gpa_t gpa_start;
+ hva_t hva_start;
+ size_t size, gpa_offset, hva_offset;
+
+ if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE))
+ return 0;
+ /*
+ * Prevent userspace from creating a memory region outside of the
+ * VM GPA address space
+ */
+ if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT))
+ return -ENOMEM;
+
+ new->arch.flags = 0;
+ size = new->npages * PAGE_SIZE;
+ gpa_start = new->base_gfn << PAGE_SHIFT;
+ hva_start = new->userspace_addr;
+ if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE)
+ && IS_ALIGNED(hva_start, PMD_SIZE))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE;
+ else {
+ /*
+ * Pages belonging to memslots that don't have the same
+ * alignment within a PMD for userspace and GPA cannot be
+ * mapped with PMD entries, because we'll end up mapping
+ * the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SIZE:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def Stage-2 block | Stage-2 block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those stage-2 blocks, we'll end up with this
+ * incorrect mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ gpa_offset = gpa_start & (PMD_SIZE - 1);
+ hva_offset = hva_start & (PMD_SIZE - 1);
+ if (gpa_offset != hva_offset) {
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ } else {
+ if (gpa_offset == 0)
+ gpa_offset = PMD_SIZE;
+ if ((size + gpa_offset) < (PMD_SIZE * 2))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ }
+ }
+
+ return 0;
+}
+
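The capability test above reduces to a congruence check: PMD mappings are only possible when GPA and HVA share the same offset within a PMD-sized block, otherwise a huge entry would map the wrong pages (as the diagram shows: d -> f). A self-contained sketch of the core predicate, with a hypothetical helper name:

static bool slot_pmd_congruent(unsigned long gpa, unsigned long hva)
{
	/* e.g. gpa offset 0x1000 vs hva offset 0x2000 -> not congruent */
	return (gpa & (PMD_SIZE - 1)) == (hva & (PMD_SIZE - 1));
}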
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
@@ -562,47 +635,23 @@ out:
}
static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
- unsigned long hva, unsigned long map_size, bool write)
+ unsigned long hva, bool write)
{
- size_t size;
- gpa_t gpa_start;
- hva_t uaddr_start, uaddr_end;
+ hva_t start, end;
/* Disable dirty logging on HugePages */
if (kvm_slot_dirty_track_enabled(memslot) && write)
return false;
- size = memslot->npages * PAGE_SIZE;
- gpa_start = memslot->base_gfn << PAGE_SHIFT;
- uaddr_start = memslot->userspace_addr;
- uaddr_end = uaddr_start + size;
+ if (kvm_hugepage_capable(memslot))
+ return true;
- /*
- * Pages belonging to memslots that don't have the same alignment
- * within a PMD for userspace and GPA cannot be mapped with stage-2
- * PMD entries, because we'll end up mapping the wrong pages.
- *
- * Consider a layout like the following:
- *
- * memslot->userspace_addr:
- * +-----+--------------------+--------------------+---+
- * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
- * +-----+--------------------+--------------------+---+
- *
- * memslot->base_gfn << PAGE_SIZE:
- * +---+--------------------+--------------------+-----+
- * |abc|def Stage-2 block | Stage-2 block |tvxyz|
- * +---+--------------------+--------------------+-----+
- *
- * If we create those stage-2 blocks, we'll end up with this incorrect
- * mapping:
- * d -> f
- * e -> g
- * f -> h
- */
- if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
+ if (kvm_hugepage_incapable(memslot))
return false;
+ start = memslot->userspace_addr;
+ end = start + memslot->npages * PAGE_SIZE;
+
/*
* Next, let's make sure we're not trying to map anything not covered
* by the memslot. This means we have to prohibit block size mappings
@@ -615,8 +664,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
* userspace_addr or the base_gfn, as both are equally aligned (per
* the check above) and equally sized.
*/
- return (hva & ~(map_size - 1)) >= uaddr_start &&
- (hva & ~(map_size - 1)) + map_size <= uaddr_end;
+ return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE));
}
/*
@@ -842,7 +890,7 @@ retry:
/* Disable dirty logging on HugePages */
level = 0;
- if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) {
+ if (!fault_supports_huge_mapping(memslot, hva, write)) {
level = 0;
} else {
level = host_pfn_mapping_level(kvm, gfn, memslot);
@@ -901,12 +949,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
-int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
- struct kvm_memory_slot *new, enum kvm_mr_change change)
-{
- return 0;
-}
-
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 0ed9040307b7..ba976509bfe8 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -245,6 +245,37 @@ SYM_FUNC_START(kvm_restore_fpu)
jr ra
SYM_FUNC_END(kvm_restore_fpu)
+#ifdef CONFIG_CPU_HAS_LSX
+SYM_FUNC_START(kvm_save_lsx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lsx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lsx)
+
+SYM_FUNC_START(kvm_restore_lsx)
+ lsx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lsx)
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+SYM_FUNC_START(kvm_save_lasx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lasx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lasx)
+
+SYM_FUNC_START(kvm_restore_lasx)
+ lasx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lasx)
+#endif
.section ".rodata"
SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index 284bf553fefe..111328f60872 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -65,40 +65,23 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
}
/*
- * Restore hard timer state and enable guest to access timer registers
- * without trap, should be called with irq disabled
- */
-void kvm_acquire_timer(struct kvm_vcpu *vcpu)
-{
- unsigned long cfg;
-
- cfg = read_csr_gcfg();
- if (!(cfg & CSR_GCFG_TIT))
- return;
-
- /* Enable guest access to hard timer */
- write_csr_gcfg(cfg & ~CSR_GCFG_TIT);
-
- /*
- * Freeze the soft-timer and sync the guest stable timer with it. We do
- * this with interrupts disabled to avoid latency.
- */
- hrtimer_cancel(&vcpu->arch.swtimer);
-}
-
-/*
* Restore soft timer state from saved context.
*/
void kvm_restore_timer(struct kvm_vcpu *vcpu)
{
- unsigned long cfg, delta, period;
+ unsigned long cfg, estat;
+ unsigned long ticks, delta, period;
ktime_t expire, now;
struct loongarch_csrs *csr = vcpu->arch.csr;
/*
* Set guest stable timer cfg csr
+	 * Disable the timer before restoring the ESTAT CSR, so that a
+	 * stale timer configuration cannot raise a spurious interrupt
*/
cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
+
+ write_gcsr_timercfg(0);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
if (!(cfg & CSR_TCFG_EN)) {
@@ -108,23 +91,55 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
}
/*
+ * Freeze the soft-timer and sync the guest stable timer with it.
+ */
+ hrtimer_cancel(&vcpu->arch.swtimer);
+
+ /*
+	 * Per the LoongArch Reference Manual Volume 1, Chapter 7.6.2,
+	 * CSR.TVAL reads as -1 once a one-shot timer has fired. Two cases
+	 * lead here:
+	 * 1) the timer fired while exiting to the host
+	 * 2) the timer fired while the VM was handling the timer irq, and
+	 *    it then exited to the host. In both cases the host must not
+	 *    inject another timer irq, to avoid a spurious interrupt
+ */
+ ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) {
+ /*
+	 * Writing 0 to LOONGARCH_CSR_TVAL injects the timer irq
+	 * and leaves CSR.TVAL at -1
+ */
+ write_gcsr_timertick(0);
+
+ /*
+	 * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR clears the
+	 * timer interrupt while CSR.TVAL stays at -1, which avoids a
+	 * spurious timer interrupt
+ */
+ if (!(estat & CPU_TIMER))
+ gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
* Set remainder tick value if not expired
*/
+ delta = 0;
now = ktime_get();
expire = vcpu->arch.expire;
if (ktime_before(now, expire))
delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
- else {
- if (cfg & CSR_TCFG_PERIOD) {
- period = cfg & CSR_TCFG_VAL;
- delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
- delta = period - (delta % period);
- } else
- delta = 0;
+ else if (cfg & CSR_TCFG_PERIOD) {
+ period = cfg & CSR_TCFG_VAL;
+ delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
+ delta = period - (delta % period);
+
/*
* Inject timer here though sw timer should inject timer
* interrupt async already, since sw timer may be cancelled
- * during injecting intr async in function kvm_acquire_timer
+		 * while injecting the interrupt asynchronously
*/
kvm_queue_irq(vcpu, INT_TI);
}
@@ -139,27 +154,41 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
*/
static void _kvm_save_timer(struct kvm_vcpu *vcpu)
{
- unsigned long ticks, delta;
+ unsigned long ticks, delta, cfg;
ktime_t expire;
struct loongarch_csrs *csr = vcpu->arch.csr;
+ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
- delta = tick_to_ns(vcpu, ticks);
- expire = ktime_add_ns(ktime_get(), delta);
- vcpu->arch.expire = expire;
- if (ticks) {
+
+ /*
+	 * Per the LoongArch Reference Manual Volume 1, Chapter 7.6.2:
+	 * when a periodic timer fires, CSR.TVAL is reloaded from CSR.TCFG;
+	 * when a one-shot timer fires, CSR.TVAL becomes -1. A fired
+	 * one-shot timer is therefore detected by TVAL reading back
+	 * larger than TCFG
+ */
+ if (ticks < cfg) {
+ delta = tick_to_ns(vcpu, ticks);
+ expire = ktime_add_ns(ktime_get(), delta);
+ vcpu->arch.expire = expire;
+
/*
- * Update hrtimer to use new timeout
* HRTIMER_MODE_PINNED is suggested since vcpu may run in
		 * the same physical cpu next time
*/
- hrtimer_cancel(&vcpu->arch.swtimer);
hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
- } else
+ } else if (vcpu->stat.generic.blocking) {
/*
- * Inject timer interrupt so that hall polling can dectect and exit
+		 * Inject a timer interrupt so that halt polling can detect and exit.
+		 * The vCPU is already scheduled out, sleeping on the rcuwait queue,
+		 * and will not poll pending events again; kvm_queue_irq() alone is
+		 * not enough, so the hrtimer-based swtimer must be used here.
*/
- kvm_queue_irq(vcpu, INT_TI);
+ expire = ktime_add_ns(ktime_get(), 10);
+ vcpu->arch.expire = expire;
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+ }
}
/*
@@ -168,21 +197,15 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
*/
void kvm_save_timer(struct kvm_vcpu *vcpu)
{
- unsigned long cfg;
struct loongarch_csrs *csr = vcpu->arch.csr;
preempt_disable();
- cfg = read_csr_gcfg();
- if (!(cfg & CSR_GCFG_TIT)) {
- /* Disable guest use of hard timer */
- write_csr_gcfg(cfg | CSR_GCFG_TIT);
-
- /* Save hard timer state */
- kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
- kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
- if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
- _kvm_save_timer(vcpu);
- }
+
+ /* Save hard timer state */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
+ _kvm_save_timer(vcpu);
/* Save timer-related state to vCPU context */
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
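Both the save and restore paths above rely on the same detection rule: after a one-shot timer fires, CSR.TVAL reads back as -1 (all ones), which as an unsigned value is necessarily larger than any programmed TCFG count. Reduced to a sketch:

	bool oneshot_fired = !(cfg & CSR_TCFG_PERIOD) && (ticks > cfg);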
diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h
index a1e35d655418..c2484ad4cffa 100644
--- a/arch/loongarch/kvm/trace.h
+++ b/arch/loongarch/kvm/trace.h
@@ -102,6 +102,8 @@ TRACE_EVENT(kvm_exit_gspr,
#define KVM_TRACE_AUX_DISCARD 4
#define KVM_TRACE_AUX_FPU 1
+#define KVM_TRACE_AUX_LSX 2
+#define KVM_TRACE_AUX_LASX 3
#define kvm_trace_symbol_aux_op \
{ KVM_TRACE_AUX_SAVE, "save" }, \
@@ -111,7 +113,9 @@ TRACE_EVENT(kvm_exit_gspr,
{ KVM_TRACE_AUX_DISCARD, "discard" }
#define kvm_trace_symbol_aux_state \
- { KVM_TRACE_AUX_FPU, "FPU" }
+ { KVM_TRACE_AUX_FPU, "FPU" }, \
+ { KVM_TRACE_AUX_LSX, "LSX" }, \
+ { KVM_TRACE_AUX_LASX, "LASX" }
TRACE_EVENT(kvm_aux,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 73d0c2b9c1a5..27701991886d 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -95,7 +95,6 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
* check vmid before vcpu enter guest
*/
local_irq_disable();
- kvm_acquire_timer(vcpu);
kvm_deliver_intr(vcpu);
kvm_deliver_exception(vcpu);
/* Make sure the vcpu mode has been written */
@@ -187,8 +186,15 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_pending_timer(vcpu) ||
+ int ret;
+
+ /* Protect from TOD sync and vcpu_load/put() */
+ preempt_disable();
+ ret = kvm_pending_timer(vcpu) ||
kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI);
+ preempt_enable();
+
+ return ret;
}
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
@@ -244,23 +250,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-/**
- * kvm_migrate_count() - Migrate timer.
- * @vcpu: Virtual CPU.
- *
- * Migrate hrtimer to the current CPU by cancelling and restarting it
- * if the hrtimer is active.
- *
- * Must be called when the vCPU is migrated to a different CPU, so that
- * the timer can interrupt the guest at the new CPU, and the timer irq can
- * be delivered to the vCPU.
- */
-static void kvm_migrate_count(struct kvm_vcpu *vcpu)
-{
- if (hrtimer_cancel(&vcpu->arch.swtimer))
- hrtimer_restart(&vcpu->arch.swtimer);
-}
-
static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
{
unsigned long gintc;
@@ -309,6 +298,76 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
return ret;
}
+static int _kvm_get_cpucfg(int id, u64 *v)
+{
+ int ret = 0;
+
+	if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
+ return -EINVAL;
+
+ switch (id) {
+ case 2:
+ /* Return CPUCFG2 features which have been supported by KVM */
+ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
+ CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
+ CPUCFG2_LAM;
+ /*
+ * If LSX is supported by CPU, it is also supported by KVM,
+ * as we implement it.
+ */
+ if (cpu_has_lsx)
+ *v |= CPUCFG2_LSX;
+ /*
+	 * If LASX is supported by CPU, it is also supported by KVM,
+ * as we implement it.
+ */
+ if (cpu_has_lasx)
+ *v |= CPUCFG2_LASX;
+
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static int kvm_check_cpucfg(int id, u64 val)
+{
+ u64 mask;
+ int ret = 0;
+
+	if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
+ return -EINVAL;
+
+	ret = _kvm_get_cpucfg(id, &mask);
+	if (ret)
+		return ret;
+
+ switch (id) {
+ case 2:
+ /* CPUCFG2 features checking */
+ if (val & ~mask)
+ /* The unsupported features should not be set */
+ ret = -EINVAL;
+ else if (!(val & CPUCFG2_LLFTP))
+			/* LLFTP must be set, as the guest must have a constant timer */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
+			/* Single- and double-precision FP must both be set when FP is enabled */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
+			/* FP must be set when enabling LSX */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
+			/* LSX must be set when enabling LASX (FP was already checked above) */
+ ret = -EINVAL;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg, u64 *v)
{
@@ -378,10 +437,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
break;
case KVM_REG_LOONGARCH_CPUCFG:
id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
- if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
- vcpu->arch.cpucfg[id] = (u32)v;
- else
- ret = -EINVAL;
+ ret = kvm_check_cpucfg(id, v);
+ if (ret)
+ break;
+ vcpu->arch.cpucfg[id] = (u32)v;
break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
@@ -471,10 +530,94 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case 2:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = 0;
+ uint64_t val;
+ uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
+
+ ret = _kvm_get_cpucfg(attr->attr, &val);
+ if (ret)
+ return ret;
+
+	ret = put_user(val, uaddr);
+
+ return ret;
+}
+
+static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+
+static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
long r;
+ struct kvm_device_attr attr;
void __user *argp = (void __user *)arg;
struct kvm_vcpu *vcpu = filp->private_data;
@@ -514,6 +657,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_has_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_get_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_set_attr(vcpu, &attr);
+ break;
+ }
default:
r = -ENOIOCTLCMD;
break;
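/*
 * Illustrative sketch (not part of this patch): probing the CPUCFG2
 * mask KVM supports via the new vCPU device-attr ioctls. Field names
 * follow struct kvm_device_attr from <linux/kvm.h>.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_kvm_cpucfg2_mask(int vcpu_fd, uint64_t *mask)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_CPUCFG,
		.attr  = 2,			/* CPUCFG word 2 */
		.addr  = (uint64_t)mask,
	};

	if (ioctl(vcpu_fd, KVM_HAS_DEVICE_ATTR, &attr))
		return -1;	/* attribute group/word not supported */

	return ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
}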
@@ -561,12 +725,96 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
preempt_enable();
}
+#ifdef CONFIG_CPU_HAS_LSX
+/* Enable LSX and restore context */
+int kvm_own_lsx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ /* Enable LSX for guest */
+ set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
+ switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ case KVM_LARCH_FPU:
+ /*
+ * Guest FPU state already loaded,
+ * only restore upper LSX state
+ */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /*
+ * Neither FP nor LSX is active,
+ * restore the full LSX state
+ */
+ kvm_restore_lsx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* Enable LASX and restore context */
+int kvm_own_lasx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
+ case KVM_LARCH_LSX:
+ case KVM_LARCH_LSX | KVM_LARCH_FPU:
+ /* Guest LSX state already loaded, only restore upper LASX state */
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ case KVM_LARCH_FPU:
+ /* Guest FP state already loaded, only restore upper LSX & LASX state */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /* Neither FP nor LSX is active, restore the full LASX state */
+ kvm_restore_lasx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
/* Save context and disable FPU */
void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
- if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
+ kvm_save_lasx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX);
+
+ /* Disable LASX & LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
+ kvm_save_lsx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX);
+
+ /* Disable LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
kvm_save_fpu(&vcpu->arch.fpu);
vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU;
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
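/*
 * Illustrative invariant (not part of this patch): the save path above
 * relies on LASX state being a superset of LSX state, which in turn is
 * a superset of FP state, so saving the widest live unit and clearing
 * all narrower aux_inuse bits is sufficient.
 */
static inline bool aux_inuse_consistent(unsigned long aux_inuse)
{
	if ((aux_inuse & KVM_LARCH_LASX) && !(aux_inuse & KVM_LARCH_LSX))
		return false;
	if ((aux_inuse & KVM_LARCH_LSX) && !(aux_inuse & KVM_LARCH_FPU))
		return false;
	return true;
}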
@@ -789,17 +1037,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
unsigned long flags;
local_irq_save(flags);
- if (vcpu->arch.last_sched_cpu != cpu) {
- kvm_debug("[%d->%d]KVM vCPU[%d] switch\n",
- vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
- /*
- * Migrate the timer interrupt to the current CPU so that it
- * always interrupts the guest and synchronously triggers a
- * guest timer interrupt.
- */
- kvm_migrate_count(vcpu);
- }
-
/* Restore guest state to registers */
_kvm_vcpu_load(vcpu, cpu);
local_irq_restore(flags);
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 71d0539e2d0b..2aae72e63871 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr)
{
return pfn_to_page(virt_to_pfn(kaddr));
}
-EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
}
-EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+EXPORT_SYMBOL(tlb_virt_to_page);
pgd_t *pgd_alloc(struct mm_struct *mm)
{
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 169ff8b3915e..4fcd6cd6da23 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
case 8:
move_reg(ctx, t1, src);
emit_insn(ctx, extwb, dst, t1);
+ emit_zext_32(ctx, dst, is32);
break;
case 16:
move_reg(ctx, t1, src);
emit_insn(ctx, extwh, dst, t1);
+ emit_zext_32(ctx, dst, is32);
break;
case 32:
emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
@@ -772,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
break;
case 32:
emit_insn(ctx, revb2w, dst, dst);
- /* zero-extend 32 bits into 64 bits */
- emit_zext_32(ctx, dst, is32);
+ /* clear the upper 32 bits */
+ emit_zext_32(ctx, dst, true);
break;
case 64:
emit_insn(ctx, revbd, dst, dst);
@@ -911,8 +913,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* function return */
case BPF_JMP | BPF_EXIT:
- emit_sext_32(ctx, regmap[BPF_REG_0], true);
-
if (i == ctx->prog->len - 1)
break;
@@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
}
break;
case BPF_DW:
- if (is_signed_imm12(off)) {
- emit_insn(ctx, ldd, dst, src, off);
- } else if (is_signed_imm14(off)) {
- emit_insn(ctx, ldptrd, dst, src, off);
- } else {
- move_imm(ctx, t1, off, is32);
- emit_insn(ctx, ldxd, dst, src, t1);
- }
+ move_imm(ctx, t1, off, is32);
+ emit_insn(ctx, ldxd, dst, src, t1);
break;
}
diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h
index f5a8b2defa4b..3b0b64f0a353 100644
--- a/arch/m68k/include/asm/kexec.h
+++ b/arch/m68k/include/asm/kexec.h
@@ -2,7 +2,7 @@
#ifndef _ASM_M68K_KEXEC_H
#define _ASM_M68K_KEXEC_H
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/* Maximum physical address we can use pages from */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
@@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#endif /* _ASM_M68K_KEXEC_H */
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 01fb69a5095f..f335bf3268a1 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o
obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o
obj-$(CONFIG_UBOOT) += uboot.o
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 76db82542519..797ae590ebdb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -460,6 +460,7 @@ config MACH_LOONGSON2EF
config MACH_LOONGSON64
bool "Loongson 64-bit family of machines"
+ select ARCH_DMA_DEFAULT_COHERENT
select ARCH_SPARSEMEM_ENABLE
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -1251,6 +1252,7 @@ config CPU_LOONGSON64
select CPU_SUPPORTS_MSA
select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
select CPU_MIPSR2_IRQ_VI
+ select DMA_NONCOHERENT
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select MIPS_ASID_BITS_VARIABLE
diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index f878f47e4501..ee3e2153dd13 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -130,8 +130,7 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass0c0320",
- "pciclass0c03",
- "loongson, pci-gmac";
+ "pciclass0c03";
reg = <0x1800 0x0 0x0 0x0 0x0>;
interrupts = <12 IRQ_TYPE_LEVEL_LOW>,
diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
index 7c69e8245c2f..cce9428afc41 100644
--- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
@@ -193,8 +193,7 @@
compatible = "pci0014,7a03.0",
"pci0014,7a03",
"pciclass020000",
- "pciclass0200",
- "loongson, pci-gmac";
+ "pciclass0200";
reg = <0x1800 0x0 0x0 0x0 0x0>;
interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 33c09688210f..08ea2cde1eb5 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = {
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
@@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = {
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h
index d6d5fa5cc31d..69e579e41e66 100644
--- a/arch/mips/include/asm/kexec.h
+++ b/arch/mips/include/asm/kexec.h
@@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
prepare_frametrace(newregs);
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
struct kimage;
extern unsigned long kexec_args[4];
extern int (*_machine_kexec_prepare)(struct kimage *);
diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index 035b1a69e2d0..e007edd6b60a 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -14,7 +14,11 @@
#define ADAPTER_ROM 8
#define ACPI_TABLE 9
#define SMBIOS_TABLE 10
-#define MAX_MEMORY_TYPE 11
+#define UMA_VIDEO_RAM 11
+#define VUMA_VIDEO_RAM 12
+#define MAX_MEMORY_TYPE 13
+
+#define MEM_SIZE_IS_IN_BYTES (1 << 31)
#define LOONGSON3_BOOT_MEM_MAP_MAX 128
struct efi_memory_map_loongson {
@@ -117,7 +121,8 @@ struct irq_source_routing_table {
u64 pci_io_start_addr;
u64 pci_io_end_addr;
u64 pci_config_addr;
- u32 dma_mask_bits;
+ u16 dma_mask_bits;
+ u16 dma_noncoherent;
} __packed;
struct interface_info {
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 5719ff49eff1..0c59e168f800 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -35,7 +35,7 @@ struct plat_smp_ops {
void (*cpu_die)(unsigned int cpu);
void (*cleanup_dead_cpu)(unsigned cpu);
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void (*kexec_nonboot_cpu)(void);
#endif
};
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index a40d8c0e4b87..901bc61fa7ae 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu)
extern void __noreturn play_dead(void);
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static inline void kexec_nonboot_cpu(void)
{
extern const struct plat_smp_ops *mp_ops; /* private */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 853a43ee4b44..ecf3278a32f7 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 5387ed0a5186..b630604c577f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
+
+ /*
+ * New tasks lose permission to use the fpu. This accelerates context
+ * switching for most programs since they don't use the fpu.
+ */
+ clear_tsk_thread_flag(p, TIF_USEDFPU);
+ clear_tsk_thread_flag(p, TIF_USEDMSA);
+ clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+ clear_tsk_thread_flag(p, TIF_FPUBOUND);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
if (unlikely(args->fn)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
@@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.reg29 = (unsigned long) childregs;
p->thread.reg31 = (unsigned long) ret_from_fork;
- /*
- * New tasks lose permission to use the fpu. This accelerates context
- * switching for most programs since they don't use the fpu.
- */
childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
- clear_tsk_thread_flag(p, TIF_USEDFPU);
- clear_tsk_thread_flag(p, TIF_USEDMSA);
- clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
-
-#ifdef CONFIG_MIPS_MT_FPAFF
- clear_tsk_thread_flag(p, TIF_FPUBOUND);
-#endif /* CONFIG_MIPS_MT_FPAFF */
-
#ifdef CONFIG_MIPS_FP_SUPPORT
atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
#endif
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index c074ecce3fbf..b3dbf9ecb0d6 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = {
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
@@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = {
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index dd55d59b88db..f6c37d407f36 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -392,7 +392,7 @@ static void cps_smp_finish(void)
local_irq_enable();
}
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
enum cpu_death {
CPU_DEATH_HALT,
@@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death)
}
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void cps_kexec_nonboot_cpu(void)
{
@@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void)
cps_shutdown_this_cpu(CPU_DEATH_POWER);
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
-#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */
#ifdef CONFIG_HOTPLUG_CPU
@@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = {
.cpu_die = cps_cpu_die,
.cleanup_dead_cpu = cps_cleanup_dead_cpu,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = cps_kexec_nonboot_cpu,
#endif
};
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8fbef537fb88..82e2e051b416 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init);
*/
asmlinkage void start_secondary(void)
{
- unsigned int cpu;
+ unsigned int cpu = raw_smp_processor_id();
cpu_probe();
per_cpu_trap_init(false);
+ rcutree_report_cpu_starting(cpu);
mips_clockevent_init();
mp_ops->init_secondary();
cpu_report();
@@ -366,7 +367,6 @@ asmlinkage void start_secondary(void)
*/
calibrate_delay();
- cpu = smp_processor_id();
cpu_data[cpu].udelay_val = loops_per_jiffy;
set_cpu_sibling_map(cpu);
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index c04987d2ed2e..18e7a17d5115 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -20,13 +20,11 @@ config KVM
depends on HAVE_KVM
depends on MIPS_FP_SUPPORT
select EXPORT_UASM
- select PREEMPT_NOTIFIERS
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
- select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
select KVM_GENERIC_MMU_NOTIFIER
- select INTERVAL_TREE
select KVM_GENERIC_HARDWARE_ENABLING
help
Support for hosting Guest kernels.
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c
index c961e2999f15..ef3750a6ffac 100644
--- a/arch/mips/loongson64/env.c
+++ b/arch/mips/loongson64/env.c
@@ -13,6 +13,8 @@
* Copyright (C) 2009 Lemote Inc.
* Author: Wu Zhangjin, wuzhangjin@gmail.com
*/
+
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <linux/pci_ids.h>
#include <asm/bootinfo.h>
@@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void)
loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits;
if (loongson_sysconf.dma_mask_bits < 32 ||
- loongson_sysconf.dma_mask_bits > 64)
+ loongson_sysconf.dma_mask_bits > 64) {
loongson_sysconf.dma_mask_bits = 32;
+ dma_default_coherent = true;
+ } else {
+ dma_default_coherent = !eirq_source->dma_noncoherent;
+ }
+
+ pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off");
loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm;
loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown;
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index ee8de1735b7c..f25caa6aa9d3 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -49,8 +49,7 @@ void virtual_early_config(void)
void __init szmem(unsigned int node)
{
u32 i, mem_type;
- static unsigned long num_physpages;
- u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size;
+ phys_addr_t node_id, mem_start, mem_size;
/* Otherwise come from DTB */
if (loongson_sysconf.fw_interface != LOONGSON_LEFI)
@@ -64,30 +63,46 @@ void __init szmem(unsigned int node)
mem_type = loongson_memmap->map[i].mem_type;
mem_size = loongson_memmap->map[i].mem_size;
- mem_start = loongson_memmap->map[i].mem_start;
+
+ /* Memory size is in MB unless MEM_SIZE_IS_IN_BYTES is set */
+ if (mem_size & MEM_SIZE_IS_IN_BYTES)
+ mem_size &= ~MEM_SIZE_IS_IN_BYTES;
+ else
+ mem_size = mem_size << 20;
+
+ mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start;
switch (mem_type) {
case SYSTEM_RAM_LOW:
case SYSTEM_RAM_HIGH:
- start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT;
- node_psize = (mem_size << 20) >> PAGE_SHIFT;
- end_pfn = start_pfn + node_psize;
- num_physpages += node_psize;
- pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
- (u32)node_id, mem_type, mem_start, mem_size);
- pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
- start_pfn, end_pfn, num_physpages);
- memblock_add_node(PFN_PHYS(start_pfn),
- PFN_PHYS(node_psize), node,
+ case UMA_VIDEO_RAM:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
+ memblock_add_node(mem_start, mem_size, node,
MEMBLOCK_NONE);
break;
case SYSTEM_RAM_RESERVED:
- pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
- (u32)node_id, mem_type, mem_start, mem_size);
- memblock_reserve(((node_id << 44) + mem_start), mem_size << 20);
+ case VIDEO_ROM:
+ case ADAPTER_ROM:
+ case ACPI_TABLE:
+ case SMBIOS_TABLE:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
+ memblock_reserve(mem_start, mem_size);
+ break;
+ /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */
+ case VUMA_VIDEO_RAM:
+ default:
+ pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n",
+ (u32)node_id, mem_type, &mem_start, &mem_size);
break;
}
}
+
+ /* Reserve vgabios if it comes from firmware */
+ if (loongson_sysconf.vgabios_addr)
+ memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr),
+ SZ_256K);
}
#ifndef CONFIG_NUMA
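/*
 * Illustrative sketch (not part of this patch): the (node_id << 44)
 * composition above assumes the Loongson64 convention of placing the
 * node id at physical-address bit 44 and up.
 */
static inline phys_addr_t loongson_node_to_phys(u64 node_id, u64 local_addr)
{
	return (node_id << 44) | local_addr;	/* e.g. node 1 -> 0x100000000000 */
}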
diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c
index e420800043b0..e01c8d4a805a 100644
--- a/arch/mips/loongson64/reset.c
+++ b/arch/mips/loongson64/reset.c
@@ -53,7 +53,7 @@ static void loongson_halt(void)
}
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/* 0X80000000~0X80200000 is safe */
#define MAX_ARGS 64
@@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void)
_machine_halt = loongson_halt;
pm_power_off = loongson_poweroff;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL);
if (WARN_ON(!kexec_argv))
return -ENOMEM;
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index e015a26a40f7..498bdc1bb0ed 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = {
.cpu_disable = loongson3_cpu_disable,
.cpu_die = loongson3_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fd69dfa0cdab..d14ccc948a29 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64
default n
config GENERIC_BUG
- bool
- default y
+ def_bool y
depends on BUG
+ select GENERIC_BUG_RELATIVE_POINTERS if 64BIT
+
+config GENERIC_BUG_RELATIVE_POINTERS
+ bool
config GENERIC_HWEIGHT
bool
@@ -140,11 +143,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
default 8
config ARCH_MMAP_RND_BITS_MAX
- default 24 if 64BIT
- default 17
+ default 18 if 64BIT
+ default 13
config ARCH_MMAP_RND_COMPAT_BITS_MAX
- default 17
+ default 13
# unless you want to implement ACPI on PA-RISC ... ;-)
config PM
diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h
index 1ed45fd085d3..1eb488f25b83 100644
--- a/arch/parisc/include/asm/alternative.h
+++ b/arch/parisc/include/asm/alternative.h
@@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
/* Alternative SMP implementation. */
#define ALTERNATIVE(cond, replacement) "!0:" \
- ".section .altinstructions, \"aw\" !" \
+ ".section .altinstructions, \"a\" !" \
+ ".align 4 !" \
".word (0b-4-.) !" \
".hword 1, " __stringify(cond) " !" \
".word " __stringify(replacement) " !" \
@@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
/* to replace one single instructions by a new instruction */
#define ALTERNATIVE(from, to, cond, replacement)\
- .section .altinstructions, "aw" ! \
+ .section .altinstructions, "a" ! \
+ .align 4 ! \
.word (from - .) ! \
.hword (to - from)/4, cond ! \
.word replacement ! \
@@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
/* to replace multiple instructions by new code */
#define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\
- .section .altinstructions, "aw" ! \
+ .section .altinstructions, "a" ! \
+ .align 4 ! \
.word (from - .) ! \
.hword -num_instructions, cond ! \
.word (new_instr_ptr - .) ! \
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 75677b526b2b..74d17d7e759d 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -574,6 +574,7 @@
*/
#define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \
.section __ex_table,"aw" ! \
+ .align 4 ! \
.word (fault_addr - .), (except_addr - .) ! \
.previous
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
index 4b6d60b94124..833555f74ffa 100644
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -17,24 +17,27 @@
#define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff"
#define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */
-#if defined(CONFIG_64BIT)
-#define ASM_WORD_INSN ".dword\t"
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+# define __BUG_REL(val) ".word " __stringify(val) " - ."
#else
-#define ASM_WORD_INSN ".word\t"
+# define __BUG_REL(val) ".word " __stringify(val)
#endif
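/*
 * For reference (not part of this patch): with relative pointers, the
 * generic BUG code resolves each 32-bit displacement against the
 * address of the field itself, roughly as in lib/bug.c:
 */
static inline unsigned long bug_addr(const struct bug_entry *bug)
{
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
	return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
#else
	return bug->bug_addr;
#endif
}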
+
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define BUG() \
do { \
asm volatile("\n" \
"1:\t" PARISC_BUG_BREAK_ASM "\n" \
- "\t.pushsection __bug_table,\"aw\"\n" \
- "2:\t" ASM_WORD_INSN "1b, %c0\n" \
- "\t.short %c1, %c2\n" \
- "\t.org 2b+%c3\n" \
+ "\t.pushsection __bug_table,\"a\"\n" \
+ "\t.align 4\n" \
+ "2:\t" __BUG_REL(1b) "\n" \
+ "\t" __BUG_REL(%c0) "\n" \
+ "\t.short %1, %2\n" \
+ "\t.blockz %3-2*4-2*2\n" \
"\t.popsection" \
: : "i" (__FILE__), "i" (__LINE__), \
- "i" (0), "i" (sizeof(struct bug_entry)) ); \
+ "i" (0), "i" (sizeof(struct bug_entry)) ); \
unreachable(); \
} while(0)
@@ -51,10 +54,12 @@
do { \
asm volatile("\n" \
"1:\t" PARISC_BUG_BREAK_ASM "\n" \
- "\t.pushsection __bug_table,\"aw\"\n" \
- "2:\t" ASM_WORD_INSN "1b, %c0\n" \
- "\t.short %c1, %c2\n" \
- "\t.org 2b+%c3\n" \
+ "\t.pushsection __bug_table,\"a\"\n" \
+ "\t.align 4\n" \
+ "2:\t" __BUG_REL(1b) "\n" \
+ "\t" __BUG_REL(%c0) "\n" \
+ "\t.short %1, %2\n" \
+ "\t.blockz %3-2*4-2*2\n" \
"\t.popsection" \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (BUGFLAG_WARNING|(flags)), \
@@ -65,10 +70,11 @@
do { \
asm volatile("\n" \
"1:\t" PARISC_BUG_BREAK_ASM "\n" \
- "\t.pushsection __bug_table,\"aw\"\n" \
- "2:\t" ASM_WORD_INSN "1b\n" \
- "\t.short %c0\n" \
- "\t.org 2b+%c1\n" \
+ "\t.pushsection __bug_table,\"a\"\n" \
+ "\t.align 4\n" \
+ "2:\t" __BUG_REL(1b) "\n" \
+ "\t.short %0\n" \
+ "\t.blockz %1-4-2\n" \
"\t.popsection" \
: : "i" (BUGFLAG_WARNING|(flags)), \
"i" (sizeof(struct bug_entry)) ); \
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 140eaa97bf21..2d73d3c3cd37 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */
#define ELF_HWCAP 0
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK 0x7ff /* 8MB of VA */
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
struct linux_binprm;
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index af2a598bc0f8..94428798b6aa 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
asm_volatile_goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
+ ".align %1\n\t"
".word 1b - ., %l[l_yes] - .\n\t"
__stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
".popsection\n\t"
- : : "i" (&((char *)key)[branch]) : : l_yes);
+ : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+ : : l_yes);
return false;
l_yes:
@@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
asm_volatile_goto("1:\n\t"
"b,n %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
+ ".align %1\n\t"
".word 1b - ., %l[l_yes] - .\n\t"
__stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
".popsection\n\t"
- : : "i" (&((char *)key)[branch]) : : l_yes);
+ : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+ : : l_yes);
return false;
l_yes:
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index ee9e071859b2..47ebc4c91eaf 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -55,7 +55,7 @@
})
#ifdef CONFIG_SMP
-# define __lock_aligned __section(".data..lock_aligned")
+# define __lock_aligned __section(".data..lock_aligned") __aligned(16)
#endif
#endif /* __PARISC_LDCW_H */
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index c05d121cf5d0..982aca20f56f 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -47,6 +47,8 @@
#ifndef __ASSEMBLY__
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
unsigned long calc_max_stack_size(unsigned long stack_max);
/*
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 2bf660eabe42..4165079898d9 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -41,6 +41,7 @@ struct exception_table_entry {
#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
".section __ex_table,\"aw\"\n" \
+ ".align 4\n" \
".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
".previous\n"
diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h
index 87245c584784..8d94739d75c6 100644
--- a/arch/parisc/include/uapi/asm/errno.h
+++ b/arch/parisc/include/uapi/asm/errno.h
@@ -75,7 +75,6 @@
/* We now return you to your regularly scheduled HPUX. */
-#define ENOSYM 215 /* symbol does not exist in executable */
#define ENOTSOCK 216 /* Socket operation on non-socket */
#define EDESTADDRREQ 217 /* Destination address required */
#define EMSGSIZE 218 /* Message too long */
@@ -101,7 +100,6 @@
#define ETIMEDOUT 238 /* Connection timed out */
#define ECONNREFUSED 239 /* Connection refused */
#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */
-#define EREMOTERELEASE 240 /* Remote peer released connection */
#define EHOSTDOWN 241 /* Host is down */
#define EHOSTUNREACH 242 /* No route to host */
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 29e2750f86a4..e95a977ba5f3 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v)
char cpu_name[60], *p;
/* strip PA path from CPU name to not confuse lscpu */
- strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+ strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
p = strrchr(cpu_name, '[');
if (p)
*(--p) = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index ab896eff7a1d..98af719d5f85 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max)
* indicating that "current" should be used instead of a passed-in
* value from the exec bprm as done with arch_pick_mmap_layout().
*/
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{
unsigned long stack_base;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 58694d1989c2..548051b0b4af 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -130,6 +130,7 @@ SECTIONS
RO_DATA(8)
/* unwind info */
+ . = ALIGN(4);
.PARISC.unwind : {
__start___unwind = .;
*(.PARISC.unwind)
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 8d3eacb50d56..9d44e6630908 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_FTRACE is not set
CONFIG_XMON=y
# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6a9acfb690c9..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,15 @@
#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_FPR(n,base); \
+ b 3f; \
+2: REST_VSR(n,c,base); \
+3:
+
#define __REST_32FPVSRS(n,c,base) \
BEGIN_FTR_SECTION \
b 2f; \
@@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
2: SAVE_32VSRS(n,c,base); \
3:
#else
+#define __REST_1FPVSR(n,b,base) REST_FPR(n, base)
#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
#endif
+#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base)
#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
@@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
SAVE_32FPVSRS(0, R4, R3)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r3)
+ REST_1FPVSR(0, R4, R3)
blr
EXPORT_SYMBOL(store_fp_state)
@@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
2: SAVE_32FPVSRS(0, R4, R6)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
+ REST_1FPVSR(0, R4, R6)
blr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 392404688cec..9452a54d356c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void)
usermsr = current->thread.regs->msr;
+ /* Caller has enabled FP/VEC/VSX/TM in MSR */
if (usermsr & MSR_FP)
- save_fpu(current);
-
+ __giveup_fpu(current);
if (usermsr & MSR_VEC)
- save_altivec(current);
+ __giveup_altivec(current);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (usermsr & MSR_TM) {
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index 90701885762c..40677416d7b2 100644
--- a/arch/powerpc/kernel/trace/ftrace_entry.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -62,7 +62,7 @@
.endif
/* Save previous stack pointer (r1) */
- addi r8, r1, SWITCH_FRAME_SIZE
+ addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
PPC_STL r8, GPR1(r1)
.if \allregs == 1
@@ -182,7 +182,7 @@ ftrace_no_trace:
mflr r3
mtctr r3
REST_GPR(3, r1)
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
mtlr r0
bctr
#endif
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4094e4c4c77a..80b3f6e476b6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -33,6 +33,7 @@ _GLOBAL(store_vr_state)
mfvscr v0
li r4, VRSTATE_VSCR
stvx v0, r4, r3
+ lvx v0, 0, r3
blr
EXPORT_SYMBOL(store_vr_state)
@@ -109,6 +110,7 @@ _GLOBAL(save_altivec)
mfvscr v0
li r4,VRSTATE_VSCR
stvx v0,r4,r7
+ lvx v0,0,r7
blr
#ifdef CONFIG_VSX
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b33358ee6424..074263429faf 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -19,13 +19,11 @@ if VIRTUALIZATION
config KVM
bool
- select PREEMPT_NOTIFIERS
- select HAVE_KVM_EVENTFD
+ select KVM_COMMON
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
- select INTERVAL_TREE
config KVM_BOOK3S_HANDLER
bool
@@ -225,7 +223,6 @@ config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && PPC_E500
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
@@ -238,7 +235,6 @@ config KVM_XICS
bool "KVM in-kernel XICS emulation"
depends on KVM_BOOK3S_64 && !KVM_MPIC
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
default y
help
Include support for the XICS (eXternal Interrupt Controller
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c39945a7fce3..23407fbd73c9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -528,7 +528,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
@@ -578,7 +577,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
case KVM_CAP_IRQFD_RESAMPLE:
r = !xive_enabled();
break;
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b1f25bac280b..71d52a670d95 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* same fault IRQ is not freed by the OS before.
*/
mutex_lock(&vas_pseries_mutex);
- if (migration_in_progress)
+ if (migration_in_progress) {
rc = -EBUSY;
- else
+ } else {
rc = allocate_setup_window(txwin, (u64 *)&domain[0],
cop_feat_caps->win_type);
+ if (!rc)
+ caps->nr_open_wins_progress++;
+ }
+
mutex_unlock(&vas_pseries_mutex);
if (rc)
goto out;
@@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
goto out_free;
txwin->win_type = cop_feat_caps->win_type;
- mutex_lock(&vas_pseries_mutex);
+
/*
+ * The migration SUSPEND thread sets migration_in_progress and
+ * closes all open windows from the list. But a window is only
+ * added to the list after the open and modify HCALLs. So it is
+ * possible that migration_in_progress is set before the modify
+ * HCALL, which may leave some windows still open when the
+ * hypervisor initiates the migration.
+ * So check the migration_in_progress flag again and close all
+ * open windows.
+ *
* Possible to lose the acquired credit with DLPAR core
* removal after the window is opened. So if there are any
* closed windows (means with lost credits), do not give new
@@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* after the existing windows are reopened when credits are
* available.
*/
- if (!caps->nr_close_wins) {
+ mutex_lock(&vas_pseries_mutex);
+ if (!caps->nr_close_wins && !migration_in_progress) {
list_add(&txwin->win_list, &caps->list);
caps->nr_open_windows++;
+ caps->nr_open_wins_progress--;
mutex_unlock(&vas_pseries_mutex);
vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
return &txwin->vas_win;
@@ -433,6 +448,12 @@ out_free:
*/
free_irq_setup(txwin);
h_deallocate_vas_window(txwin->vas_win.winid);
+ /*
+ * Hold the mutex and decrement the nr_open_wins_progress counter.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ caps->nr_open_wins_progress--;
+ mutex_unlock(&vas_pseries_mutex);
out:
atomic_dec(&cop_feat_caps->nr_used_credits);
kfree(txwin);
@@ -937,14 +958,14 @@ int vas_migration_handler(int action)
struct vas_caps *vcaps;
int i, rc = 0;
+ pr_info("VAS migration event %d\n", action);
+
/*
* NX-GZIP is not enabled. Nothing to do for migration.
*/
if (!copypaste_feat)
return rc;
- mutex_lock(&vas_pseries_mutex);
-
if (action == VAS_SUSPEND)
migration_in_progress = true;
else
@@ -990,12 +1011,27 @@ int vas_migration_handler(int action)
switch (action) {
case VAS_SUSPEND:
+ mutex_lock(&vas_pseries_mutex);
rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
true);
+ /*
+ * Windows are added to the list only after a successful
+ * open. So wait here for those in-progress opens to be
+ * closed in vas_allocate_window(), which happens when
+ * migration_in_progress is set.
+ */
+ while (vcaps->nr_open_wins_progress) {
+ mutex_unlock(&vas_pseries_mutex);
+ msleep(10);
+ mutex_lock(&vas_pseries_mutex);
+ }
+ mutex_unlock(&vas_pseries_mutex);
break;
case VAS_RESUME:
+ mutex_lock(&vas_pseries_mutex);
atomic_set(&caps->nr_total_credits, new_nr_creds);
rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+ mutex_unlock(&vas_pseries_mutex);
break;
default:
/* should not happen */
@@ -1011,8 +1047,9 @@ int vas_migration_handler(int action)
goto out;
}
+ pr_info("VAS migration event (%d) successful\n", action);
+
out:
- mutex_unlock(&vas_pseries_mutex);
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 7115043ec488..45567cd13178 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -91,6 +91,8 @@ struct vas_cop_feat_caps {
struct vas_caps {
struct vas_cop_feat_caps caps;
struct list_head list; /* List of open windows */
+ int nr_open_wins_progress; /* Number of open windows in */
+ /* progress. Used in migration */
int nr_close_wins; /* closed windows in the hypervisor for DLPAR */
int nr_open_windows; /* Number of successful open windows */
u8 feat; /* Feature type */
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 95a2a06acc6a..ef8b7b012a0b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT
If unsure what to do here, say N.
config ARCH_SUPPORTS_KEXEC
- def_bool MMU
+ def_bool y
config ARCH_SELECTS_KEXEC
def_bool y
@@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC
select HOTPLUG_CPU if SMP
config ARCH_SUPPORTS_KEXEC_FILE
- def_bool 64BIT && MMU
+ def_bool 64BIT
config ARCH_SELECTS_KEXEC_FILE
def_bool y
@@ -724,6 +724,25 @@ config COMPAT
If you want to execute 32-bit userspace applications, say Y.
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on RISCV_SBI
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.
+
+config PARAVIRT_TIME_ACCOUNTING
+ bool "Paravirtual steal time accounting"
+ depends on PARAVIRT
+ help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on MMU && 64BIT && !XIP_KERNEL
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 90b261114763..dce96f27cc89 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -8,9 +8,6 @@
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/leds/common.h>
-/* Clock frequency (in Hz) of the rtcclk */
-#define RTCCLK_FREQ 1000000
-
/ {
model = "Microchip PolarFire-SoC Icicle Kit";
compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit",
@@ -29,10 +26,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <RTCCLK_FREQ>;
- };
-
leds {
compatible = "gpio-leds";
diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
index 184cb36a175e..a8d623ee9fa4 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
@@ -10,9 +10,6 @@
#include "mpfs.dtsi"
#include "mpfs-m100pfs-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Aries Embedded M100PFEVPS";
compatible = "aries,m100pfsevp", "microchip,mpfs";
@@ -33,10 +30,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x40000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
index c87cc2d8fe29..ea0808ab1042 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
@@ -6,9 +6,6 @@
#include "mpfs.dtsi"
#include "mpfs-polarberry-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Sundance PolarBerry";
compatible = "sundance,polarberry", "microchip,mpfs";
@@ -22,10 +19,6 @@
stdout-path = "serial0:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x2e000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
index 013cb666c72d..f9a890579438 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
@@ -6,9 +6,6 @@
#include "mpfs.dtsi"
#include "mpfs-sev-kit-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
#address-cells = <2>;
#size-cells = <2>;
@@ -28,10 +25,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
index e0797c7e1b35..d1120f5f2c01 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
@@ -11,9 +11,6 @@
#include "mpfs.dtsi"
#include "mpfs-tysom-m-fabric.dtsi"
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ 1000000
-
/ {
model = "Aldec TySOM-M-MPFS250T-REV2";
compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs";
@@ -34,10 +31,6 @@
stdout-path = "serial1:115200n8";
};
- cpus {
- timebase-frequency = <MTIMER_FREQ>;
- };
-
ddrc_cache_lo: memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x30000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index a6faf24f1dba..266489d43912 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -13,6 +13,7 @@
cpus {
#address-cells = <1>;
#size-cells = <0>;
+ timebase-frequency = <1000000>;
cpu0: cpu@0 {
compatible = "sifive,e51", "sifive,rocket0", "riscv";
diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
index df40e87ee063..aec6401a467b 100644
--- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
+++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
@@ -34,7 +34,6 @@
cpu0_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
interrupt-controller;
- #address-cells = <0>;
#interrupt-cells = <1>;
};
};
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index 197db68cc8da..17a904869724 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void)
return ret.error ? 0 : ret.value;
}
-static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
+static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
{
+ static bool done;
+
if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO))
- return false;
+ return;
+
+ if (done)
+ return;
+
+ done = true;
if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID)
- return false;
+ return;
if (!ax45mp_iocp_sw_workaround())
- return false;
+ return;
/* Set this just to make core cbo code happy */
riscv_cbom_block_size = 1;
riscv_noncoherent_supported();
-
- return true;
}
void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage)
{
- errata_probe_iocp(stage, archid, impid);
+ if (stage == RISCV_ALTERNATIVES_BOOT)
+ errata_probe_iocp(stage, archid, impid);
/* we have nothing to patch here ATM so just return back */
}
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 6964dd235e97..484d04a92fa6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -41,6 +41,7 @@
KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HFENCE \
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
@@ -262,6 +263,12 @@ struct kvm_vcpu_arch {
/* 'static' configurations which are set only once */
struct kvm_vcpu_config cfg;
+
+ /* SBI steal-time accounting */
+ struct {
+ gpa_t shmem;
+ u64 last_steal;
+ } sta;
};
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
@@ -370,4 +377,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
+
#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 6a453f7f8b56..b96705258cf9 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -15,9 +15,10 @@
#define KVM_SBI_VERSION_MINOR 0
enum kvm_riscv_sbi_ext_status {
- KVM_RISCV_SBI_EXT_UNINITIALIZED,
- KVM_RISCV_SBI_EXT_AVAILABLE,
- KVM_RISCV_SBI_EXT_UNAVAILABLE,
+ KVM_RISCV_SBI_EXT_STATUS_UNINITIALIZED,
+ KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE,
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED,
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED,
};
struct kvm_vcpu_sbi_context {
@@ -36,7 +37,7 @@ struct kvm_vcpu_sbi_extension {
unsigned long extid_start;
unsigned long extid_end;
- bool default_unavail;
+ bool default_disabled;
/**
* SBI extension handler. It can be defined for a given extension or group of
@@ -59,11 +60,21 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid);
+bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx);
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
+ unsigned long *reg_val);
+int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
+ unsigned long reg_val);
+
#ifdef CONFIG_RISCV_SBI_V01
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
#endif
@@ -74,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
diff --git a/arch/riscv/include/asm/paravirt.h b/arch/riscv/include/asm/paravirt.h
new file mode 100644
index 000000000000..c0abde70fc2c
--- /dev/null
+++ b/arch/riscv/include/asm/paravirt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_PARAVIRT_H
+#define _ASM_RISCV_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif /* CONFIG_PARAVIRT */
+#endif /* _ASM_RISCV_PARAVIRT_H */
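/*
 * Illustrative sketch (not part of this patch): a paravirt time setup
 * is expected to flip the static call and keys roughly like this.
 * sbi_sta_probe() and sta_steal_clock() are hypothetical helpers.
 */
static u64 sta_steal_clock(int cpu);	/* reads the per-CPU steal counter */

int __init pv_time_init(void)
{
	if (!sbi_sta_probe())		/* hypothetical STA-extension probe */
		return 0;

	static_call_update(pv_steal_clock, sta_steal_clock);
	static_key_slow_inc(&paravirt_steal_enabled);

	return 0;
}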
diff --git a/arch/riscv/include/asm/paravirt_api_clock.h b/arch/riscv/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/riscv/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 294044429e8e..ab00235b018f 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
#define TASK_SIZE 0xffffffffUL
-#define VMALLOC_START 0
+#define VMALLOC_START _AC(0, UL)
#define VMALLOC_END TASK_SIZE
#endif /* !CONFIG_MMU */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 0892f4421bc4..b6f898c56940 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -31,6 +31,7 @@ enum sbi_ext_id {
SBI_EXT_SRST = 0x53525354,
SBI_EXT_PMU = 0x504D55,
SBI_EXT_DBCN = 0x4442434E,
+ SBI_EXT_STA = 0x535441,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -243,6 +244,22 @@ enum sbi_ext_dbcn_fid {
SBI_EXT_DBCN_CONSOLE_WRITE_BYTE = 2,
};
+/* SBI STA (steal-time accounting) extension */
+enum sbi_ext_sta_fid {
+ SBI_EXT_STA_STEAL_TIME_SET_SHMEM = 0,
+};
+
+struct sbi_sta_struct {
+ __le32 sequence;
+ __le32 flags;
+ __le64 steal;
+ u8 preempted;
+ u8 pad[47];
+} __packed;
+
+#define SBI_STA_SHMEM_DISABLE -1
+
+/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
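/*
 * Illustrative sketch (not part of this patch): 'sequence' acts as a
 * seqcount. The hypervisor leaves it odd while updating, so a guest
 * retries until it reads the same even value before and after 'steal'.
 */
static u64 sta_read_steal(struct sbi_sta_struct *st)
{
	u32 seq;
	u64 steal;

	do {
		seq = le32_to_cpu(READ_ONCE(st->sequence));
		virt_rmb();
		steal = le64_to_cpu(READ_ONCE(st->steal));
		virt_rmb();
	} while ((seq & 1) ||
		 seq != le32_to_cpu(READ_ONCE(st->sequence)));

	return steal;
}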
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index 1d7942c8a6cb..eeec04b7dae6 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
return sys_ni_syscall(); \
}
-#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers);
-
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 60d3b21dead7..d6b7a5b95874 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -157,9 +157,16 @@ enum KVM_RISCV_SBI_EXT_ID {
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
KVM_RISCV_SBI_EXT_VENDOR,
KVM_RISCV_SBI_EXT_DBCN,
+ KVM_RISCV_SBI_EXT_STA,
KVM_RISCV_SBI_EXT_MAX,
};
+/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_sbi_sta {
+ unsigned long shmem_lo;
+ unsigned long shmem_hi;
+};
+
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
@@ -241,6 +248,12 @@ enum KVM_RISCV_SBI_EXT_ID {
#define KVM_REG_RISCV_VECTOR_REG(n) \
((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
+/* Registers for specific SBI extensions are mapped as type 10 */
+#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA_REG(name) \
+ (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
+
/* Device Control API: RISC-V AIA */
#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
#define KVM_DEV_RISCV_APLIC_SIZE 0x4000
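/*
 * Illustrative sketch (not part of this patch): composing the full
 * 64-bit ONE_REG id for the STA shmem_lo register. KVM_REG_RISCV and
 * KVM_REG_SIZE_ULONG are assumed from the existing uapi headers.
 */
static inline __u64 kvm_riscv_sbi_sta_shmem_lo_id(void)
{
	return KVM_REG_RISCV | KVM_REG_SIZE_ULONG |
	       KVM_REG_RISCV_SBI_STATE |
	       KVM_REG_RISCV_SBI_STA |
	       KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
}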
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fee22a3d1b53..807c2bde1f83 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_SMP) += sbi-ipi.o
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c
index 55f1d7856b54..8706736fd4e2 100644
--- a/arch/riscv/kernel/crash_core.c
+++ b/arch/riscv/kernel/crash_core.c
@@ -5,18 +5,20 @@
void arch_crash_save_vmcoreinfo(void)
{
- VMCOREINFO_NUMBER(VA_BITS);
VMCOREINFO_NUMBER(phys_ram_base);
vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET);
vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+#ifdef CONFIG_MMU
+ VMCOREINFO_NUMBER(VA_BITS);
vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
#ifdef CONFIG_64BIT
vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
#endif
+#endif
vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
kernel_map.va_kernel_pa_offset);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index b77397432403..76ace1e0b46f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -154,7 +154,6 @@ secondary_start_sbi:
XIP_FIXUP_OFFSET a3
add a3, a3, a1
REG_L sp, (a3)
- scs_load_current
.Lsecondary_start_common:
@@ -165,6 +164,7 @@ secondary_start_sbi:
call relocate_enable_mmu
#endif
call .Lsetup_trap_vector
+ scs_load_current
tail smp_callin
#endif /* CONFIG_SMP */
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 56a8c78e9e21..aac019ed63b1 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -40,15 +40,6 @@ struct relocation_handlers {
long buffer);
};
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations);
-void process_accumulated_relocations(struct module *me);
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
- unsigned int hashtable_bits, Elf_Addr v);
-
-struct hlist_head *relocation_hashtable;
-
-struct list_head used_buckets_list;
-
/*
* The auipc+jalr instruction pair can reach any PC-relative offset
* in the range [-2^31 - 2^11, 2^31 - 2^11)
@@ -64,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
static int riscv_insn_rmw(void *location, u32 keep, u32 set)
{
- u16 *parcel = location;
+ __le16 *parcel = location;
u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
insn &= keep;
@@ -77,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set)
static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set)
{
- u16 *parcel = location;
+ __le16 *parcel = location;
u16 insn = le16_to_cpu(*parcel);
insn &= keep;
@@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = {
/* 192-255 nonstandard ABI extensions */
};
-void process_accumulated_relocations(struct module *me)
+static void
+process_accumulated_relocations(struct module *me,
+ struct hlist_head **relocation_hashtable,
+ struct list_head *used_buckets_list)
{
/*
* Only ADD/SUB/SET/ULEB128 should end up here.
@@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me)
* - Each relocation entry for a location address
*/
struct used_bucket *bucket_iter;
+ struct used_bucket *bucket_iter_tmp;
struct relocation_head *rel_head_iter;
+ struct hlist_node *rel_head_iter_tmp;
struct relocation_entry *rel_entry_iter;
+ struct relocation_entry *rel_entry_iter_tmp;
int curr_type;
void *location;
long buffer;
- list_for_each_entry(bucket_iter, &used_buckets_list, head) {
- hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) {
+ list_for_each_entry_safe(bucket_iter, bucket_iter_tmp,
+ used_buckets_list, head) {
+ hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp,
+ bucket_iter->bucket, node) {
buffer = 0;
location = rel_head_iter->location;
- list_for_each_entry(rel_entry_iter,
- rel_head_iter->rel_entry, head) {
+ list_for_each_entry_safe(rel_entry_iter,
+ rel_entry_iter_tmp,
+ rel_head_iter->rel_entry,
+ head) {
curr_type = rel_entry_iter->type;
reloc_handlers[curr_type].reloc_handler(
me, &buffer, rel_entry_iter->value);
@@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me)
kfree(bucket_iter);
}
- kfree(relocation_hashtable);
+ kfree(*relocation_hashtable);
}
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
- unsigned int hashtable_bits, Elf_Addr v)
+static int add_relocation_to_accumulate(struct module *me, int type,
+ void *location,
+ unsigned int hashtable_bits, Elf_Addr v,
+ struct hlist_head *relocation_hashtable,
+ struct list_head *used_buckets_list)
{
struct relocation_entry *entry;
struct relocation_head *rel_head;
@@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
unsigned long hash;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+ if (!entry)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&entry->head);
entry->type = type;
entry->value = v;
@@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
current_head = &relocation_hashtable[hash];
- /* Find matching location (if any) */
+ /*
+ * Search for the relocation_head for the relocations that happen at the
+ * provided location
+ */
bool found = false;
struct relocation_head *rel_head_iter;
@@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
}
}
+ /*
+ * If there has not yet been any relocations at the provided location,
+ * create a relocation_head for that location and populate it with this
+ * relocation_entry.
+ */
if (!found) {
rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL);
+
+ if (!rel_head) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
rel_head->rel_entry =
kmalloc(sizeof(struct list_head), GFP_KERNEL);
+
+ if (!rel_head->rel_entry) {
+ kfree(entry);
+ kfree(rel_head);
+ return -ENOMEM;
+ }
+
INIT_LIST_HEAD(rel_head->rel_entry);
rel_head->location = location;
INIT_HLIST_NODE(&rel_head->node);
if (!current_head->first) {
bucket =
kmalloc(sizeof(struct used_bucket), GFP_KERNEL);
+
+ if (!bucket) {
+ kfree(entry);
+ kfree(rel_head->rel_entry);
+ kfree(rel_head);
+ return -ENOMEM;
+ }
+
INIT_LIST_HEAD(&bucket->head);
bucket->bucket = current_head;
- list_add(&bucket->head, &used_buckets_list);
+ list_add(&bucket->head, used_buckets_list);
}
hlist_add_head(&rel_head->node, current_head);
}
@@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
return 0;
}
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
+static int
+initialize_relocation_hashtable(unsigned int num_relocations,
+ struct hlist_head **relocation_hashtable)
{
/* Can safely assume that bits is not greater than sizeof(long) */
unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
@@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
hashtable_size <<= should_double_size;
- relocation_hashtable = kmalloc_array(hashtable_size,
- sizeof(*relocation_hashtable),
- GFP_KERNEL);
- __hash_init(relocation_hashtable, hashtable_size);
+ *relocation_hashtable = kmalloc_array(hashtable_size,
+ sizeof(*relocation_hashtable),
+ GFP_KERNEL);
+ if (!*relocation_hashtable)
+ return -ENOMEM;
- INIT_LIST_HEAD(&used_buckets_list);
+ __hash_init(*relocation_hashtable, hashtable_size);
return hashtable_bits;
}
@@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
Elf_Addr v;
int res;
unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
- unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations);
+ struct hlist_head *relocation_hashtable;
+ struct list_head used_buckets_list;
+ int hashtable_bits;
+
+ hashtable_bits = initialize_relocation_hashtable(num_relocations,
+ &relocation_hashtable);
+
+ if (hashtable_bits < 0)
+ return hashtable_bits;
+
+ INIT_LIST_HEAD(&used_buckets_list);
pr_debug("Applying relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
@@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}
if (reloc_handlers[type].accumulate_handler)
- res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v);
+ res = add_relocation_to_accumulate(me, type, location,
+ hashtable_bits, v,
+ relocation_hashtable,
+ &used_buckets_list);
else
res = handler(me, location, v);
if (res)
return res;
}
- process_accumulated_relocations(me);
+ process_accumulated_relocations(me, &relocation_hashtable,
+ &used_buckets_list);
return 0;
}
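The conversion to the _safe iterator variants is what makes freeing nodes during the walk legal; the plain list_for_each_entry()/hlist_for_each_entry() forms would compute the next element from memory that kfree() has already released. Reduced to its essence (a standalone sketch, not the module loader's real types):

    #include <stdlib.h>

    struct node {
            struct node *next;
    };

    static void free_all(struct node *head)
    {
            struct node *cur = head, *tmp;

            while (cur) {
                    tmp = cur->next;   /* fetch the successor before freeing */
                    free(cur);
                    cur = tmp;
            }
    }

list_for_each_entry_safe() keeps the same extra cursor for you, which is why the loops above each gained a *_tmp variable.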
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
new file mode 100644
index 000000000000..8e114f5930ce
--- /dev/null
+++ b/arch/riscv/kernel/paravirt.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "riscv-pv: " fmt
+
+#include <linux/cpuhotplug.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/static_call.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/sbi.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
+
+static bool __init has_pv_steal_clock(void)
+{
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_STA) > 0) {
+ pr_info("SBI STA extension detected\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
+ unsigned long flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+ if (ret.error) {
+ if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+ pr_warn("Failed to disable steal-time shmem");
+ else
+ pr_warn("Failed to set steal-time shmem");
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int pv_time_cpu_online(unsigned int cpu)
+{
+ struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
+ phys_addr_t pa = __pa(st);
+ unsigned long lo = (unsigned long)pa;
+ unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
+
+ return sbi_sta_steal_time_set_shmem(lo, hi, 0);
+}
+
+static int pv_time_cpu_down_prepare(unsigned int cpu)
+{
+ return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
+ SBI_STA_SHMEM_DISABLE, 0);
+}
+
+static u64 pv_time_steal_clock(int cpu)
+{
+ struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
+ u32 sequence;
+ u64 steal;
+
+ /*
+ * Check the sequence field before and after reading the steal
+ * field. Repeat the read if it is different or odd.
+ */
+ do {
+ sequence = READ_ONCE(st->sequence);
+ virt_rmb();
+ steal = READ_ONCE(st->steal);
+ virt_rmb();
+ } while ((le32_to_cpu(sequence) & 1) ||
+ sequence != READ_ONCE(st->sequence));
+
+ return le64_to_cpu(steal);
+}
+
+int __init pv_time_init(void)
+{
+ int ret;
+
+ if (!has_pv_steal_clock())
+ return 0;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "riscv/pv_time:online",
+ pv_time_cpu_online,
+ pv_time_cpu_down_prepare);
+ if (ret < 0)
+ return ret;
+
+ static_call_update(pv_steal_clock, pv_time_steal_clock);
+
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+ pr_info("Computing paravirt steal-time\n");
+
+ return 0;
+}
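For reference, the 64-byte record that pv_time_steal_clock() polls is defined by the SBI v2.0 STA extension; the kernel-side definition (added to asm/sbi.h by this series, reproduced here as a sketch) looks like:

    struct sbi_sta_struct {
            __le32  sequence;       /* odd while the SBI implementation updates */
            __le32  flags;
            __le64  steal;          /* stolen time, in nanoseconds */
            u8      preempted;
            u8      pad[47];        /* pad the record to 64 bytes */
    } __packed;

The fixed 64-byte size is what allows the per-CPU steal_time variable above to be declared __aligned(64) and shared with the SBI implementation without straddling a cache line.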
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index c712037dbe10..a2ca5b7756a5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
pair->value &= ~missing;
}
-static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext)
{
struct riscv_hwprobe pair;
diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S
index 90f22049d553..8515ed7cd8c1 100644
--- a/arch/riscv/kernel/tests/module_test/test_uleb128.S
+++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S
@@ -6,13 +6,13 @@
.text
.global test_uleb_basic
test_uleb_basic:
- ld a0, second
+ lw a0, second
addi a0, a0, -127
ret
.global test_uleb_large
test_uleb_large:
- ld a0, fourth
+ lw a0, fourth
addi a0, a0, -0x07e8
ret
@@ -22,10 +22,10 @@ first:
second:
.reloc second, R_RISCV_SET_ULEB128, second
.reloc second, R_RISCV_SUB_ULEB128, first
- .dword 0
+ .word 0
third:
.space 1000
fourth:
.reloc fourth, R_RISCV_SET_ULEB128, fourth
.reloc fourth, R_RISCV_SUB_ULEB128, third
- .dword 0
+ .word 0
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 23641e82a9df..ba3477197789 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -12,6 +12,7 @@
#include <asm/sbi.h>
#include <asm/processor.h>
#include <asm/timex.h>
+#include <asm/paravirt.h>
unsigned long riscv_timebase __ro_after_init;
EXPORT_SYMBOL_GPL(riscv_timebase);
@@ -45,4 +46,6 @@ void __init time_init(void)
timer_probe();
tick_setup_hrtimer_broadcast();
+
+ pv_time_init();
}
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 5eba37147caa..5255f8134aef 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs)
} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
len = 8;
val.data_ulong = GET_RS2S(insn, regs);
- } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
- ((insn >> SH_RD) & 0x1f)) {
+ } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) {
len = 8;
val.data_ulong = GET_RS2C(insn, regs);
#endif
} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
len = 4;
val.data_ulong = GET_RS2S(insn, regs);
- } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
- ((insn >> SH_RD) & 0x1f)) {
+ } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) {
len = 4;
val.data_ulong = GET_RS2C(insn, regs);
} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
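The dropped `(insn >> SH_RD) & 0x1f` tests were bogus for stores: in the C.SWSP/C.SDSP encodings, bits 11:7 carry immediate bits rather than a destination register (the rs2 source sits in bits 6:2), so requiring them to be non-zero made the handler reject valid stack-relative stores with certain offsets. A decode sketch of the relevant fields (field layout per the RVC spec):

    #include <stdint.h>

    /* C.SWSP: op[1:0]=10, rs2[6:2], uimm[12:7], funct3[15:13]=110 */
    static inline unsigned int c_swsp_rs2(uint16_t insn)
    {
            return (insn >> 2) & 0x1f;      /* source register, x0..x31 */
    }

    static inline unsigned int c_swsp_uimm(uint16_t insn)
    {
            /* uimm[5:2] from bits 12:9, uimm[7:6] from bits 8:7, scaled by 4 */
            return (((insn >> 9) & 0xf) << 2) | (((insn >> 7) & 0x3) << 6);
    }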
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index ae2e05f050ec..d490db943858 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,18 +20,17 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
- select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_MMIO
select KVM_XFER_TO_GUEST_WORK
select KVM_GENERIC_MMU_NOTIFIER
- select PREEMPT_NOTIFIERS
+ select SCHED_INFO
help
Support hosting virtualized guest machines.
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 4c2067fc59fc..c9646521f113 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -26,6 +26,7 @@ kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
kvm-y += vcpu_sbi_base.o
kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_hsm.o
+kvm-y += vcpu_sbi_sta.o
kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 6cf23b8adb71..e808723a85f1 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -55,6 +55,7 @@ struct imsic {
/* IMSIC SW-file */
struct imsic_mrif *swfile;
phys_addr_t swfile_pa;
+ spinlock_t swfile_extirq_lock;
};
#define imsic_vs_csr_read(__c) \
@@ -613,12 +614,23 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
{
struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
struct imsic_mrif *mrif = imsic->swfile;
+ unsigned long flags;
+
+ /*
+ * The critical section is necessary during external interrupt
+ * updates to avoid the risk of losing interrupts due to potential
+ * interruptions between reading topei and updating pending status.
+ */
+
+ spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
else
kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+
+ spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
}
static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
@@ -1039,6 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
}
imsic->swfile = page_to_virt(swfile_page);
imsic->swfile_pa = page_to_phys(swfile_page);
+ spin_lock_init(&imsic->swfile_extirq_lock);
/* Setup IO device */
kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e087c809073c..b5ca9f2e98ac 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -83,6 +83,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.hfence_tail = 0;
memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
+ kvm_riscv_vcpu_sbi_sta_reset(vcpu);
+
/* Reset the guest CSRs for hotplug usecase */
if (loaded)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
@@ -541,6 +543,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_aia_load(vcpu, cpu);
+ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+
vcpu->cpu = cpu;
}
@@ -614,6 +618,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
kvm_riscv_hfence_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+ kvm_riscv_vcpu_record_steal_time(vcpu);
}
}
@@ -757,8 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Update HVIP CSR for current CPU */
kvm_riscv_update_hvip(vcpu);
- if (ret <= 0 ||
- kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending()) {
vcpu->mode = OUTSIDE_GUEST_MODE;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f8c9fa0c03c5..fc34557f5356 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -485,7 +485,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num,
reg_val);
-break;
+ break;
default:
rc = -ENOENT;
break;
@@ -931,50 +931,106 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
return copy_isa_ext_reg_indices(vcpu, NULL);
}
-static inline unsigned long num_sbi_ext_regs(void)
+static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- /*
- * number of KVM_REG_RISCV_SBI_SINGLE +
- * 2 x (number of KVM_REG_RISCV_SBI_MULTI)
- */
- return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
-}
-
-static int copy_sbi_ext_reg_indices(u64 __user *uindices)
-{
- int n;
+ unsigned int n = 0;
- /* copy KVM_REG_RISCV_SBI_SINGLE */
- n = KVM_RISCV_SBI_EXT_MAX;
- for (int i = 0; i < n; i++) {
+ for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) {
u64 size = IS_ENABLED(CONFIG_32BIT) ?
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
KVM_REG_RISCV_SBI_SINGLE | i;
+ if (!riscv_vcpu_supports_sbi_ext(vcpu, i))
+ continue;
+
if (uindices) {
if (put_user(reg, uindices))
return -EFAULT;
uindices++;
}
+
+ n++;
}
- /* copy KVM_REG_RISCV_SBI_MULTI */
- n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
- for (int i = 0; i < n; i++) {
- u64 size = IS_ENABLED(CONFIG_32BIT) ?
- KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
- u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
- KVM_REG_RISCV_SBI_MULTI_EN | i;
+ return n;
+}
+
+static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu)
+{
+ return copy_sbi_ext_reg_indices(vcpu, NULL);
+}
+
+static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ int total = 0;
+
+ if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long);
+
+ for (int i = 0; i < n; i++) {
+ u64 reg = KVM_REG_RISCV | size |
+ KVM_REG_RISCV_SBI_STATE |
+ KVM_REG_RISCV_SBI_STA | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ total += n;
+ }
+
+ return total;
+}
+
+static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu)
+{
+ return copy_sbi_reg_indices(vcpu, NULL);
+}
+
+static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu)
+{
+ if (!riscv_isa_extension_available(vcpu->arch.isa, v))
+ return 0;
+
+ /* vstart, vl, vtype, vcsr, vlenb and 32 vector regs */
+ return 37;
+}
+
+static int copy_vector_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ int n = num_vector_regs(vcpu);
+ u64 reg, size;
+ int i;
+
+ if (n == 0)
+ return 0;
+
+ /* copy vstart, vl, vtype, vcsr and vlenb */
+ size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ for (i = 0; i < 5; i++) {
+ reg = KVM_REG_RISCV | size | KVM_REG_RISCV_VECTOR | i;
if (uindices) {
if (put_user(reg, uindices))
return -EFAULT;
uindices++;
}
+ }
- reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
- KVM_REG_RISCV_SBI_MULTI_DIS | i;
+ /* vector_regs have a variable 'vlenb' size */
+ size = __builtin_ctzl(cntx->vector.vlenb);
+ size <<= KVM_REG_SIZE_SHIFT;
+ for (i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size |
+ KVM_REG_RISCV_VECTOR_REG(i);
if (uindices) {
if (put_user(reg, uindices))
@@ -983,7 +1039,7 @@ static int copy_sbi_ext_reg_indices(u64 __user *uindices)
}
}
- return num_sbi_ext_regs();
+ return n;
}
/*
@@ -1001,8 +1057,10 @@ unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
res += num_timer_regs();
res += num_fp_f_regs(vcpu);
res += num_fp_d_regs(vcpu);
+ res += num_vector_regs(vcpu);
res += num_isa_ext_regs(vcpu);
- res += num_sbi_ext_regs();
+ res += num_sbi_ext_regs(vcpu);
+ res += num_sbi_regs(vcpu);
return res;
}
@@ -1045,14 +1103,25 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
return ret;
uindices += ret;
+ ret = copy_vector_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
ret = copy_isa_ext_reg_indices(vcpu, uindices);
if (ret < 0)
return ret;
uindices += ret;
- ret = copy_sbi_ext_reg_indices(uindices);
+ ret = copy_sbi_ext_reg_indices(vcpu, uindices);
if (ret < 0)
return ret;
+ uindices += ret;
+
+ ret = copy_sbi_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
return 0;
}
@@ -1075,12 +1144,14 @@ int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_FP_D:
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+ case KVM_REG_RISCV_SBI_STATE:
+ return kvm_riscv_vcpu_set_reg_sbi(vcpu, reg);
default:
break;
}
@@ -1106,12 +1177,14 @@ int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_FP_D:
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+ case KVM_REG_RISCV_SBI_STATE:
+ return kvm_riscv_vcpu_get_reg_sbi(vcpu, reg);
default:
break;
}
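The size field of the vector register ids deserves a worked example: KVM_REG_SIZE encodes log2 of the register's byte size, so for a guest with VLEN = 128 (vlenb = 16 bytes) the computation above yields:

    /* __builtin_ctzl(16) == 4 == log2(16) */
    u64 size = (u64)__builtin_ctzl(16) << KVM_REG_SIZE_SHIFT;
    /* KVM_REG_SIZE(id) later recovers 1UL << 4 == 16 bytes per register */

which is also why vlenb must be a power of two for this encoding to round-trip.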
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index a04ff98085d9..72a2ffb8dcd1 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
.ext_ptr = &vcpu_sbi_ext_dbcn,
},
{
+ .ext_idx = KVM_RISCV_SBI_EXT_STA,
+ .ext_ptr = &vcpu_sbi_ext_sta,
+ },
+ {
.ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
.ext_ptr = &vcpu_sbi_ext_experimental,
},
@@ -80,6 +84,34 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
},
};
+static const struct kvm_riscv_sbi_extension_entry *
+riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx)
+{
+ const struct kvm_riscv_sbi_extension_entry *sext = NULL;
+
+ if (idx >= KVM_RISCV_SBI_EXT_MAX)
+ return NULL;
+
+ for (int i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i].ext_idx == idx) {
+ sext = &sbi_ext[i];
+ break;
+ }
+ }
+
+ return sext;
+}
+
+bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *sext;
+
+ sext = riscv_vcpu_get_sbi_ext(vcpu, idx);
+
+ return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
+}
+
void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -140,28 +172,19 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val)
{
- unsigned long i;
- const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
-
- if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
- return -ENOENT;
+ const struct kvm_riscv_sbi_extension_entry *sext;
if (reg_val != 1 && reg_val != 0)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].ext_idx == reg_num) {
- sext = &sbi_ext[i];
- break;
- }
- }
- if (!sext)
+ sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
+ if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
return -ENOENT;
scontext->ext_status[sext->ext_idx] = (reg_val) ?
- KVM_RISCV_SBI_EXT_AVAILABLE :
- KVM_RISCV_SBI_EXT_UNAVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED :
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED;
return 0;
}
@@ -170,24 +193,16 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *reg_val)
{
- unsigned long i;
- const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *sext;
- if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
- return -ENOENT;
-
- for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].ext_idx == reg_num) {
- sext = &sbi_ext[i];
- break;
- }
- }
- if (!sext)
+ sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
+ if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
return -ENOENT;
*reg_val = scontext->ext_status[sext->ext_idx] ==
- KVM_RISCV_SBI_EXT_AVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED;
+
return 0;
}
@@ -310,6 +325,69 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_SBI_STATE);
+ unsigned long reg_subtype, reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val);
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_SBI_STATE);
+ unsigned long reg_subtype, reg_val;
+ int ret;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, &reg_val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid)
{
@@ -325,7 +403,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
if (ext->extid_start <= extid && ext->extid_end >= extid) {
if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX ||
scontext->ext_status[entry->ext_idx] ==
- KVM_RISCV_SBI_EXT_AVAILABLE)
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED)
return ext;
return NULL;
@@ -413,12 +491,12 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu)
if (ext->probe && !ext->probe(vcpu)) {
scontext->ext_status[entry->ext_idx] =
- KVM_RISCV_SBI_EXT_UNAVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
continue;
}
- scontext->ext_status[entry->ext_idx] = ext->default_unavail ?
- KVM_RISCV_SBI_EXT_UNAVAILABLE :
- KVM_RISCV_SBI_EXT_AVAILABLE;
+ scontext->ext_status[entry->ext_idx] = ext->default_disabled ?
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED :
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED;
}
}
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 23b57c931b15..9c2ab3dfa93a 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -204,6 +204,6 @@ static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = {
.extid_start = SBI_EXT_DBCN,
.extid_end = SBI_EXT_DBCN,
- .default_unavail = true,
+ .default_disabled = true,
.handler = kvm_sbi_ext_dbcn_handler,
};
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
new file mode 100644
index 000000000000..01f09fe8c3b0
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+
+#include <asm/bug.h>
+#include <asm/current.h>
+#include <asm/kvm_vcpu_sbi.h>
+#include <asm/page.h>
+#include <asm/sbi.h>
+#include <asm/uaccess.h>
+
+void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ vcpu->arch.sta.last_steal = 0;
+}
+
+void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
+{
+ gpa_t shmem = vcpu->arch.sta.shmem;
+ u64 last_steal = vcpu->arch.sta.last_steal;
+ u32 *sequence_ptr, sequence;
+ u64 *steal_ptr, steal;
+ unsigned long hva;
+ gfn_t gfn;
+
+ if (shmem == INVALID_GPA)
+ return;
+
+ /*
+ * shmem is 64-byte aligned (see the enforcement in
+ * kvm_sbi_sta_steal_time_set_shmem()) and the size of sbi_sta_struct
+ * is 64 bytes, so we know all its offsets are in the same page.
+ */
+ gfn = shmem >> PAGE_SHIFT;
+ hva = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+
+ if (WARN_ON(kvm_is_error_hva(hva))) {
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ return;
+ }
+
+ sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
+ offsetof(struct sbi_sta_struct, sequence));
+ steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
+ offsetof(struct sbi_sta_struct, steal));
+
+ if (WARN_ON(get_user(sequence, sequence_ptr)))
+ return;
+
+ sequence = le32_to_cpu(sequence);
+ sequence += 1;
+
+ if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
+ return;
+
+ if (!WARN_ON(get_user(steal, steal_ptr))) {
+ steal = le64_to_cpu(steal);
+ vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
+ steal += vcpu->arch.sta.last_steal - last_steal;
+ WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
+ }
+
+ sequence += 1;
+ WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr));
+
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+}
+
+static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long shmem_phys_lo = cp->a0;
+ unsigned long shmem_phys_hi = cp->a1;
+ u32 flags = cp->a2;
+ struct sbi_sta_struct zero_sta = {0};
+ unsigned long hva;
+ bool writable;
+ gpa_t shmem;
+ int ret;
+
+ if (flags != 0)
+ return SBI_ERR_INVALID_PARAM;
+
+ if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
+ shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ return 0;
+ }
+
+ if (shmem_phys_lo & (SZ_64 - 1))
+ return SBI_ERR_INVALID_PARAM;
+
+ shmem = shmem_phys_lo;
+
+ if (shmem_phys_hi != 0) {
+ if (IS_ENABLED(CONFIG_32BIT))
+ shmem |= ((gpa_t)shmem_phys_hi << 32);
+ else
+ return SBI_ERR_INVALID_ADDRESS;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable)
+ return SBI_ERR_INVALID_ADDRESS;
+
+ ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta));
+ if (ret)
+ return SBI_ERR_FAILURE;
+
+ vcpu->arch.sta.shmem = shmem;
+ vcpu->arch.sta.last_steal = current->sched_info.run_delay;
+
+ return 0;
+}
+
+static int kvm_sbi_ext_sta_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ int ret;
+
+ switch (funcid) {
+ case SBI_EXT_STA_STEAL_TIME_SET_SHMEM:
+ ret = kvm_sbi_sta_steal_time_set_shmem(vcpu);
+ break;
+ default:
+ ret = SBI_ERR_NOT_SUPPORTED;
+ break;
+ }
+
+ retdata->err_val = ret;
+
+ return 0;
+}
+
+static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu)
+{
+ return !!sched_info_on();
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = {
+ .extid_start = SBI_EXT_STA,
+ .extid_end = SBI_EXT_STA,
+ .handler = kvm_sbi_ext_sta_handler,
+ .probe = kvm_sbi_ext_sta_probe,
+};
+
+int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long *reg_val)
+{
+ switch (reg_num) {
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
+ *reg_val = (unsigned long)vcpu->arch.sta.shmem;
+ break;
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
+ if (IS_ENABLED(CONFIG_32BIT))
+ *reg_val = upper_32_bits(vcpu->arch.sta.shmem);
+ else
+ *reg_val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long reg_val)
+{
+ switch (reg_num) {
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);
+
+ vcpu->arch.sta.shmem = reg_val;
+ vcpu->arch.sta.shmem |= hi << 32;
+ } else {
+ vcpu->arch.sta.shmem = reg_val;
+ }
+ break;
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);
+
+ vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32);
+ vcpu->arch.sta.shmem |= lo;
+ } else if (reg_val != 0) {
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
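The sequence handling above is the writer half of a seqcount-style protocol: the first increment leaves the counter odd (update in progress), the steal value is rewritten, and the second increment makes it even again; the guest-side reader in paravirt.c retries on odd or changed counters and therefore never sees a torn 64-bit steal value on 32-bit guests. A toy model of both halves (barriers omitted; the real code uses virt_rmb() and the user-access helpers):

    #include <stdint.h>

    struct rec { volatile uint32_t sequence; volatile uint64_t steal; };

    static void writer_add(struct rec *r, uint64_t delta)
    {
            r->sequence++;          /* odd: update in progress */
            r->steal += delta;
            r->sequence++;          /* even: record consistent again */
    }

    static uint64_t reader_get(struct rec *r)
    {
            uint32_t s;
            uint64_t v;

            do {
                    s = r->sequence;
                    v = r->steal;
            } while ((s & 1) || s != r->sequence);

            return v;
    }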
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index d74df8eb4d71..0c26189aa01c 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -15,7 +15,7 @@
.altmacro
.option norelax
-ENTRY(__kvm_riscv_switch_to)
+SYM_FUNC_START(__kvm_riscv_switch_to)
/* Save Host GPRs (except A0 and T0-T6) */
REG_S ra, (KVM_ARCH_HOST_RA)(a0)
REG_S sp, (KVM_ARCH_HOST_SP)(a0)
@@ -45,7 +45,7 @@ ENTRY(__kvm_riscv_switch_to)
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
- la t4, __kvm_switch_return
+ la t4, .Lkvm_switch_return
REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
/* Save Host and Restore Guest SSTATUS */
@@ -113,7 +113,7 @@ ENTRY(__kvm_riscv_switch_to)
/* Back to Host */
.align 2
-__kvm_switch_return:
+.Lkvm_switch_return:
/* Swap Guest A0 with SSCRATCH */
csrrw a0, CSR_SSCRATCH, a0
@@ -208,9 +208,9 @@ __kvm_switch_return:
/* Return to C code */
ret
-ENDPROC(__kvm_riscv_switch_to)
+SYM_FUNC_END(__kvm_riscv_switch_to)
-ENTRY(__kvm_riscv_unpriv_trap)
+SYM_CODE_START(__kvm_riscv_unpriv_trap)
/*
* We assume that faulting unpriv load/store instruction is
* 4-byte long and blindly increment SEPC by 4.
@@ -231,12 +231,10 @@ ENTRY(__kvm_riscv_unpriv_trap)
csrr a1, CSR_HTINST
REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
sret
-ENDPROC(__kvm_riscv_unpriv_trap)
+SYM_CODE_END(__kvm_riscv_unpriv_trap)
#ifdef CONFIG_FPU
- .align 3
- .global __kvm_riscv_fp_f_save
-__kvm_riscv_fp_f_save:
+SYM_FUNC_START(__kvm_riscv_fp_f_save)
csrr t2, CSR_SSTATUS
li t1, SR_FS
csrs CSR_SSTATUS, t1
@@ -276,10 +274,9 @@ __kvm_riscv_fp_f_save:
sw t0, KVM_ARCH_FP_F_FCSR(a0)
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_f_save)
- .align 3
- .global __kvm_riscv_fp_d_save
-__kvm_riscv_fp_d_save:
+SYM_FUNC_START(__kvm_riscv_fp_d_save)
csrr t2, CSR_SSTATUS
li t1, SR_FS
csrs CSR_SSTATUS, t1
@@ -319,10 +316,9 @@ __kvm_riscv_fp_d_save:
sw t0, KVM_ARCH_FP_D_FCSR(a0)
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_d_save)
- .align 3
- .global __kvm_riscv_fp_f_restore
-__kvm_riscv_fp_f_restore:
+SYM_FUNC_START(__kvm_riscv_fp_f_restore)
csrr t2, CSR_SSTATUS
li t1, SR_FS
lw t0, KVM_ARCH_FP_F_FCSR(a0)
@@ -362,10 +358,9 @@ __kvm_riscv_fp_f_restore:
fscsr t0
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_f_restore)
- .align 3
- .global __kvm_riscv_fp_d_restore
-__kvm_riscv_fp_d_restore:
+SYM_FUNC_START(__kvm_riscv_fp_d_restore)
csrr t2, CSR_SSTATUS
li t1, SR_FS
lw t0, KVM_ARCH_FP_D_FCSR(a0)
@@ -405,4 +400,5 @@ __kvm_riscv_fp_d_restore:
fscsr t0
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_d_restore)
#endif
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index b339a2682f25..d92d1348045c 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -76,6 +76,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL);
if (!cntx->vector.datap)
return -ENOMEM;
+ cntx->vector.vlenb = riscv_v_vsize / 32;
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
if (!vcpu->arch.host_context.vector.datap)
@@ -115,6 +116,9 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
*reg_addr = &cntx->vector.vcsr;
break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ *reg_addr = &cntx->vector.vlenb;
+ break;
case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
default:
return -ENOENT;
@@ -173,6 +177,18 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
if (!riscv_isa_extension_available(isa, v))
return -ENOENT;
+ if (reg_num == KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)) {
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long reg_val;
+
+ if (copy_from_user(&reg_val, uaddr, reg_size))
+ return -EFAULT;
+ if (reg_val != cntx->vector.vlenb)
+ return -EINVAL;
+
+ return 0;
+ }
+
rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
if (rc)
return rc;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 7e2b50c692c1..ce58bc48e5b8 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -179,7 +179,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_riscv_aia_available();
break;
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 438cd92e6080..6de44ede4e14 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_SLUB_STATS=y
# CONFIG_COMPAT_BRK is not set
@@ -619,6 +618,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=y
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -691,7 +693,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
@@ -834,7 +835,6 @@ CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 1b8150e50f6a..bcae47da6b7c 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
@@ -605,6 +604,9 @@ CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -677,7 +679,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index b831083b4edd..47028450eee1 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_CRASH_DUMP=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 94b6919026df..796007125dff 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -111,4 +111,10 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_enable();
}
+/**
+ * stfle_size - Actual size of the facility list as specified by stfle
+ * (number of double words)
+ */
+unsigned int stfle_size(void);
+
#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index b714ed0ef688..9acf48e53a87 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc)
#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
-#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7)
+#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW)
struct kernel_fpu;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 67a298b6cf6e..52664105a473 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -818,7 +818,7 @@ struct s390_io_adapter {
struct kvm_s390_cpu_model {
/* facility mask supported by kvm & hosting machine */
- __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ __u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64];
struct kvm_s390_vm_cpu_subfunc subfuncs;
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dc17896a001a..c15eadbb9983 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct;
execve_tail(); \
} while (0)
-/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
struct seq_file;
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 9286430fe729..35c1d1b860d8 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -63,10 +63,6 @@
cond_syscall(__s390x_sys_##name); \
cond_syscall(__s390_sys_##name)
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers)
-
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
long __s390_compat_sys##name(struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
@@ -85,15 +81,11 @@
/*
* As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
cond_syscall(__s390_compat_sys_##name)
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers)
-
#define __S390_SYS_STUBx(x, name, ...) \
long __s390_sys##name(struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
@@ -124,9 +116,6 @@
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name)
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers)
-
#define __S390_SYS_STUBx(x, fullname, name, ...)
#endif /* CONFIG_COMPAT */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 353def93973b..7a562b4199c8 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -41,7 +41,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
extra-y += vmlinux.lds
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000000..f02127219a27
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+ static unsigned int size;
+ unsigned int r;
+ u64 dummy;
+
+ r = READ_ONCE(size);
+ if (!r) {
+ r = __stfle_asm(&dummy, 1) + 1;
+ WRITE_ONCE(size, r);
+ }
+ return r;
+}
+EXPORT_SYMBOL(stfle_size);
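stfle_size() gives callers a cached answer for how many doublewords STFLE actually stores, so facility lists can be copied at their true length rather than at a fixed buffer size; the vSIE change below is the first user. A hypothetical kernel-side caller:

    #include <asm/facility.h>

    /* Copy the host facility list at its real length; stfle_size() is
     * cached after the first call, so repeated calls are cheap. */
    static int copy_host_facilities(u64 *dst, unsigned int max_dwords)
    {
            unsigned int n = stfle_size();

            if (n > max_dwords)
                    return -EINVAL;
            stfle(dst, n);
            return 0;
    }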
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index cc364fce6aa9..ba75f6bee774 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -666,6 +666,7 @@ static int __init ipl_init(void)
&ipl_ccw_attr_group_lpar);
break;
case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
break;
case IPL_TYPE_FCP:
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 77fd24e6cbb6..39a91b00438a 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event)
if (IS_ERR(cpump))
return PTR_ERR(cpump);
- /* Event initialization sets last_tag to 0. When later on the events
- * are deleted and re-added, do not reset the event count value to zero.
- * Events are added, deleted and re-added when 2 or more events
- * are active at the same time.
- */
- event->hw.last_tag = 0;
event->destroy = paicrypt_event_destroy;
if (a->sample_period) {
@@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags)
{
u64 sum;
+ /* Event initialization sets last_tag to 0. When later on the events
+ * are deleted and re-added, do not reset the event count value to zero.
+ * Events are added, deleted and re-added when 2 or more events
+ * are active at the same time.
+ */
if (!event->hw.last_tag) {
event->hw.last_tag = 1;
sum = paicrypt_getall(event); /* Get current value */
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 8ba0f1a3a39d..e7013a2e8960 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event)
rc = paiext_alloc(a, event);
if (rc)
return rc;
- event->hw.last_tag = 0;
event->destroy = paiext_event_destroy;
if (a->sample_period) {
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 45fdf2a9b2e3..72e9b7dcdf7d 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,19 +20,16 @@ config KVM
def_tristate y
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
- select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_VCPU_ASYNC_IOCTL
- select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
+ select KVM_COMMON
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
select KVM_VFIO
- select INTERVAL_TREE
select MMU_NOTIFIER
help
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3765c4223bf9..80879fc73c90 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
return -EINVAL;
- bp_data = memdup_user(dbg->arch.hw_bp,
- sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+ bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp,
+ sizeof(*bp_data));
if (IS_ERR(bp_data))
return PTR_ERR(bp_data);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7aa0e668488f..39463d0e4a1c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -563,7 +563,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 02dcbe82a8e5..fef42e2a80a2 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -19,6 +19,7 @@
#include <asm/nmi.h>
#include <asm/dis.h>
#include <asm/fpu/api.h>
+#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -587,10 +588,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
if (!gmap_is_shadow(gmap))
return;
- if (start >= 1UL << 31)
- /* We are only interested in prefix pages */
- return;
-
/*
* Only new shadow blocks are added to the list during runtime,
* therefore we can safely reference them all the time.
@@ -988,12 +985,26 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac);
+ /*
+ * Alternate-STFLE-Interpretive-Execution facilities are not supported
+ * -> format-0 flcb
+ */
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
+ /*
+ * The facility list origin (FLO) is in bits 1 - 28 of the FLD
+ * so we need to mask here before reading.
+ */
+ fac = fac & 0x7ffffff8U;
+ /*
+ * format-0 -> size of nested guest's facility list == guest's size
+ * guest's size == host's size, since STFLE is interpretatively executed
+ * using a format-0 for the guest, too.
+ */
if (read_guest_real(vcpu, fac, &vsie_page->fac,
- sizeof(vsie_page->fac)))
+ stfle_size() * sizeof(u64)))
return set_validity_icpt(scb_s, 0x1090U);
scb_s->fac = (__u32)(__u64) &vsie_page->fac;
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3bd2ab2a9a34..5cb92941540b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
pte_clear(mm, addr, ptep);
}
if (reset)
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h
index 927d80ba2332..76631714673c 100644
--- a/arch/sh/include/asm/kexec.h
+++ b/arch/sh/include/asm/kexec.h
@@ -28,7 +28,7 @@
/* The native architecture */
#define KEXEC_ARCH KEXEC_ARCH_SH
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/* arch/sh/kernel/machine_kexec.c */
void reserve_crashkernel(void);
@@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}
#else
static inline void reserve_crashkernel(void) { }
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#endif /* __ASM_SH_KEXEC_H */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 69cd9ac4b2ab..2d7e70537de0 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c
index e8eeedc9b182..1de006b1c339 100644
--- a/arch/sh/kernel/reboot.c
+++ b/arch/sh/kernel/reboot.c
@@ -63,7 +63,7 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.restart = native_machine_restart,
.halt = native_machine_halt,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.crash_shutdown = native_machine_crash_shutdown,
#endif
};
@@ -88,7 +88,7 @@ void machine_halt(void)
machine_ops.halt();
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void machine_crash_shutdown(struct pt_regs *regs)
{
machine_ops.crash_shutdown(regs);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 3d80515298d2..d3175f09b3aa 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
request_resource(res, &code_resource);
request_resource(res, &data_resource);
request_resource(res, &bss_resource);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
request_resource(res, &crashk_res);
#endif
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 55c98fdd67d2..18d15d1ce87d 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void)
{
unsigned long addr = 0;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
char val[MAX_ADDR_LEN] = { };
int ret;
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 1b5d17a9f70d..cf1f13c82175 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -10,6 +10,7 @@
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
+#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d813160b14d8..6356060caaf3 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
#include <xen/events.h>
#endif
+#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
@@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
}
}
-/* Handles int $0x80 */
+#ifdef CONFIG_IA32_EMULATION
+static __always_inline bool int80_is_external(void)
+{
+ const unsigned int offs = (0x80 / 32) * 0x10;
+ const u32 bit = BIT(0x80 % 32);
+
+ /* The local APIC on XENPV guests is fake */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /*
+ * If vector 0x80 is set in the APIC ISR then this is an external
+ * interrupt. Either from broken hardware or injected by a VMM.
+ *
+ * Note: In guest mode this is only valid for secure guests where
+ * the secure module fully controls the vAPIC exposed to the guest.
+ */
+ return apic_read(APIC_ISR + offs) & bit;
+}
+
+/**
+ * int80_emulation - 32-bit legacy syscall entry
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * The arguments for the INT $0x80 based syscall are on stack in the
+ * pt_regs structure:
+ * eax: system call number
+ * ebx, ecx, edx, esi, edi, ebp: arg1 - arg6
+ */
+DEFINE_IDTENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ /* Kernel does not use INT $0x80! */
+ if (unlikely(!user_mode(regs))) {
+ irqentry_enter(regs);
+ instrumentation_begin();
+ panic("Unexpected external interrupt 0x80\n");
+ }
+
+ /*
+ * Establish kernel context for instrumentation, including for
+ * int80_is_external() below which calls into the APIC driver.
+ * Identical for soft and external interrupts.
+ */
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /* Validate that this is a soft interrupt to the extent possible */
+ if (unlikely(int80_is_external()))
+ panic("Unexpected external interrupt 0x80\n");
+
+ /*
+ * The low level idtentry code pushed -1 into regs::orig_ax
+ * and regs::ax contains the syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+	 * that regs::orig_ax contains a 32-bit number when invoking
+	 * a 32-bit syscall.
+	 *
+	 * Establish the syscall convention by saving the 32-bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#else /* CONFIG_IA32_EMULATION */
+
+/* Handles int $0x80 on a 32-bit kernel */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
int nr = syscall_32_enter(regs);
@@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
+#endif /* !CONFIG_IA32_EMULATION */
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
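
An aside on the ISR lookup in int80_is_external() above: the xAPIC in-service register is a 256-bit bitmap split across eight 32-bit registers spaced 0x10 apart, starting at offset APIC_ISR (0x100). A minimal user-space sketch of that indexing arithmetic, for illustration only (standalone C, not kernel code):

#include <stdio.h>

#define APIC_ISR 0x100	/* base offset of the in-service register bank */

/* Compute which 32-bit ISR word and which bit hold a given vector. */
static void isr_slot(unsigned int vector, unsigned int *reg, unsigned int *bit)
{
	*reg = APIC_ISR + (vector / 32) * 0x10;	/* eight words, 0x10 apart */
	*bit = vector % 32;			/* bit within that word */
}

int main(void)
{
	unsigned int reg, bit;

	isr_slot(0x80, &reg, &bit);
	printf("vector 0x80 -> register 0x%x, bit %u\n", reg, bit);
	return 0;
}

For vector 0x80 this yields register 0x140, bit 0, matching the offs/bit pair computed in int80_is_external().
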
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 27c05d08558a..de94e2e84ecc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
-
-/*
- * 32-bit legacy system call entry.
- *
- * 32-bit x86 Linux system calls traditionally used the INT $0x80
- * instruction. INT $0x80 lands here.
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls. Instances of INT $0x80 can be found inline in
- * various programs and libraries. It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method. Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path. It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * Arguments:
- * eax system call number
- * ebx arg1
- * ecx arg2
- * edx arg3
- * esi arg4
- * edi arg5
- * ebp arg6
- */
-SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_ENTRY
- ENDBR
- /*
- * Interrupts are off on entry.
- */
- ASM_CLAC /* Do this early to minimize exposure */
- ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
- * syscall. Just in case the high bits are nonzero, zero-extend
- * the syscall number. (This could almost certainly be deleted
- * with no ill effects.)
- */
- movl %eax, %eax
-
- /* switch to thread stack expects orig_ax and rdi to be pushed */
- pushq %rax /* pt_regs->orig_ax */
-
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-
- /* In the Xen PV case we already run on the thread stack. */
- ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
-
- movq %rsp, %rax
- movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
-
- pushq 5*8(%rax) /* regs->ss */
- pushq 4*8(%rax) /* regs->rsp */
- pushq 3*8(%rax) /* regs->eflags */
- pushq 2*8(%rax) /* regs->cs */
- pushq 1*8(%rax) /* regs->ip */
- pushq 0*8(%rax) /* regs->orig_ax */
-.Lint80_keep_stack:
-
- PUSH_AND_CLEAR_REGS rax=$-ENOSYS
- UNWIND_HINT_REGS
-
- cld
-
- IBRS_ENTER
- UNTRAIN_RET
-
- movq %rsp, %rdi
- call do_int80_syscall_32
- jmp swapgs_restore_regs_and_return_to_usermode
-SYM_CODE_END(entry_INT80_compat)
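
The deleted stub was the last assembly-only path for INT $0x80 on 64-bit kernels; the instruction itself remains usable from 32-bit and 64-bit programs alike. A hedged sketch of issuing a 32-bit syscall this way from a 64-bit binary (assumes an x86-64 Linux host with IA32 emulation enabled; 20 is the 32-bit __NR_getpid):

#include <stdio.h>

int main(void)
{
	long ret;

	/* 32-bit ABI: eax carries the syscall number, result returns in eax. */
	asm volatile("int $0x80"
		     : "=a" (ret)
		     : "a" (20L)
		     : "memory");
	printf("getpid via INT $0x80: %ld\n", ret);
	return 0;
}
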
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a08f794a0e79..ce1c777227b4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
if (pmu->intel_cap.pebs_output_pt_available)
pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
else
- pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT;
+ pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->num_counters,
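
The one-character fix above is easy to gloss over: |= ~FLAG sets every bit except FLAG, while &= ~FLAG clears only FLAG. A small standalone C demonstration with made-up values (not the perf code):

#include <stdio.h>

#define CAP_AUX_OUTPUT (1u << 3)	/* stand-in for PERF_PMU_CAP_AUX_OUTPUT */

int main(void)
{
	unsigned int buggy = 0xd, fixed = 0xd;

	buggy |= ~CAP_AUX_OUTPUT;	/* sets every other bit -> 0xffffffff here */
	fixed &= ~CAP_AUX_OUTPUT;	/* clears only the flag -> 0x00000005 */

	printf("buggy: 0x%08x\nfixed: 0x%08x\n", buggy, fixed);
	return 0;
}
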
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 21556ad87f4b..8f3a4d16bb79 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <asm/apic.h>
#include <asm/desc.h>
+#include <asm/e820/api.h>
#include <asm/sev.h>
#include <asm/ibt.h>
#include <asm/hypervisor.h>
@@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu)
static int __init hv_pci_init(void)
{
- int gen2vm = efi_enabled(EFI_BOOT);
+ bool gen2vm = efi_enabled(EFI_BOOT);
/*
- * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
- * The purpose is to suppress the harmless warning:
+ * A Generation-2 VM doesn't support legacy PCI/PCIe, so both
+ * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() ->
+ * pcibios_init() doesn't call pcibios_resource_survey() ->
+ * e820__reserve_resources_late(); as a result, any emulated persistent
+ * memory of E820_TYPE_PRAM (12) via the kernel parameter
+ * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be
+ * detected by register_e820_pmem(). Fix this by directly calling
+ * e820__reserve_resources_late() here: e820__reserve_resources_late()
+ * depends on e820__reserve_resources(), which has been called earlier
+ * from setup_arch(). Note: e820__reserve_resources_late() also adds
+ * any memory of E820_TYPE_PMEM (7) into iomem_resource, and
+ * acpi_nfit_register_region() -> acpi_nfit_insert_resource() ->
+ * region_intersects() returns REGION_INTERSECTS, so the memory of
+ * E820_TYPE_PMEM won't get added twice.
+ *
+ * We return 0 here so that pci_arch_init() won't print the warning:
* "PCI: Fatal: No config space access function found"
*/
- if (gen2vm)
+ if (gen2vm) {
+ e820__reserve_resources_late();
return 0;
+ }
/* For Generation-1 VM, we'll proceed in pci_arch_init(). */
return 1;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index c8a7fc23f63c..f896eed4516c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -16,6 +16,9 @@
#include <asm/x86_init.h>
#include <asm/cpufeature.h>
#include <asm/irq_vectors.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
if (!cpu_has(c, X86_FEATURE_MWAIT) ||
boot_option_idle_override == IDLE_NOMWAIT)
*cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
+
+ if (xen_initial_domain()) {
+ /*
+ * When Linux is running as Xen dom0, the hypervisor is the
+ * entity in charge of the processor power management, and so
+ * Xen needs to check the OS capabilities reported in the
+ * processor capabilities buffer matches what the hypervisor
+ * driver supports.
+ */
+ xen_sanitize_proc_cap_bits(cap);
+ }
}
static inline bool acpi_has_cpu_in_madt(void)
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 5a2ae24b1204..9805629479d9 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -75,6 +75,11 @@ static inline bool ia32_enabled(void)
return __ia32_enabled;
}
+static inline void ia32_disable(void)
+{
+ __ia32_enabled = false;
+}
+
#else /* !CONFIG_IA32_EMULATION */
static inline bool ia32_enabled(void)
@@ -82,6 +87,8 @@ static inline bool ia32_enabled(void)
return IS_ENABLED(CONFIG_X86_32);
}
+static inline void ia32_disable(void) {}
+
#endif
#endif /* _ASM_X86_IA32_H */
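
The header follows a common kernel idiom: a real toggle when the config option is set, and an empty inline stub otherwise, so call sites (such as the SEV code further below) need no #ifdef. A minimal standalone sketch of the same idiom with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_FEATURE_X
static bool feature_x_on = true;

static inline bool feature_x_enabled(void) { return feature_x_on; }
static inline void feature_x_disable(void) { feature_x_on = false; }
#else
/* Stubs: callers compile unchanged when the feature is configured out. */
static inline bool feature_x_enabled(void) { return false; }
static inline void feature_x_disable(void) { }
#endif

int main(void)
{
	printf("before: %s\n", feature_x_enabled() ? "on" : "off");
	feature_x_disable();	/* real disable, or a no-op stub */
	printf("after:  %s\n", feature_x_enabled() ? "on" : "off");
	return 0;
}

Building with and without -DCONFIG_FEATURE_X exercises both halves; either way the caller is identical.
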
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 05fd175cec7d..13639e57e1f8 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
+#if defined(CONFIG_IA32_EMULATION)
+DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation);
+#endif
+
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4d84122bd643..484f4f0131a5 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
void entry_SYSRETL_compat_unsafe_stack(void);
void entry_SYSRETL_compat_end(void);
-void entry_INT80_compat(void);
-#ifdef CONFIG_XEN_PV
-void xen_entry_INT80_compat(void);
-#endif
#else /* !CONFIG_IA32_EMULATION */
#define entry_SYSCALL_compat NULL
#define entry_SYSENTER_compat NULL
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index fd2669b1cb2d..21f9407be5d3 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
return sys_ni_syscall(); \
}
-#define __SYS_NI(abi, name) \
- SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers);
-
#ifdef CONFIG_X86_64
#define __X64_SYS_STUB0(name) \
__SYS_STUB0(x64, sys_##name)
@@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __X64_COND_SYSCALL(name) \
__COND_SYSCALL(x64, sys_##name)
-#define __X64_SYS_NI(name) \
- __SYS_NI(x64, sys_##name)
#else /* CONFIG_X86_64 */
#define __X64_SYS_STUB0(name)
#define __X64_SYS_STUBx(x, name, ...)
#define __X64_COND_SYSCALL(name)
-#define __X64_SYS_NI(name)
#endif /* CONFIG_X86_64 */
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __IA32_COND_SYSCALL(name) \
__COND_SYSCALL(ia32, sys_##name)
-#define __IA32_SYS_NI(name) \
- __SYS_NI(ia32, sys_##name)
#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
#define __IA32_SYS_STUB0(name)
#define __IA32_SYS_STUBx(x, name, ...)
#define __IA32_COND_SYSCALL(name)
-#define __IA32_SYS_NI(name)
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
#ifdef CONFIG_IA32_EMULATION
@@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
* ia32 regs in the proper order for shared or "common" syscalls. As some
* syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
+ * kernel/sys_ni.c to cover this case as well.
*/
#define __IA32_COMPAT_SYS_STUB0(name) \
__SYS_STUB0(ia32, compat_sys_##name)
@@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __IA32_COMPAT_COND_SYSCALL(name) \
__COND_SYSCALL(ia32, compat_sys_##name)
-#define __IA32_COMPAT_SYS_NI(name) \
- __SYS_NI(ia32, compat_sys_##name)
-
#else /* CONFIG_IA32_EMULATION */
#define __IA32_COMPAT_SYS_STUB0(name)
#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
#define __IA32_COMPAT_COND_SYSCALL(name)
-#define __IA32_COMPAT_SYS_NI(name)
#endif /* CONFIG_IA32_EMULATION */
@@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __X32_COMPAT_COND_SYSCALL(name) \
__COND_SYSCALL(x64, compat_sys_##name)
-#define __X32_COMPAT_SYS_NI(name) \
- __SYS_NI(x64, compat_sys_##name)
#else /* CONFIG_X86_X32_ABI */
#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
#define __X32_COMPAT_COND_SYSCALL(name)
-#define __X32_COMPAT_SYS_NI(name)
#endif /* CONFIG_X86_X32_ABI */
@@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
/*
* As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
__IA32_COMPAT_COND_SYSCALL(name) \
__X32_COMPAT_COND_SYSCALL(name)
-#define COMPAT_SYS_NI(name) \
- __IA32_COMPAT_SYS_NI(name) \
- __X32_COMPAT_SYS_NI(name)
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
* obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
* hurt, we only need to re-define it here to keep the naming congruent to
- * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
- * macros to work correctly.
+ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro
+ * to work correctly.
*/
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
@@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__X64_COND_SYSCALL(name) \
__IA32_COND_SYSCALL(name)
-#define SYS_NI(name) \
- __X64_SYS_NI(name) \
- __IA32_SYS_NI(name)
-
/*
* For VSYSCALLS, we need to declare these three syscalls with the new
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 7048dfacc04b..a9088250770f 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode)
enum xen_lazy_mode xen_get_lazy_mode(void);
+#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
+void xen_sanitize_proc_cap_bits(uint32_t *buf);
+#else
+static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
+{
+ BUG();
+}
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d0918a75cb00..85a3ce2a3666 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
static bool acpi_support_online_capable;
#endif
@@ -233,6 +234,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
return 0;
/*
+ * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+ * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+	 * in x2APIC must be equal to or greater than 0xff.
+ */
+ if (has_lapic_cpus && apic_id < 0xff)
+ return 0;
+
+ /*
* We need to register disabled CPUs as well to permit
* counting disabled CPUs. This allows us to size
* cpus_possible_map more accurately, to permit
@@ -284,6 +293,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
+ has_lapic_cpus = true;
return 0;
}
@@ -1114,10 +1124,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
static int __init acpi_parse_madt_lapic_entries(void)
{
- int count;
- int x2count = 0;
- int ret;
- struct acpi_subtable_proc madt_proc[2];
+ int count, x2count = 0;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
@@ -1126,21 +1133,10 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
- memset(madt_proc, 0, sizeof(madt_proc));
- madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
- madt_proc[0].handler = acpi_parse_lapic;
- madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
- madt_proc[1].handler = acpi_parse_x2apic;
- ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt),
- madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
- if (ret < 0) {
- pr_err("Error parsing LAPIC/X2APIC entries\n");
- return ret;
- }
-
- count = madt_proc[0].count;
- x2count = madt_proc[1].count;
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+ acpi_parse_lapic, MAX_LOCAL_APIC);
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+ acpi_parse_x2apic, MAX_LOCAL_APIC);
}
if (!count && !x2count) {
pr_err("No LAPIC entries present\n");
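
The MADT rule above reduces to a small filter: once valid LAPIC entries have been seen, x2APIC entries with APIC IDs below 0xff are duplicates and get skipped. A hedged standalone sketch of that dedup logic with made-up IDs (not the ACPI parser):

#include <stdbool.h>
#include <stdio.h>

/* Skip x2APIC entries that shadow an already-registered LAPIC entry. */
static bool accept_x2apic(unsigned int apic_id, bool has_lapic_cpus)
{
	if (has_lapic_cpus && apic_id < 0xff)
		return false;	/* duplicate of its LAPIC entry */
	return true;
}

int main(void)
{
	unsigned int ids[] = { 0x10, 0xfe, 0xff, 0x100 };

	for (unsigned int i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
		printf("x2APIC id 0x%x: %s\n", ids[i],
		       accept_x2apic(ids[i], true) ? "register" : "skip");
	return 0;
}
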
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73be3931e4f0..aae7456ece07 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -255,6 +255,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
}
}
+static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ optimize_nops(instr, len);
+ sync_core();
+ local_irq_restore(flags);
+}
+
/*
* In this context, "source" is where the instructions are placed in the
* section .altinstr_replacement, for example during kernel build by the
@@ -438,7 +448,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
- optimize_nops(instr, a->instrlen);
+ optimize_nops_inplace(instr, a->instrlen);
continue;
}
@@ -1685,8 +1695,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
} else {
local_irq_save(flags);
memcpy(addr, opcode, len);
- local_irq_restore(flags);
sync_core();
+ local_irq_restore(flags);
/*
* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a7eab05e5f29..f322ebd053a9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1320,6 +1320,9 @@ static void zenbleed_check_cpu(void *unused)
void amd_check_microcode(void)
{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 9373ec01c5ae..13b45b9c806d 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -104,8 +104,6 @@ struct cont_desc {
size_t size;
};
-static u32 ucode_new_rev;
-
/*
* Microcode patch container file is prepended to the initrd in cpio
* format. See Documentation/arch/x86/microcode.rst
@@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
*
* Returns true if container found (sets @desc), false otherwise.
*/
-static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
+static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size)
{
struct cont_desc desc = { 0 };
struct microcode_amd *mc;
bool ret = false;
- u32 rev, dummy;
desc.cpuid_1_eax = cpuid_1_eax;
@@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
if (!mc)
return ret;
- native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
/*
* Allow application of the same revision to pick up SMT-specific
* changes even if the revision of the other SMT thread is already
* up-to-date.
*/
- if (rev > mc->hdr.patch_id)
+ if (old_rev > mc->hdr.patch_id)
return ret;
- if (!__apply_microcode_amd(mc)) {
- ucode_new_rev = mc->hdr.patch_id;
- ret = true;
- }
-
- return ret;
+ return !__apply_microcode_amd(mc);
}
static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi
*ret = cp;
}
-void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
+void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax)
{
struct cpio_data cp = { };
+ u32 dummy;
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy);
/* Needed in load_microcode_amd() */
ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax;
@@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
if (!(cp.data && cp.size))
return;
- early_apply_microcode(cpuid_1_eax, cp.data, cp.size);
+ if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size))
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
}
static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
@@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu)
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
if (rev < mc->hdr.patch_id) {
- if (!__apply_microcode_amd(mc)) {
- ucode_new_rev = mc->hdr.patch_id;
- pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
- }
+ if (!__apply_microcode_amd(mc))
+ pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id);
}
}
@@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
if (p && (p->patch_id == csig->rev))
uci->mc = p->data;
- pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
-
return 0;
}
@@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu)
rev = mc_amd->hdr.patch_id;
ret = UCODE_UPDATED;
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
-
out:
uci->cpu_sig.rev = rev;
c->microcode = rev;
@@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void)
pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
return NULL;
}
-
- if (ucode_new_rev)
- pr_info_once("microcode updated early to new patch_level=0x%08x\n",
- ucode_new_rev);
-
return &microcode_amd_ops;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 666d25bbc5ad..232026a239a6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -41,8 +41,6 @@
#include "internal.h"
-#define DRIVER_VERSION "2.2"
-
static struct microcode_ops *microcode_ops;
bool dis_ucode_ldr = true;
@@ -77,6 +75,8 @@ static u32 final_levels[] = {
0, /* T-101 terminator */
};
+struct early_load_data early_data;
+
/*
* Check the current patch level on this CPU.
*
@@ -155,9 +155,9 @@ void __init load_ucode_bsp(void)
return;
if (intel)
- load_ucode_intel_bsp();
+ load_ucode_intel_bsp(&early_data);
else
- load_ucode_amd_bsp(cpuid_1_eax);
+ load_ucode_amd_bsp(&early_data, cpuid_1_eax);
}
void load_ucode_ap(void)
@@ -828,6 +828,11 @@ static int __init microcode_init(void)
if (!microcode_ops)
return -ENODEV;
+ pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
+
+ if (early_data.new_rev)
+ pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
+
microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
if (IS_ERR(microcode_pdev))
return PTR_ERR(microcode_pdev);
@@ -846,8 +851,6 @@ static int __init microcode_init(void)
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);
- pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
-
return 0;
out_pdev:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 6024feb98d29..070426b9895f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc = uci->mc;
- enum ucode_state ret;
- u32 cur_rev, date;
+ u32 cur_rev;
- ret = __apply_microcode(uci, mc, &cur_rev);
- if (ret == UCODE_UPDATED) {
- date = mc->hdr.date;
- pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n",
- cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff);
- }
- return ret;
+ return __apply_microcode(uci, mc, &cur_rev);
}
static __init bool load_builtin_intel_microcode(struct cpio_data *cp)
@@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void)
early_initcall(save_builtin_microcode);
/* Load microcode on BSP from initrd or builtin blobs */
-void __init load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(struct early_load_data *ed)
{
struct ucode_cpu_info uci;
+ ed->old_rev = intel_get_microcode_revision();
+
uci.mc = get_microcode_blob(&uci, false);
if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED)
ucode_patch_va = UCODE_BSP_LOADED;
+
+ ed->new_rev = uci.cpu_sig.rev;
}
void load_ucode_intel_ap(void)
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
index f8047b12329a..21776c529fa9 100644
--- a/arch/x86/kernel/cpu/microcode/internal.h
+++ b/arch/x86/kernel/cpu/microcode/internal.h
@@ -37,6 +37,12 @@ struct microcode_ops {
use_nmi : 1;
};
+struct early_load_data {
+ u32 old_rev;
+ u32 new_rev;
+};
+
+extern struct early_load_data early_data;
extern struct ucode_cpu_info ucode_cpu_info[];
struct cpio_data find_microcode_in_initrd(const char *path);
@@ -92,14 +98,14 @@ extern bool dis_ucode_ldr;
extern bool force_minrev;
#ifdef CONFIG_CPU_SUP_AMD
-void load_ucode_amd_bsp(unsigned int family);
+void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family);
void load_ucode_amd_ap(unsigned int family);
int save_microcode_in_initrd_amd(unsigned int family);
void reload_ucode_amd(unsigned int cpu);
struct microcode_ops *init_amd_microcode(void);
void exit_amd_microcode(void);
#else /* CONFIG_CPU_SUP_AMD */
-static inline void load_ucode_amd_bsp(unsigned int family) { }
+static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { }
static inline void load_ucode_amd_ap(unsigned int family) { }
static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
static inline void reload_ucode_amd(unsigned int cpu) { }
@@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { }
#endif /* !CONFIG_CPU_SUP_AMD */
#ifdef CONFIG_CPU_SUP_INTEL
-void load_ucode_intel_bsp(void);
+void load_ucode_intel_bsp(struct early_load_data *ed);
void load_ucode_intel_ap(void);
void reload_ucode_intel(void);
struct microcode_ops *init_intel_microcode(void);
#else /* CONFIG_CPU_SUP_INTEL */
-static inline void load_ucode_intel_bsp(void) { }
+static inline void load_ucode_intel_bsp(struct early_load_data *ed) { }
static inline void load_ucode_intel_ap(void) { }
static inline void reload_ucode_intel(void) { }
static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
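
The microcode series above threads one early_load_data instance through both vendors' early loaders: capture the revision before the update, let the loader record the post-update revision, and report both once from generic code. A condensed standalone mock of that flow (hypothetical revision values; the real code reads MSRs):

#include <stdio.h>

struct early_load_data {
	unsigned int old_rev;
	unsigned int new_rev;
};

static unsigned int cur_rev = 0x0a201016;	/* mock MSR_AMD64_PATCH_LEVEL */

static unsigned int read_ucode_rev(void) { return cur_rev; }
static void apply_patch(unsigned int rev) { if (rev > cur_rev) cur_rev = rev; }

static void load_ucode_bsp_mock(struct early_load_data *ed)
{
	ed->old_rev = read_ucode_rev();	/* captured before the update */
	apply_patch(0x0a201025);	/* stand-in for a real blob */
	ed->new_rev = read_ucode_rev();	/* captured after the update */
}

int main(void)
{
	struct early_load_data ed = { 0 };

	load_ucode_bsp_mock(&ed);
	/* Same GNU a ?: b shorthand as the patch uses. */
	printf("Current revision: 0x%08x\n", ed.new_rev ?: ed.old_rev);
	if (ed.new_rev != ed.old_rev)
		printf("Updated early from: 0x%08x\n", ed.old_rev);
	return 0;
}
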
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e6bba12c759c..01fa06dd06b6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -262,11 +262,14 @@ static uint32_t __init ms_hyperv_platform(void)
static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
{
static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+ unsigned int old_cpu, this_cpu;
if (!unknown_nmi_panic)
return NMI_DONE;
- if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+ old_cpu = -1;
+ this_cpu = raw_smp_processor_id();
+ if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu))
return NMI_HANDLED;
return NMI_DONE;
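
atomic_try_cmpxchg() corresponds to the C11 compare-exchange, which attempts the swap and, on failure, writes the observed value back into the expected-value argument. A user-space sketch of the nmi_cpu claim above (illustrative, not the kernel's atomic_t):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint nmi_cpu = (unsigned int)-1;	/* -1: unclaimed */

/* The first caller claims the slot; everyone else sees it taken. */
static int claim(unsigned int this_cpu)
{
	unsigned int old_cpu = (unsigned int)-1;

	return atomic_compare_exchange_strong(&nmi_cpu, &old_cpu, this_cpu);
}

int main(void)
{
	printf("cpu 2 claims: %d\n", claim(2));	/* 1: success */
	printf("cpu 5 claims: %d\n", claim(5));	/* 0: already claimed */
	return 0;
}
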
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 086a2c3aaaa0..0f8103240fda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -255,6 +255,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
testl $X2APIC_ENABLE, %eax
jnz .Lread_apicid_msr
+#ifdef CONFIG_X86_X2APIC
+ /*
+	 * If the system is in X2APIC mode then the MMIO base might not be
+	 * mapped, causing the MMIO read below to fault. Faults can't
+ * be handled at that point.
+ */
+ cmpl $0, x2apic_mode(%rip)
+ jz .Lread_apicid_mmio
+
+ /* Force the AP into X2APIC mode. */
+ orl $X2APIC_ENABLE, %eax
+ wrmsr
+ jmp .Lread_apicid_msr
+#endif
+
+.Lread_apicid_mmio:
/* Read the APIC ID from the fix-mapped MMIO space. */
movq apic_mmio_base(%rip), %rcx
addq $APIC_ID, %rcx
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 8857abc706e4..660b601f1d6c 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = {
static const struct idt_data ia32_idt[] __initconst = {
#if defined(CONFIG_IA32_EMULATION)
- SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat),
+ SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation),
#elif defined(CONFIG_X86_32)
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32),
#endif
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 70472eebe719..c67285824e82 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1234,10 +1234,6 @@ void setup_ghcb(void)
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
- /* First make sure the hypervisor talks a supported protocol. */
- if (!sev_es_negotiate_protocol())
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-
/*
* Check whether the runtime #VC exception handler is active. It uses
* the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
@@ -1255,6 +1251,13 @@ void setup_ghcb(void)
}
/*
+ * Make sure the hypervisor talks a supported protocol.
+ * This gets called only in the BSP boot phase.
+ */
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ /*
* Clear the boot_ghcb. The first exception comes in before the bss
* section is cleared.
*/
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index cacf2ede6217..23d8aaf8d9fd 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
- if (setup_signal_shadow_stack(ksig))
- return -EFAULT;
-
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
return -EFAULT;
}
+ if (setup_signal_shadow_stack(ksig))
+ return -EFAULT;
+
/* Set up registers for signal handler */
regs->di = ksig->sig;
/* In case the signal handler was declared without prototypes */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 34f2f47cadf2..b9d35b6b19f7 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -23,17 +23,15 @@ config KVM
depends on HAVE_KVM
depends on HIGH_RES_TIMERS
depends on X86_LOCAL_APIC
- select PREEMPT_NOTIFIERS
+ select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_IRQCHIP
select HAVE_KVM_PFNCACHE
- select HAVE_KVM_IRQFD
select HAVE_KVM_DIRTY_RING_TSO
select HAVE_KVM_DIRTY_RING_ACQ_REL
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
- select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
@@ -46,7 +44,6 @@ config KVM
select KVM_XFER_TO_GUEST_WORK
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
- select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
help
@@ -80,7 +77,7 @@ config KVM_WERROR
config KVM_SW_PROTECTED_VM
bool "Enable support for KVM software-protected VMs"
depends on EXPERT
- depends on X86_64
+ depends on KVM && X86_64
select KVM_GENERIC_PRIVATE_MEM
help
Enable support for KVM software-protected VMs. Currently "protected"
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 1b278a3f0689..cab20216921b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -475,7 +475,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
+ e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e));
if (IS_ERR(e))
return PTR_ERR(e);
@@ -519,7 +519,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
+ e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2));
if (IS_ERR(e2))
return PTR_ERR(e2);
}
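
vmemdup_array_user() folds the element-count multiplication and its overflow check into the helper instead of open-coding array_size() at each call site. A simplified user-space sketch of the overflow-checked pattern (hypothetical helper, not the kernel implementation):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Duplicate n elements of sz bytes each, refusing if n * sz overflows. */
static void *memdup_array(const void *src, size_t n, size_t sz)
{
	size_t bytes;

	if (sz && n > SIZE_MAX / sz)
		return NULL;	/* multiplication would overflow */
	bytes = n * sz;

	void *dst = malloc(bytes ? bytes : 1);
	if (dst)
		memcpy(dst, src, bytes);
	return dst;
}

int main(void)
{
	int ents[] = { 1, 2, 3 };
	int *copy = memdup_array(ents, 3, sizeof(*ents));

	printf("copy[2] = %d, overflow case -> %p\n",
	       copy ? copy[2] : -1, memdup_array(ents, SIZE_MAX, 8));
	free(copy);
	return 0;
}
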
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 42026b3f3ff3..95ea1a1f7403 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
}
static const struct file_operations mmu_rmaps_stat_fops = {
+ .owner = THIS_MODULE,
.open = kvm_mmu_rmaps_stat_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4900c078045a..6ee925d66648 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2972,6 +2972,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
}
+
+ /*
+ * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
+ * the host/guest supports its use.
+ *
+ * guest_can_use() checks a number of requirements on the host/guest to
+ * ensure that MSR_IA32_XSS is available, but it might report true even
+ * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host
+ * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better
+ * to further check that the guest CPUID actually supports
+ * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved
+ * guests will still get intercepted and caught in the normal
+	 * kvm_emulate_rdmsr()/kvm_emulate_wrmsr() paths.
+ */
+ if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
+ else
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
}
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 712146312358..a8bd4e909a1e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -103,6 +103,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
{ .index = MSR_IA32_LASTINTTOIP, .always = false },
+ { .index = MSR_IA32_XSS, .always = false },
{ .index = MSR_EFER, .always = false },
{ .index = MSR_IA32_CR_PAT, .always = false },
{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
@@ -1855,15 +1856,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
bool old_paging = is_paging(vcpu);
#ifdef CONFIG_X86_64
- if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
+ if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.efer |= EFER_LMA;
- svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
+ if (!vcpu->arch.guest_state_protected)
+ svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
}
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
vcpu->arch.efer &= ~EFER_LMA;
- svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
+ if (!vcpu->arch.guest_state_protected)
+ svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
}
}
#endif
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 59adff7bbf55..8ef95139cd24 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 46
+#define MAX_DIRECT_ACCESS_MSRS 47
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 598b057611e0..b6fca43cb455 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5546,8 +5546,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
- sizeof(guest_xsave->region));
+ kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -13081,7 +13081,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
return true;
- return vcpu->arch.preempted_in_kernel;
+ if (vcpu != kvm_get_running_vcpu())
+ return vcpu->arch.preempted_in_kernel;
+
+ return static_call(kvm_x86_get_cpl)(vcpu) == 0;
}
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index a68f2dda0948..70b91de2e053 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -32,6 +32,7 @@
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
+#include <asm/ia32.h>
#include "mm_internal.h"
@@ -481,6 +482,16 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
x86_cpuinit.parallel_bringup = false;
+
+ /*
+ * The VMM is capable of injecting interrupt 0x80 and triggering the
+ * compatibility syscall path.
+ *
+	 * 32-bit emulation is therefore disabled by default to reduce the
+	 * VM's attack surface.
+ */
+ if (sev_status & MSR_AMD64_SEV_ENABLED)
+ ia32_disable();
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c10d9abc239..e89e415aa743 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
#endif
WARN(1, "verification of programs using bpf_throw should have failed\n");
}
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old)
+{
+ u8 *old_addr, *new_addr, *old_bypass_addr;
+ int ret;
+
+ old_bypass_addr = old ? NULL : poke->bypass_addr;
+ old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+ new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+ /*
+ * On program loading or teardown, the program's kallsym entry
+ * might not be in place, so we use __bpf_arch_text_poke to skip
+ * the kallsyms check.
+ */
+ if (new) {
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, new_addr);
+ BUG_ON(ret < 0);
+ if (!old) {
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ poke->bypass_addr,
+ NULL);
+ BUG_ON(ret < 0);
+ }
+ } else {
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ old_bypass_addr,
+ poke->bypass_addr);
+ BUG_ON(ret < 0);
+		/* Let other CPUs finish executing the program so that it
+		 * will not be possible to expose them to an invalid nop,
+		 * stack unwind, or nop state.
+		 */
+ if (!ret)
+ synchronize_rcu();
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, NULL);
+ BUG_ON(ret < 0);
+ }
+}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 9b1ec5d8c99c..a65fc2ae15b4 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -9,6 +9,7 @@ config XEN
select PARAVIRT_CLOCK
select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
+ depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8)
depends on X86_LOCAL_APIC && X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0337392a3121..3c61bb98c10e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page);
* and xen_vcpu_setup for details. By default it points to shared_info->vcpu_info
* but during boot it is switched to point to xen_vcpu_info.
* The pointer is used in xen_evtchn_do_upcall to acknowledge pending events.
+ * Make sure that xen_vcpu_info doesn't cross a page boundary by making it
+ * cache-line aligned (the struct is guaranteed to have a size of 64 bytes,
+ * which matches the cache line size of 64-bit x86 processors).
*/
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info);
/* Linux <-> Xen vCPU id mapping */
DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
@@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu)
int err;
struct vcpu_info *vcpup;
+ BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES);
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
/*
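
The alignment trick works because a 64-byte object aligned on a 64-byte boundary can never straddle a page boundary. A standalone C sketch of the same invariant, with a compile-time check mirroring the BUILD_BUG_ON() (illustrative struct, not Xen's vcpu_info):

#include <stdio.h>

#define CACHE_LINE 64

struct vcpu_info_like {
	_Alignas(CACHE_LINE) unsigned char pad[64];	/* 64-byte payload */
};

/* Mirrors the BUILD_BUG_ON(): the object must fit in one cache line. */
_Static_assert(sizeof(struct vcpu_info_like) <= CACHE_LINE,
	       "object larger than a cache line");

int main(void)
{
	struct vcpu_info_like v;

	/* 64-byte-aligned start + size <= 64: cannot cross a page. */
	printf("addr %% 64 = %lu, size = %zu\n",
	       (unsigned long)&v % CACHE_LINE, sizeof(v));
	return 0;
}
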
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bbbfdd495ebd..aeb33e0a3f76 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = {
TRAP_ENTRY(exc_int3, false ),
TRAP_ENTRY(exc_overflow, false ),
#ifdef CONFIG_IA32_EMULATION
- { entry_INT80_compat, xen_entry_INT80_compat, false },
+ TRAP_ENTRY(int80_emulation, false ),
#endif
TRAP_ENTRY(exc_page_fault, false ),
TRAP_ENTRY(exc_divide_error, false ),
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..1a9cd18dfbd3 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
-xen_pv_trap entry_INT80_compat
+xen_pv_trap asm_int80_emulation
#endif
xen_pv_trap asm_exc_xen_unknown_trap
xen_pv_trap asm_exc_xen_hypervisor_callback
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 408a2aa66c69..a87ab36889e7 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -21,7 +21,7 @@ extern void *xen_initial_gdt;
struct trap_info;
void xen_copy_trap_info(struct trap_info *traps);
-DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info);
DECLARE_PER_CPU(unsigned long, xen_cr3);
DECLARE_PER_CPU(unsigned long, xen_current_cr3);