summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/Kconfig13
-rw-r--r--arch/arm64/boot/dts/freescale/imx93.dtsi4
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-demo.dts39
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/sm8150.dtsi2
-rw-r--r--arch/arm64/include/asm/acpi.h19
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h4
-rw-r--r--arch/arm64/kernel/cpu_errata.c8
-rw-r--r--arch/arm64/kernel/entry.S4
-rw-r--r--arch/arm64/kvm/arch_timer.c13
-rw-r--r--arch/arm64/kvm/emulate-nested.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c44
-rw-r--r--arch/arm64/kvm/pmu.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c4
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/ia64/include/asm/cpu.h5
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/loongarch/include/asm/io.h5
-rw-r--r--arch/loongarch/include/asm/linkage.h8
-rw-r--r--arch/loongarch/include/asm/pgtable-bits.h4
-rw-r--r--arch/loongarch/kernel/entry.S4
-rw-r--r--arch/loongarch/kernel/genex.S16
-rw-r--r--arch/loongarch/kernel/setup.c10
-rw-r--r--arch/loongarch/mm/init.c9
-rw-r--r--arch/loongarch/mm/tlbex.S36
-rw-r--r--arch/mips/kvm/mmu.c3
-rw-r--r--arch/parisc/include/asm/ldcw.h37
-rw-r--r--arch/parisc/include/asm/spinlock_types.h5
-rw-r--r--arch/parisc/kernel/smp.c4
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h7
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h2
-rw-r--r--arch/powerpc/kernel/entry_32.S8
-rw-r--r--arch/powerpc/kernel/head_85xx.S2
-rw-r--r--arch/powerpc/lib/qspinlock.c3
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c9
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S8
-rw-r--r--arch/riscv/Makefile1
-rw-r--r--arch/riscv/errata/andes/Makefile4
-rw-r--r--arch/riscv/include/asm/ftrace.h21
-rw-r--r--arch/riscv/include/asm/kprobes.h11
-rw-r--r--arch/riscv/include/asm/uprobes.h13
-rw-r--r--arch/riscv/kernel/irq.c4
-rw-r--r--arch/riscv/kernel/setup.c13
-rw-r--r--arch/riscv/kernel/signal.c7
-rw-r--r--arch/riscv/kernel/traps.c28
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c18
-rw-r--r--arch/s390/boot/vmem.c7
-rw-r--r--arch/s390/kvm/interrupt.c16
-rw-r--r--arch/s390/net/bpf_jit_comp.c27
-rw-r--r--arch/s390/pci/pci_dma.c15
-rw-r--r--arch/x86/boot/compressed/sev.c10
-rw-r--r--arch/x86/events/utils.c5
-rw-r--r--arch/x86/hyperv/hv_init.c20
-rw-r--r--arch/x86/hyperv/hv_vtl.c3
-rw-r--r--arch/x86/include/asm/cpu.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/kernel/alternative.c13
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c10
-rw-r--r--arch/x86/kernel/fpu/core.c5
-rw-r--r--arch/x86/kernel/fpu/xstate.c12
-rw-r--r--arch/x86/kernel/fpu/xstate.h3
-rw-r--r--arch/x86/kernel/sev-shared.c122
-rw-r--r--arch/x86/kernel/sev.c33
-rw-r--r--arch/x86/kernel/smp.c39
-rw-r--r--arch/x86/kernel/smpboot.c27
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/pmu.c27
-rw-r--r--arch/x86/kvm/pmu.h6
-rw-r--r--arch/x86/kvm/svm/avic.c5
-rw-r--r--arch/x86/kvm/svm/nested.c3
-rw-r--r--arch/x86/kvm/svm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/svm.c5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c4
-rw-r--r--arch/x86/kvm/x86.c40
87 files changed, 630 insertions, 353 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b10515c0200b..78f20e632712 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1037,6 +1037,19 @@ config ARM64_ERRATUM_2645198
If unsure, say Y.
+config ARM64_ERRATUM_2966298
+ bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A520 erratum 2966298.
+
+ On an affected Cortex-A520 core, a speculatively executed unprivileged
+ load might leak data from a privileged level via a cache side channel.
+
+ Work around this problem by executing a TLBI before returning to EL0.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index 6f85a05ee7e1..dcf6e4846ac9 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -185,7 +185,7 @@
#size-cells = <1>;
ranges;
- anomix_ns_gpr: syscon@44210000 {
+ aonmix_ns_gpr: syscon@44210000 {
compatible = "fsl,imx93-aonmix-ns-syscfg", "syscon";
reg = <0x44210000 0x1000>;
};
@@ -319,6 +319,7 @@
assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>;
assigned-clock-rates = <40000000>;
fsl,clk-source = /bits/ 8 <0>;
+ fsl,stop-mode = <&aonmix_ns_gpr 0x14 0>;
status = "disabled";
};
@@ -591,6 +592,7 @@
assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>;
assigned-clock-rates = <40000000>;
fsl,clk-source = /bits/ 8 <0>;
+ fsl,stop-mode = <&wakeupmix_gpr 0x0c 2>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index 36ef2dbe8add..3ee9266fa8e9 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -905,7 +905,7 @@
status = "disabled";
};
- sata_phy: t-phy@1a243000 {
+ sata_phy: t-phy {
compatible = "mediatek,mt7622-tphy",
"mediatek,generic-tphy-v1";
#address-cells = <2>;
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index 68539ea788df..24eda00e320d 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -434,7 +434,7 @@
};
};
- pcie_phy: t-phy@11c00000 {
+ pcie_phy: t-phy {
compatible = "mediatek,mt7986-tphy",
"mediatek,generic-tphy-v2";
#address-cells = <2>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
index b2485ddfd33b..5d635085fe3f 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
@@ -48,7 +48,7 @@
memory@40000000 {
device_type = "memory";
- reg = <0 0x40000000 0 0x80000000>;
+ reg = <0 0x40000000 0x2 0x00000000>;
};
reserved-memory {
@@ -56,13 +56,8 @@
#size-cells = <2>;
ranges;
- /* 2 MiB reserved for ARM Trusted Firmware (BL31) */
- bl31_secmon_reserved: secmon@54600000 {
- no-map;
- reg = <0 0x54600000 0x0 0x200000>;
- };
-
- /* 12 MiB reserved for OP-TEE (BL32)
+ /*
+ * 12 MiB reserved for OP-TEE (BL32)
* +-----------------------+ 0x43e0_0000
* | SHMEM 2MiB |
* +-----------------------+ 0x43c0_0000
@@ -75,6 +70,34 @@
no-map;
reg = <0 0x43200000 0 0x00c00000>;
};
+
+ scp_mem: memory@50000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x50000000 0 0x2900000>;
+ no-map;
+ };
+
+ vpu_mem: memory@53000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x53000000 0 0x1400000>; /* 20 MB */
+ };
+
+ /* 2 MiB reserved for ARM Trusted Firmware (BL31) */
+ bl31_secmon_mem: memory@54600000 {
+ no-map;
+ reg = <0 0x54600000 0x0 0x200000>;
+ };
+
+ snd_dma_mem: memory@60000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x60000000 0 0x1100000>;
+ no-map;
+ };
+
+ apu_mem: memory@62000000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x62000000 0 0x1400000>; /* 20 MB */
+ };
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index a9e52b50c8c4..54c674c45b49 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -313,6 +313,7 @@
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH 0>;
cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>,
<&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>;
+ status = "fail";
};
dmic_codec: dmic-codec {
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index a7c3020a5de4..06c53000bb74 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -3958,7 +3958,7 @@
pdc: interrupt-controller@b220000 {
compatible = "qcom,sm8150-pdc", "qcom,pdc";
- reg = <0 0x0b220000 0 0x400>;
+ reg = <0 0x0b220000 0 0x30000>;
qcom,pdc-ranges = <0 480 94>, <94 609 31>,
<125 63 1>;
#interrupt-cells = <2>;
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 4d537d56eb84..6792a1f83f2a 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -9,6 +9,7 @@
#ifndef _ASM_ACPI_H
#define _ASM_ACPI_H
+#include <linux/cpuidle.h>
#include <linux/efi.h>
#include <linux/memblock.h>
#include <linux/psci.h>
@@ -44,6 +45,24 @@
#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \
trbe_interrupt) + sizeof(u16))
+/*
+ * ArmĀ® Functional Fixed Hardware Specification Version 1.2.
+ * Table 2: Arm Architecture context loss flags
+ */
+#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */
+
+static inline unsigned int arch_get_idle_state_flags(u32 arch_flags)
+{
+ if (arch_flags & CPUIDLE_CORE_CTXT)
+ return CPUIDLE_FLAG_TIMER_STOP;
+
+ return 0;
+}
+#define arch_get_idle_state_flags arch_get_idle_state_flags
+
+#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */
+#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */
+#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5f6f84837a49..74d00feb62f0 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,6 +79,7 @@
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
#define ARM_CPU_PART_CORTEX_X2 0xD48
@@ -148,6 +149,7 @@
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 5882b2415596..1095c6647e96 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -344,14 +344,14 @@
*/
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
-#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
-#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
#define __HFGITR_EL2_MASK GENMASK(54, 0)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index be66e94a21bd..5706e74c5578 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -730,6 +730,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.cpu_enable = cpu_clear_bf16_from_user_emulation,
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ {
+ .desc = "ARM erratum 2966298",
+ .capability = ARM64_WORKAROUND_2966298,
+ /* Cortex-A520 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+ },
+#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
{
.desc = "AmpereOne erratum AC03_CPU_38",
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6ad61de03d0a..a6030913cd58 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,6 +428,10 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
.if \el == 0
+alternative_if ARM64_WORKAROUND_2966298
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..a1e24228aaaa 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
-static bool has_cntpoff(void)
-{
- return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
- if (!has_cntpoff())
- cval -= timer_get_offset(ctx);
+ cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
@@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
- if (!has_cntpoff())
- cval += offset;
+ cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 9ced1bf0c2b7..ee902ff2a50f 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -977,6 +977,8 @@ enum fg_filter_id {
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
+ SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
+ SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..448b17080d36 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
+ if (has_cntpoff()) {
+ struct timer_map map;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're entrering the guest. Reload the correct
+ * values from memory now that TGE is clear.
+ */
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+ if (map.direct_ptimer) {
+ write_sysreg_el0(val, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ if (has_cntpoff()) {
+ struct timer_map map;
+ u64 val, offset;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're exiting the guest. Save the latest CVAL value
+ * to memory and apply the offset now that TGE is set.
+ */
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+ if (map.direct_ptimer && offset) {
+ write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 0eea225fd09a..a243934c5568 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
+ if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !pmu)
+ if (!kvm_arm_support_pmu_v3())
return;
pmu->events_host &= ~clr;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e92ec810d449..0afd6136e275 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
- { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 },
- { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
+ { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index c3f06fdef609..dea3dc89234b 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -84,6 +84,7 @@ WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2645198
WORKAROUND_2658417
+WORKAROUND_2966298
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h
index db125df9e088..642d71675ddb 100644
--- a/arch/ia64/include/asm/cpu.h
+++ b/arch/ia64/include/asm/cpu.h
@@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices);
DECLARE_PER_CPU(int, cpu_state);
-#ifdef CONFIG_HOTPLUG_CPU
-extern int arch_register_cpu(int num);
-extern void arch_unregister_cpu(int);
-#endif
-
#endif /* _ASM_IA64_CPU_H_ */
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 94a848b06f15..741863a187a6 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(arch_unregister_cpu);
#else
-static int __init arch_register_cpu(int num)
+int __init arch_register_cpu(int num)
{
return register_cpu(&sysfs_cpus[num].cpu, num);
}
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 0dcb36b32cb2..c486c2341b66 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -52,10 +52,9 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
* @offset: bus address of the memory
* @size: size of the resource to map
*/
-extern pgprot_t pgprot_wc;
-
#define ioremap_wc(offset, size) \
- ioremap_prot((offset), (size), pgprot_val(pgprot_wc))
+ ioremap_prot((offset), (size), \
+ pgprot_val(wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC))
#define ioremap_cache(offset, size) \
ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h
index 81b0c4cfbf4f..e2eca1a25b4e 100644
--- a/arch/loongarch/include/asm/linkage.h
+++ b/arch/loongarch/include/asm/linkage.h
@@ -33,4 +33,12 @@
.cfi_endproc; \
SYM_END(name, SYM_T_FUNC)
+#define SYM_CODE_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ .cfi_startproc;
+
+#define SYM_CODE_END(name) \
+ .cfi_endproc; \
+ SYM_END(name, SYM_T_NONE)
+
#endif
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 35348d4c4209..21319c1e045c 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -105,13 +105,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
return __pgprot(prot);
}
+extern bool wc_enabled;
+
#define pgprot_writecombine pgprot_writecombine
static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
{
unsigned long prot = pgprot_val(_prot);
- prot = (prot & ~_CACHE_MASK) | _CACHE_WUC;
+ prot = (prot & ~_CACHE_MASK) | (wc_enabled ? _CACHE_WUC : _CACHE_SUC);
return __pgprot(prot);
}
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 65518bb8f472..1ec8e4c4cc2b 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -18,7 +18,7 @@
.text
.cfi_sections .debug_frame
.align 5
-SYM_FUNC_START(handle_syscall)
+SYM_CODE_START(handle_syscall)
csrrd t0, PERCPU_BASE_KS
la.pcrel t1, kernelsp
add.d t1, t1, t0
@@ -71,7 +71,7 @@ SYM_FUNC_START(handle_syscall)
bl do_syscall
RESTORE_ALL_AND_RET
-SYM_FUNC_END(handle_syscall)
+SYM_CODE_END(handle_syscall)
_ASM_NOKPROBE(handle_syscall)
SYM_CODE_START(ret_from_fork)
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 78f066384657..2bb3aa2dcfcb 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -31,7 +31,7 @@ SYM_FUNC_START(__arch_cpu_idle)
1: jr ra
SYM_FUNC_END(__arch_cpu_idle)
-SYM_FUNC_START(handle_vint)
+SYM_CODE_START(handle_vint)
BACKUP_T0T1
SAVE_ALL
la_abs t1, __arch_cpu_idle
@@ -46,11 +46,11 @@ SYM_FUNC_START(handle_vint)
la_abs t0, do_vint
jirl ra, t0, 0
RESTORE_ALL_AND_RET
-SYM_FUNC_END(handle_vint)
+SYM_CODE_END(handle_vint)
-SYM_FUNC_START(except_vec_cex)
+SYM_CODE_START(except_vec_cex)
b cache_parity_error
-SYM_FUNC_END(except_vec_cex)
+SYM_CODE_END(except_vec_cex)
.macro build_prep_badv
csrrd t0, LOONGARCH_CSR_BADV
@@ -66,7 +66,7 @@ SYM_FUNC_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
- SYM_FUNC_START(handle_\exception)
+ SYM_CODE_START(handle_\exception)
666:
BACKUP_T0T1
SAVE_ALL
@@ -76,7 +76,7 @@ SYM_FUNC_END(except_vec_cex)
jirl ra, t0, 0
668:
RESTORE_ALL_AND_RET
- SYM_FUNC_END(handle_\exception)
+ SYM_CODE_END(handle_\exception)
SYM_DATA(unwind_hint_\exception, .word 668b - 666b)
.endm
@@ -93,7 +93,7 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER watch watch none
BUILD_HANDLER reserved reserved none /* others */
-SYM_FUNC_START(handle_sys)
+SYM_CODE_START(handle_sys)
la_abs t0, handle_syscall
jr t0
-SYM_FUNC_END(handle_sys)
+SYM_CODE_END(handle_sys)
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 7783f0a3d742..aed65915e932 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -161,19 +161,19 @@ static void __init smbios_parse(void)
}
#ifdef CONFIG_ARCH_WRITECOMBINE
-pgprot_t pgprot_wc = PAGE_KERNEL_WUC;
+bool wc_enabled = true;
#else
-pgprot_t pgprot_wc = PAGE_KERNEL_SUC;
+bool wc_enabled = false;
#endif
-EXPORT_SYMBOL(pgprot_wc);
+EXPORT_SYMBOL(wc_enabled);
static int __init setup_writecombine(char *p)
{
if (!strcmp(p, "on"))
- pgprot_wc = PAGE_KERNEL_WUC;
+ wc_enabled = true;
else if (!strcmp(p, "off"))
- pgprot_wc = PAGE_KERNEL_SUC;
+ wc_enabled = false;
else
pr_warn("Unknown writecombine setting \"%s\".\n", p);
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index f3fe8c06ba4d..4dd53427f657 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -43,11 +43,11 @@ void copy_user_highpage(struct page *to, struct page *from,
{
void *vfrom, *vto;
- vto = kmap_atomic(to);
- vfrom = kmap_atomic(from);
+ vfrom = kmap_local_page(from);
+ vto = kmap_local_page(to);
copy_page(vto, vfrom);
- kunmap_atomic(vfrom);
- kunmap_atomic(vto);
+ kunmap_local(vfrom);
+ kunmap_local(vto);
/* Make sure this page is cleared on other CPU's too before using it */
smp_wmb();
}
@@ -240,6 +240,7 @@ pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
pgd_t invalid_pg_dir[_PTRS_PER_PGD] __page_aligned_bss;
#ifndef __PAGETABLE_PUD_FOLDED
pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+EXPORT_SYMBOL(invalid_pud_table);
#endif
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index ca17dd3a1915..d5d682f3d29f 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -17,7 +17,7 @@
#define PTRS_PER_PTE_BITS (PAGE_SHIFT - 3)
.macro tlb_do_page_fault, write
- SYM_FUNC_START(tlb_do_page_fault_\write)
+ SYM_CODE_START(tlb_do_page_fault_\write)
SAVE_ALL
csrrd a2, LOONGARCH_CSR_BADV
move a0, sp
@@ -25,13 +25,13 @@
li.w a1, \write
bl do_page_fault
RESTORE_ALL_AND_RET
- SYM_FUNC_END(tlb_do_page_fault_\write)
+ SYM_CODE_END(tlb_do_page_fault_\write)
.endm
tlb_do_page_fault 0
tlb_do_page_fault 1
-SYM_FUNC_START(handle_tlb_protect)
+SYM_CODE_START(handle_tlb_protect)
BACKUP_T0T1
SAVE_ALL
move a0, sp
@@ -41,9 +41,9 @@ SYM_FUNC_START(handle_tlb_protect)
la_abs t0, do_page_fault
jirl ra, t0, 0
RESTORE_ALL_AND_RET
-SYM_FUNC_END(handle_tlb_protect)
+SYM_CODE_END(handle_tlb_protect)
-SYM_FUNC_START(handle_tlb_load)
+SYM_CODE_START(handle_tlb_load)
csrwr t0, EXCEPTION_KS0
csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
@@ -187,16 +187,16 @@ nopage_tlb_load:
csrrd ra, EXCEPTION_KS2
la_abs t0, tlb_do_page_fault_0
jr t0
-SYM_FUNC_END(handle_tlb_load)
+SYM_CODE_END(handle_tlb_load)
-SYM_FUNC_START(handle_tlb_load_ptw)
+SYM_CODE_START(handle_tlb_load_ptw)
csrwr t0, LOONGARCH_CSR_KS0
csrwr t1, LOONGARCH_CSR_KS1
la_abs t0, tlb_do_page_fault_0
jr t0
-SYM_FUNC_END(handle_tlb_load_ptw)
+SYM_CODE_END(handle_tlb_load_ptw)
-SYM_FUNC_START(handle_tlb_store)
+SYM_CODE_START(handle_tlb_store)
csrwr t0, EXCEPTION_KS0
csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
@@ -343,16 +343,16 @@ nopage_tlb_store:
csrrd ra, EXCEPTION_KS2
la_abs t0, tlb_do_page_fault_1
jr t0
-SYM_FUNC_END(handle_tlb_store)
+SYM_CODE_END(handle_tlb_store)
-SYM_FUNC_START(handle_tlb_store_ptw)
+SYM_CODE_START(handle_tlb_store_ptw)
csrwr t0, LOONGARCH_CSR_KS0
csrwr t1, LOONGARCH_CSR_KS1
la_abs t0, tlb_do_page_fault_1
jr t0
-SYM_FUNC_END(handle_tlb_store_ptw)
+SYM_CODE_END(handle_tlb_store_ptw)
-SYM_FUNC_START(handle_tlb_modify)
+SYM_CODE_START(handle_tlb_modify)
csrwr t0, EXCEPTION_KS0
csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
@@ -497,16 +497,16 @@ nopage_tlb_modify:
csrrd ra, EXCEPTION_KS2
la_abs t0, tlb_do_page_fault_1
jr t0
-SYM_FUNC_END(handle_tlb_modify)
+SYM_CODE_END(handle_tlb_modify)
-SYM_FUNC_START(handle_tlb_modify_ptw)
+SYM_CODE_START(handle_tlb_modify_ptw)
csrwr t0, LOONGARCH_CSR_KS0
csrwr t1, LOONGARCH_CSR_KS1
la_abs t0, tlb_do_page_fault_1
jr t0
-SYM_FUNC_END(handle_tlb_modify_ptw)
+SYM_CODE_END(handle_tlb_modify_ptw)
-SYM_FUNC_START(handle_tlb_refill)
+SYM_CODE_START(handle_tlb_refill)
csrwr t0, LOONGARCH_CSR_TLBRSAVE
csrrd t0, LOONGARCH_CSR_PGD
lddir t0, t0, 3
@@ -521,4 +521,4 @@ SYM_FUNC_START(handle_tlb_refill)
tlbfill
csrrd t0, LOONGARCH_CSR_TLBRSAVE
ertn
-SYM_FUNC_END(handle_tlb_refill)
+SYM_CODE_END(handle_tlb_refill)
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 7b2ac1319d70..467ee6b95ae1 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
int srcu_idx, err;
kvm_pfn_t pfn;
- pte_t *ptep, entry, old_pte;
+ pte_t *ptep, entry;
bool writeable;
unsigned long prot_bits;
unsigned long mmu_seq;
@@ -664,7 +664,6 @@ retry:
entry = pfn_pte(pfn, __pgprot(prot_bits));
/* Write the PTE */
- old_pte = *ptep;
set_pte(ptep, entry);
err = 0;
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index 6d28b5514699..ee9e071859b2 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -2,39 +2,42 @@
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H
-#ifndef CONFIG_PA20
/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
and GCC only guarantees 8-byte alignment for stack locals, we can't
be assured of 16-byte alignment for atomic lock data even if we
specify "__attribute ((aligned(16)))" in the type declaration. So,
we use a struct containing an array of four ints for the atomic lock
type and dynamically select the 16-byte aligned int from the array
- for the semaphore. */
+ for the semaphore. */
+
+/* From: "Jim Hull" <jim.hull of hp.com>
+ I've attached a summary of the change, but basically, for PA 2.0, as
+ long as the ",CO" (coherent operation) completer is implemented, then the
+ 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
+ they only require "natural" alignment (4-byte for ldcw, 8-byte for
+ ldcd).
+
+ Although the cache control hint is accepted by all PA 2.0 processors,
+ it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still
+ require 16-byte alignment. If the address is unaligned, the operation
+ of the instruction is undefined. The ldcw instruction does not generate
+ unaligned data reference traps so misaligned accesses are not detected.
+ This hid the problem for years. So, restore the 16-byte alignment dropped
+ by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */
#define __PA_LDCW_ALIGNMENT 16
-#define __PA_LDCW_ALIGN_ORDER 4
#define __ldcw_align(a) ({ \
unsigned long __ret = (unsigned long) &(a)->lock[0]; \
__ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
& ~(__PA_LDCW_ALIGNMENT - 1); \
(volatile unsigned int *) __ret; \
})
-#define __LDCW "ldcw"
-#else /*CONFIG_PA20*/
-/* From: "Jim Hull" <jim.hull of hp.com>
- I've attached a summary of the change, but basically, for PA 2.0, as
- long as the ",CO" (coherent operation) completer is specified, then the
- 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
- they only require "natural" alignment (4-byte for ldcw, 8-byte for
- ldcd). */
-
-#define __PA_LDCW_ALIGNMENT 4
-#define __PA_LDCW_ALIGN_ORDER 2
-#define __ldcw_align(a) (&(a)->slock)
+#ifdef CONFIG_PA20
#define __LDCW "ldcw,co"
-
-#endif /*!CONFIG_PA20*/
+#else
+#define __LDCW "ldcw"
+#endif
/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
We don't explicitly expose that "*a" may be written as reload
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index efd06a897c6a..7b986b09dba8 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -9,15 +9,10 @@
#ifndef __ASSEMBLY__
typedef struct {
-#ifdef CONFIG_PA20
- volatile unsigned int slock;
-# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL }
-#else
volatile unsigned int lock[4];
# define __ARCH_SPIN_LOCK_UNLOCKED \
{ { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \
__ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } }
-#endif
} arch_spinlock_t;
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 4098f9a0964b..2019c1f04bd0 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -440,7 +440,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
if (cpu_online(cpu))
return 0;
- if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle))
+ if (num_online_cpus() < nr_cpu_ids &&
+ num_online_cpus() < setup_max_cpus &&
+ smp_boot_one_cpu(cpu, tidle))
return -EIO;
return cpu_online(cpu) ? 0 : -EIO;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3aaadfd2c8eb..d5d5388973ac 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -910,7 +910,7 @@ config ARCH_FORCE_MAX_ORDER
default "6" if PPC32 && PPC_64K_PAGES
range 4 10 if PPC32 && PPC_256K_PAGES
default "4" if PPC32 && PPC_256K_PAGES
- range 10 10
+ range 10 12
default "10"
help
The kernel page allocator limits the size of maximal physically
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 21f681ee535a..e6fe1d5731f2 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte)
#define pte_wrprotect pte_wrprotect
+static inline int pte_read(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_RO) != _PAGE_NA;
+}
+
+#define pte_read pte_read
+
static inline int pte_write(pte_t pte)
{
return !(pte_val(pte) & _PAGE_RO);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 5cd9acf58a7d..eb6891e34cbd 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
{
unsigned long old;
- if (pte_young(*ptep))
+ if (!pte_young(*ptep))
return 0;
old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
return (old & _PAGE_ACCESSED) != 0;
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 56ea48276356..c721478c5934 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte)
return pte_val(pte) & _PAGE_RW;
}
#endif
+#ifndef pte_read
static inline int pte_read(pte_t pte) { return 1; }
+#endif
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9692acb0361f..7eda33a24bb4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -137,8 +137,9 @@ ret_from_syscall:
lis r4,icache_44x_need_flush@ha
lwz r5,icache_44x_need_flush@l(r4)
cmplwi cr0,r5,0
- bne- 2f
+ bne- .L44x_icache_flush
#endif /* CONFIG_PPC_47x */
+.L44x_icache_flush_return:
kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
@@ -172,10 +173,11 @@ syscall_exit_finish:
b 1b
#ifdef CONFIG_44x
-2: li r7,0
+.L44x_icache_flush:
+ li r7,0
iccci r0,r0
stw r7,icache_44x_need_flush@l(r4)
- b 1b
+ b .L44x_icache_flush_return
#endif /* CONFIG_44x */
.globl ret_from_fork
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 97e9ea0c7297..0f1641a31250 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -395,7 +395,7 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt)
#endif
/* System Call Interrupt */
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 253620979d0c..6dd2f46bd3ef 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -406,6 +406,9 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *
if ((yield_count & 1) == 0)
goto yield_prev; /* owner vcpu is running */
+ if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
+ goto yield_prev; /* re-sample lock owner */
+
spin_end();
preempted = true;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 39acc2cbab4c..9e1f6558d026 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1212,14 +1212,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
smp_mb(); /* see radix__flush_tlb_mm */
exit_flush_lazy_tlbs(mm);
- _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
-
- /*
- * It should not be possible to have coprocessors still
- * attached here.
- */
- if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
- __flush_all_mm(mm, true);
+ __flush_all_mm(mm, true);
preempt_enable();
} else {
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index bae45b358a09..2b0cac6fb61f 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -184,9 +184,6 @@ _GLOBAL_TOC(plpar_hcall)
plpar_hcall_trace:
HCALL_INST_PRECALL(R5)
- std r4,STK_PARAM(R4)(r1)
- mr r0,r4
-
mr r4,r5
mr r5,r6
mr r6,r7
@@ -196,7 +193,7 @@ plpar_hcall_trace:
HVSC
- ld r12,STK_PARAM(R4)(r1)
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)
std r4,0(r12)
std r5,8(r12)
std r6,16(r12)
@@ -296,9 +293,6 @@ _GLOBAL_TOC(plpar_hcall9)
plpar_hcall9_trace:
HCALL_INST_PRECALL(R5)
- std r4,STK_PARAM(R4)(r1)
- mr r0,r4
-
mr r4,r5
mr r5,r6
mr r6,r7
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 1329e060c548..b43a6bb7e4dc 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -6,7 +6,6 @@
# for more details.
#
-OBJCOPYFLAGS := -O binary
LDFLAGS_vmlinux := -z norelro
ifeq ($(CONFIG_RELOCATABLE),y)
LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs
diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile
index 2d644e19caef..6278c389b801 100644
--- a/arch/riscv/errata/andes/Makefile
+++ b/arch/riscv/errata/andes/Makefile
@@ -1 +1,5 @@
+ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
+CFLAGS_errata.o := -mcmodel=medany
+endif
+
obj-y += errata.o
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 740a979171e5..2b2f5df7ef2c 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -31,6 +31,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/*
+ * Let's do like x86/arm64 and ignore the compat syscalls.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym,
+ const char *name)
+{
+ /*
+ * Since all syscall functions have __riscv_ prefix, we must skip it.
+ * However, as we described above, we decided to ignore compat
+ * syscalls, so we don't care about __riscv_compat_ prefix here.
+ */
+ return !strcmp(sym + 8, name);
+}
+
struct dyn_arch_ftrace {
};
#endif
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index e7882ccb0fd4..78ea44f76718 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -40,6 +40,15 @@ void arch_remove_kprobe(struct kprobe *p);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
bool kprobe_breakpoint_handler(struct pt_regs *regs);
bool kprobe_single_step_handler(struct pt_regs *regs);
-
+#else
+static inline bool kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ return false;
+}
+
+static inline bool kprobe_single_step_handler(struct pt_regs *regs)
+{
+ return false;
+}
#endif /* CONFIG_KPROBES */
#endif /* _ASM_RISCV_KPROBES_H */
diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h
index f2183e00fdd2..3fc7deda9190 100644
--- a/arch/riscv/include/asm/uprobes.h
+++ b/arch/riscv/include/asm/uprobes.h
@@ -34,7 +34,18 @@ struct arch_uprobe {
bool simulate;
};
+#ifdef CONFIG_UPROBES
bool uprobe_breakpoint_handler(struct pt_regs *regs);
bool uprobe_single_step_handler(struct pt_regs *regs);
-
+#else
+static inline bool uprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ return false;
+}
+
+static inline bool uprobe_single_step_handler(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_UPROBES */
#endif /* _ASM_RISCV_UPROBES_H */
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index a8efa053c4a5..9cc0a7669271 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -60,7 +60,7 @@ static void init_irq_stacks(void)
}
#endif /* CONFIG_VMAP_STACK */
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
void do_softirq_own_stack(void)
{
#ifdef CONFIG_IRQ_STACKS
@@ -92,7 +92,7 @@ void do_softirq_own_stack(void)
#endif
__do_softirq();
}
-#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */
+#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */
#else
static void init_irq_stacks(void) {}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index e600aab116a4..aac853ae4eb7 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -173,19 +173,6 @@ static void __init init_resources(void)
if (ret < 0)
goto error;
-#ifdef CONFIG_KEXEC_CORE
- if (crashk_res.start != crashk_res.end) {
- ret = add_resource(&iomem_resource, &crashk_res);
- if (ret < 0)
- goto error;
- }
- if (crashk_low_res.start != crashk_low_res.end) {
- ret = add_resource(&iomem_resource, &crashk_low_res);
- if (ret < 0)
- goto error;
- }
-#endif
-
#ifdef CONFIG_CRASH_DUMP
if (elfcorehdr_size > 0) {
elfcorehdr_res.start = elfcorehdr_addr;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 180d951d3624..21a4d0e111bc 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -311,13 +311,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
/* Align the stack frame. */
sp &= ~0xfUL;
- /*
- * Fail if the size of the altstack is not large enough for the
- * sigframe construction.
- */
- if (current->sas_ss_size && sp < current->sas_ss_sp)
- return (void __user __force *)-1UL;
-
return (void __user *)sp;
}
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 19807c4d3805..fae8f610d867 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -13,6 +13,8 @@
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
+#include <linux/uprobes.h>
+#include <asm/uprobes.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/irq.h>
@@ -247,22 +249,28 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
return GET_INSN_LENGTH(insn);
}
+static bool probe_single_step_handler(struct pt_regs *regs)
+{
+ bool user = user_mode(regs);
+
+ return user ? uprobe_single_step_handler(regs) : kprobe_single_step_handler(regs);
+}
+
+static bool probe_breakpoint_handler(struct pt_regs *regs)
+{
+ bool user = user_mode(regs);
+
+ return user ? uprobe_breakpoint_handler(regs) : kprobe_breakpoint_handler(regs);
+}
+
void handle_break(struct pt_regs *regs)
{
-#ifdef CONFIG_KPROBES
- if (kprobe_single_step_handler(regs))
+ if (probe_single_step_handler(regs))
return;
- if (kprobe_breakpoint_handler(regs))
- return;
-#endif
-#ifdef CONFIG_UPROBES
- if (uprobe_single_step_handler(regs))
+ if (probe_breakpoint_handler(regs))
return;
- if (uprobe_breakpoint_handler(regs))
- return;
-#endif
current->thread.bad_cause = regs->cause;
if (user_mode(regs))
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index ecd3ae6f4116..8581693e62d3 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -245,7 +245,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
/* Set return value. */
if (!is_tail_call)
- emit_mv(RV_REG_A0, RV_REG_A5, ctx);
+ emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
ctx);
@@ -759,8 +759,10 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
if (ret)
return ret;
- if (save_ret)
- emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx);
+ if (save_ret) {
+ emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
+ emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
+ }
/* update branch with beqz */
if (ctx->insns) {
@@ -853,7 +855,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
if (save_ret) {
- stack_size += 8;
+ stack_size += 16; /* Save both A5 (BPF R0) and A0 */
retval_off = stack_size;
}
@@ -957,6 +959,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (ret)
goto out;
emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
+ emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
im->ip_after_call = ctx->insns + ctx->ninsns;
/* 2 nops reserved for auipc+jalr pair */
emit(rv_nop(), ctx);
@@ -988,8 +991,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_args(nregs, args_off, ctx);
- if (save_ret)
+ if (save_ret) {
emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
+ emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
+ }
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
@@ -1515,7 +1520,8 @@ out_be:
if (ret)
return ret;
- emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
+ if (insn->src_reg != BPF_PSEUDO_CALL)
+ emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
break;
}
/* tail call */
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 01257ce3b89c..442a74f113cb 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -57,6 +57,7 @@ static void kasan_populate_shadow(void)
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+ unsigned long memgap_start = 0;
unsigned long untracked_end;
unsigned long start, end;
int i;
@@ -101,8 +102,12 @@ static void kasan_populate_shadow(void)
* +- shadow end ----+---------+- shadow end ---+
*/
- for_each_physmem_usable_range(i, &start, &end)
+ for_each_physmem_usable_range(i, &start, &end) {
kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
+ if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260)
+ kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW);
+ memgap_start = end;
+ }
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c1b47d608a2b..efaebba5ee19 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
return 0;
}
-static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
-{
- return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
-}
-
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
{
set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
if (!gi->origin)
return;
- if (gi->alert.mask)
- KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
- kvm, gi->alert.mask);
- while (gisa_in_alert_list(gi->origin))
- cpu_relax();
+ WARN(gi->alert.mask != 0x00,
+ "unexpected non zero alert.mask 0x%02x",
+ gi->alert.mask);
+ gi->alert.mask = 0x00;
+ if (gisa_set_iam(gi->origin, gi->alert.mask))
+ process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index de2fb12120d2..e507692e51e7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2066,6 +2066,7 @@ struct bpf_tramp_jit {
* func_addr's original caller
*/
int stack_size; /* Trampoline stack size */
+ int backchain_off; /* Offset of backchain */
int stack_args_off; /* Offset of stack arguments for calling
* func_addr, has to be at the top
*/
@@ -2086,9 +2087,10 @@ struct bpf_tramp_jit {
* for __bpf_prog_enter() return value and
* func_addr respectively
*/
- int r14_off; /* Offset of saved %r14 */
int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
int tccnt_off; /* Offset of saved tailcall counter */
+ int r14_off; /* Offset of saved %r14, has to be at the
+ * bottom */
int do_fexit; /* do_fexit: label */
};
@@ -2247,8 +2249,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* Calculate the stack layout.
*/
- /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */
+ /*
+ * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
+ * ABI requires, put our backchain at the end of the allocated memory.
+ */
tjit->stack_size = STACK_FRAME_OVERHEAD;
+ tjit->backchain_off = tjit->stack_size - sizeof(u64);
tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
tjit->ip_off = alloc_stack(tjit, sizeof(u64));
@@ -2256,16 +2262,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
tjit->retval_off = alloc_stack(tjit, sizeof(u64));
tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
- tjit->r14_off = alloc_stack(tjit, sizeof(u64));
tjit->run_ctx_off = alloc_stack(tjit,
sizeof(struct bpf_tramp_run_ctx));
tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
- /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
- tjit->stack_size -= STACK_FRAME_OVERHEAD;
+ tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
+ /*
+ * In accordance with the s390x ABI, the caller has allocated
+ * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
+ * backchain, and the rest we can use.
+ */
+ tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+ /* lgr %r1,%r15 */
+ EMIT4(0xb9040000, REG_1, REG_15);
/* aghi %r15,-stack_size */
EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+ /* stg %r1,backchain_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+ tjit->backchain_off);
/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
_EMIT6(0xd203f000 | tjit->tccnt_off,
0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
@@ -2513,7 +2528,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
return -E2BIG;
}
- return ret;
+ return tjit.common.prg;
}
bool bpf_jit_supports_subprog_tailcalls(void)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 2d9b01d7ca4c..99209085c75b 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -564,6 +564,17 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
s->dma_length = 0;
}
}
+
+static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags)
+{
+ size_t n = BITS_TO_LONGS(bits);
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes)))
+ return NULL;
+
+ return vzalloc(bytes);
+}
int zpci_dma_init_device(struct zpci_dev *zdev)
{
@@ -604,13 +615,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->end_dma - zdev->start_dma + 1);
zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
+ zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
if (!zdev->iommu_bitmap) {
rc = -ENOMEM;
goto free_dma_table;
}
if (!s390_iommu_strict) {
- zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+ zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
if (!zdev->lazy_bitmap) {
rc = -ENOMEM;
goto free_bitmap;
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index dc8c876fbd8f..80d76aea1f7b 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -103,6 +103,16 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
return ES_OK;
}
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ return ES_OK;
+}
+
+static bool fault_in_kernel_space(unsigned long address)
+{
+ return false;
+}
+
#undef __init
#define __init
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
index 76b1f8bb0fd5..dab4ed199227 100644
--- a/arch/x86/events/utils.c
+++ b/arch/x86/events/utils.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/insn.h>
+#include <linux/mm.h>
#include "perf_event.h"
@@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort,
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
* Ensure we don't blindly read any address by validating it is
- * a known text address.
+ * a known text address and not a vsyscall address.
*/
- if (kernel_text_address(from)) {
+ if (kernel_text_address(from) && !in_gate_area_no_mm(from)) {
addr = (void *)from;
/*
* Assume we can get the maximum possible size
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 783ed339f341..21556ad87f4b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,8 @@
* Author : K. Y. Srinivasan <kys@microsoft.com>
*/
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
#include <linux/efi.h>
#include <linux/types.h>
#include <linux/bitfield.h>
@@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void))
struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
if (!hv_reenlightenment_available()) {
- pr_warn("Hyper-V: reenlightenment support is unavailable\n");
+ pr_warn("reenlightenment support is unavailable\n");
return;
}
@@ -394,6 +396,7 @@ static void __init hv_get_partition_id(void)
local_irq_restore(flags);
}
+#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
static u8 __init get_vtl(void)
{
u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
@@ -416,13 +419,16 @@ static u8 __init get_vtl(void)
if (hv_result_success(ret)) {
ret = output->as64.low & HV_X64_VTL_MASK;
} else {
- pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret);
- ret = 0;
+ pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
+ BUG();
}
local_irq_restore(flags);
return ret;
}
+#else
+static inline u8 get_vtl(void) { return 0; }
+#endif
/*
* This function is to be invoked early in the boot sequence after the
@@ -564,7 +570,7 @@ skip_hypercall_pg_init:
if (cpu_feature_enabled(X86_FEATURE_IBT) &&
*(u32 *)hv_hypercall_pg != gen_endbr()) {
setup_clear_cpu_cap(X86_FEATURE_IBT);
- pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
+ pr_warn("Disabling IBT because of Hyper-V bug\n");
}
#endif
@@ -604,8 +610,10 @@ skip_hypercall_pg_init:
hv_query_ext_cap(0);
/* Find the VTL */
- if (!ms_hyperv.paravisor_present && hv_isolation_type_snp())
- ms_hyperv.vtl = get_vtl();
+ ms_hyperv.vtl = get_vtl();
+
+ if (ms_hyperv.vtl > 0) /* non default VTL */
+ hv_vtl_early_init();
return;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 36a562218010..999f5ac82fe9 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
return hv_vtl_bringup_vcpu(vp_id, start_eip);
}
-static int __init hv_vtl_early_init(void)
+int __init hv_vtl_early_init(void)
{
/*
* `boot_cpu_has` returns the runtime feature support,
@@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void)
return 0;
}
-early_initcall(hv_vtl_early_init);
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 3a233ebff712..25050d953eee 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,8 +28,6 @@ struct x86_cpu {
};
#ifdef CONFIG_HOTPLUG_CPU
-extern int arch_register_cpu(int num);
-extern void arch_unregister_cpu(int);
extern void soft_restart_cpu(void);
#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 31089b851c4f..a2be3aefff9f 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d..70d139406bc8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -528,7 +528,6 @@ struct kvm_pmu {
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
- struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 033b53f993c6..896445edc6a8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
+int __init hv_vtl_early_init(void);
#else
static inline void __init hv_vtl_init_platform(void) {}
+static inline int __init hv_vtl_early_init(void) { return 0; }
#endif
#include <asm-generic/mshyperv.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d111350197f..b37abb55e948 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -637,12 +637,17 @@
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
-/* Fam 17h MSRs */
-#define MSR_F17H_IRPERF 0xc00000e9
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+/* Zen 2 */
#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ad98dd1d9cfb..c31c633419fe 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-bool smp_park_other_cpus_in_init(void);
void smp_store_cpu_info(int id);
asmlinkage __visible void smp_reboot_interrupt(void);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 19bf955b67e0..3ac0ffc4f3e2 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 517ee01503be..73be3931e4f0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 insn_buff[MAX_PATCH_LEN];
DPRINTK(ALT, "alt table %px, -> %px", start, end);
+
+ /*
+ * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+ * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
+ * During the process, KASAN becomes confused seeing partial LA57
+ * conversion and triggers a false-positive out-of-bound report.
+ *
+ * Disable KASAN until the patching is complete.
+ */
+ kasan_disable_current();
+
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz);
}
+
+ kasan_enable_current();
}
static inline bool is_jcc32(struct insn *insn)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 03ef962a6992..ece2b5b7b0fe 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -80,6 +80,10 @@ static const int amd_div0[] =
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
int osvw_id = *erratum++;
@@ -1149,6 +1153,10 @@ static void init_amd(struct cpuinfo_x86 *c)
pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
setup_force_cpu_bug(X86_BUG_DIV0);
}
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ded1fc7cb7cb..f136ac046851 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -30,15 +30,15 @@ struct rmid_entry {
struct list_head list;
};
-/**
- * @rmid_free_lru A least recently used list of free RMIDs
+/*
+ * @rmid_free_lru - A least recently used list of free RMIDs
* These RMIDs are guaranteed to have an occupancy less than the
* threshold occupancy
*/
static LIST_HEAD(rmid_free_lru);
-/**
- * @rmid_limbo_count count of currently unused but (potentially)
+/*
+ * @rmid_limbo_count - count of currently unused but (potentially)
* dirty RMIDs.
* This counts RMIDs that no one is currently using but that
* may have a occupancy value > resctrl_rmid_realloc_threshold. User can
@@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru);
*/
static unsigned int rmid_limbo_count;
-/**
+/*
* @rmid_entry - The entry in the limbo and free lists.
*/
static struct rmid_entry *rmid_ptrs;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index a86d37052a64..a21a4d0ecc34 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
- unsigned int size, u32 pkru)
+ unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
+ XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index cadf68737e6b..ef6906107c54 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
+ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode)
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
break;
case XSTATE_COPY_XSAVE:
- header.xfeatures &= fpstate->user_xfeatures;
+ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
+ tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1536,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
-
- if (!guest_fpu)
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
-
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04d8d64..3518fb26d06b 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -43,7 +43,8 @@ enum xstate_copy_mode {
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode);
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 2eabccde94fb..ccb0915e84e1 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -256,7 +256,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
return 0;
}
-static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
int ret;
@@ -279,6 +279,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf)
return ret;
}
+static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ u32 cr4 = native_read_cr4();
+ int ret;
+
+ ghcb_set_rax(ghcb, leaf->fn);
+ ghcb_set_rcx(ghcb, leaf->subfn);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #UD - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ leaf->eax = ghcb->save.rax;
+ leaf->ebx = ghcb->save.rbx;
+ leaf->ecx = ghcb->save.rcx;
+ leaf->edx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
+ : __sev_cpuid_hv_msr(leaf);
+}
+
/*
* This may be called early while still running on the initial identity
* mapping. Use RIP-relative addressing to obtain the correct address
@@ -388,19 +427,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
return false;
}
-static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
- if (sev_cpuid_hv(leaf))
+ if (sev_cpuid_hv(ghcb, ctxt, leaf))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
-static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ struct cpuid_leaf *leaf)
{
struct cpuid_leaf leaf_hv = *leaf;
switch (leaf->fn) {
case 0x1:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* initial APIC ID */
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
@@ -419,7 +459,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
break;
case 0xB:
leaf_hv.subfn = 0;
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* extended APIC ID */
leaf->edx = leaf_hv.edx;
@@ -467,7 +507,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
}
break;
case 0x8000001E:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* extended APIC ID */
leaf->eax = leaf_hv.eax;
@@ -488,7 +528,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int snp_cpuid(struct cpuid_leaf *leaf)
+static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -522,7 +562,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf)
return 0;
}
- return snp_cpuid_postprocess(leaf);
+ return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}
/*
@@ -544,14 +584,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
leaf.fn = fn;
leaf.subfn = subfn;
- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(NULL, NULL, &leaf);
if (!ret)
goto cpuid_done;
if (ret != -EOPNOTSUPP)
goto fail;
- if (sev_cpuid_hv(&leaf))
+ if (__sev_cpuid_hv_msr(&leaf))
goto fail;
cpuid_done:
@@ -592,6 +632,23 @@ fail:
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
+static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
+ unsigned long address,
+ bool write)
+{
+ if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_USER;
+ ctxt->fi.cr2 = address;
+ if (write)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return ES_EXCEPTION;
+ }
+
+ return ES_OK;
+}
+
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
void *src, char *buf,
unsigned int data_size,
@@ -599,7 +656,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, b = backwards ? -1 : 1;
- enum es_result ret = ES_OK;
+ unsigned long address = (unsigned long)src;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, false);
+ if (ret != ES_OK)
+ return ret;
for (i = 0; i < count; i++) {
void *s = src + (i * data_size * b);
@@ -620,7 +682,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, s = backwards ? -1 : 1;
- enum es_result ret = ES_OK;
+ unsigned long address = (unsigned long)dst;
+ enum es_result ret;
+
+ ret = vc_insn_string_check(ctxt, address, true);
+ if (ret != ES_OK)
+ return ret;
for (i = 0; i < count; i++) {
void *d = dst + (i * data_size * s);
@@ -656,6 +723,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
struct insn *insn = &ctxt->insn;
+ size_t size;
+ u64 port;
+
*exitinfo = 0;
switch (insn->opcode.bytes[0]) {
@@ -664,7 +734,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6d:
*exitinfo |= IOIO_TYPE_INS;
*exitinfo |= IOIO_SEG_ES;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* OUTS opcodes */
@@ -672,41 +742,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6f:
*exitinfo |= IOIO_TYPE_OUTS;
*exitinfo |= IOIO_SEG_DS;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* IN immediate opcodes */
case 0xe4:
case 0xe5:
*exitinfo |= IOIO_TYPE_IN;
- *exitinfo |= (u8)insn->immediate.value << 16;
+ port = (u8)insn->immediate.value & 0xffff;
break;
/* OUT immediate opcodes */
case 0xe6:
case 0xe7:
*exitinfo |= IOIO_TYPE_OUT;
- *exitinfo |= (u8)insn->immediate.value << 16;
+ port = (u8)insn->immediate.value & 0xffff;
break;
/* IN register opcodes */
case 0xec:
case 0xed:
*exitinfo |= IOIO_TYPE_IN;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
/* OUT register opcodes */
case 0xee:
case 0xef:
*exitinfo |= IOIO_TYPE_OUT;
- *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ port = ctxt->regs->dx & 0xffff;
break;
default:
return ES_DECODE_FAILED;
}
+ *exitinfo |= port << 16;
+
switch (insn->opcode.bytes[0]) {
case 0x6c:
case 0x6e:
@@ -716,12 +788,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0xee:
/* Single byte opcodes */
*exitinfo |= IOIO_DATA_8;
+ size = 1;
break;
default:
/* Length determined by instruction parsing */
*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
: IOIO_DATA_32;
+ size = (insn->opnd_bytes == 2) ? 2 : 4;
}
+
switch (insn->addr_bytes) {
case 2:
*exitinfo |= IOIO_ADDR_16;
@@ -737,7 +812,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
if (insn_has_rep_prefix(insn))
*exitinfo |= IOIO_REP;
- return ES_OK;
+ return vc_ioio_check(ctxt, (u16)port, size);
}
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
@@ -848,14 +923,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
-static int vc_handle_cpuid_snp(struct pt_regs *regs)
+static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
+ struct pt_regs *regs = ctxt->regs;
struct cpuid_leaf leaf;
int ret;
leaf.fn = regs->ax;
leaf.subfn = regs->cx;
- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(ghcb, ctxt, &leaf);
if (!ret) {
regs->ax = leaf.eax;
regs->bx = leaf.ebx;
@@ -874,7 +950,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
enum es_result ret;
int snp_cpuid_ret;
- snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+ snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
if (!snp_cpuid_ret)
return ES_OK;
if (snp_cpuid_ret != -EOPNOTSUPP)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 2787826d9f60..6395bfd87b68 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
return ES_OK;
}
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+ BUG_ON(size > 4);
+
+ if (user_mode(ctxt->regs)) {
+ struct thread_struct *t = &current->thread;
+ struct io_bitmap *iobm = t->io_bitmap;
+ size_t idx;
+
+ if (!iobm)
+ goto fault;
+
+ for (idx = port; idx < port + size; ++idx) {
+ if (test_bit(idx, iobm->bitmap))
+ goto fault;
+ }
+ }
+
+ return ES_OK;
+
+fault:
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+
+ return ES_EXCEPTION;
+}
+
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
@@ -868,8 +895,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
- unsigned long vaddr;
- unsigned int npages;
+ unsigned long vaddr, npages;
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
@@ -1509,6 +1535,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ES_DECODE_FAILED;
}
+ if (user_mode(ctxt->regs))
+ return ES_UNSUPPORTED;
+
switch (mmio) {
case INSN_MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 6eb06d001bcc..96a771f9f930 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
}
/*
- * Disable virtualization, APIC etc. and park the CPU in a HLT loop
+ * this function calls the 'stop' function on all other CPUs in the system.
*/
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait)
* 2) Wait for all other CPUs to report that they reached the
* HLT loop in stop_this_cpu()
*
- * 3) If the system uses INIT/STARTUP for CPU bringup, then
- * send all present CPUs an INIT vector, which brings them
- * completely out of the way.
+ * 3) If #2 timed out send an NMI to the CPUs which did not
+ * yet report
*
- * 4) If #3 is not possible and #2 timed out send an NMI to the
- * CPUs which did not yet report
- *
- * 5) Wait for all other CPUs to report that they reached the
+ * 4) Wait for all other CPUs to report that they reached the
* HLT loop in stop_this_cpu()
*
- * #4 can obviously race against a CPU reaching the HLT loop late.
+ * #3 can obviously race against a CPU reaching the HLT loop late.
* That CPU will have reported already and the "have all CPUs
* reached HLT" condition will be true despite the fact that the
* other CPU is still handling the NMI. Again, there is no
@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait)
/*
* Don't wait longer than a second for IPI completion. The
* wait request is not checked here because that would
- * prevent an NMI/INIT shutdown in case that not all
+ * prevent an NMI shutdown attempt in case that not all
* CPUs reach shutdown state.
*/
timeout = USEC_PER_SEC;
@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait)
udelay(1);
}
- /*
- * Park all other CPUs in INIT including "offline" CPUs, if
- * possible. That's a safe place where they can't resume execution
- * of HLT and then execute the HLT loop from overwritten text or
- * page tables.
- *
- * The only downside is a broadcast MCE, but up to the point where
- * the kexec() kernel brought all APs online again an MCE will just
- * make HLT resume and handle the MCE. The machine crashes and burns
- * due to overwritten text, page tables and data. So there is a
- * choice between fire and frying pan. The result is pretty much
- * the same. Chose frying pan until x86 provides a sane mechanism
- * to park a CPU.
- */
- if (smp_park_other_cpus_in_init())
- goto done;
-
- /*
- * If park with INIT was not possible and the REBOOT_VECTOR didn't
- * take all secondary CPUs offline, try with the NMI.
- */
+ /* if the REBOOT_VECTOR didn't work, try with the NMI */
if (!cpumask_empty(&cpus_stop_mask)) {
/*
* If NMI IPI is enabled, try to register the stop handler
@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait)
udelay(1);
}
-done:
local_irq_save(flags);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 48e040618731..2a187c0cbd5b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void)
cache_aps_init();
}
-bool smp_park_other_cpus_in_init(void)
-{
- unsigned int cpu, this_cpu = smp_processor_id();
- unsigned int apicid;
-
- if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
- return false;
-
- /*
- * If this is a crash stop which does not execute on the boot CPU,
- * then this cannot use the INIT mechanism because INIT to the boot
- * CPU will reset the machine.
- */
- if (this_cpu)
- return false;
-
- for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
- if (cpu == this_cpu)
- continue;
- apicid = apic->cpu_present_to_apicid(cpu);
- if (apicid == BAD_APICID)
- continue;
- send_init_sequence(apicid);
- }
- return true;
-}
-
/*
* Early setup to make printk work.
*/
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index ca004e2e4469..0bab03130033 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -54,7 +54,7 @@ void arch_unregister_cpu(int num)
EXPORT_SYMBOL(arch_unregister_cpu);
#else /* CONFIG_HOTPLUG_CPU */
-static int __init arch_register_cpu(int num)
+int __init arch_register_cpu(int num)
{
return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0544e30b4946..773132c3bf5a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
- * supported by the host.
- */
- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
- XFEATURE_MASK_FPSSE;
-
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dcd60b39e794..3e977dbbf993 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
+ int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
- NULL);
+
+ r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+ if (r && lvt_type == APIC_LVTPC)
+ kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
+ return r;
}
return 0;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index edb89b51b383..9ae07db6f0f6 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
-{
- struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
- kvm_pmu_deliver_pmi(vcpu);
-}
-
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
- if (!pmc->intr || skip_pmi)
- return;
-
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
+ if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-
- irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
- init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7d9ba301c090..1d64113de488 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
+static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+ pmc->counter += val - pmc_read_counter(pmc);
+ pmc->counter &= pmc_bitmask(pmc);
+}
+
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 2092db892d7d..4b74ea91f4e6 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
+ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
+ /* Invalid IPI with vector < 16 */
+ break;
default:
- pr_err("Unknown IPI interception\n");
+ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dd496c9e5f91..3fea8c47679e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
+
+ if (kvm_apicv_activated(vcpu->kvm))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cef5a3d0abd0..373ff6a6687b 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9507df93f410..beea99c8e8e0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -691,7 +691,7 @@ static int svm_hardware_enable(void)
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
struct sev_es_save_area *hostsa;
- u32 msr_hi;
+ u32 __maybe_unused msr_hi;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
if (intercept == svm->x2avic_msrs_intercepted)
return;
- if (!x2avic_enabled ||
- !apic_x2apic_mode(svm->vcpu.arch.apic))
+ if (!x2avic_enabled)
return;
for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f2efa0bf7ae8..820d3e1f6b4f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda6..41cce5031126 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5382,26 +5382,37 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
-{
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- guest_xsave->region,
- sizeof(guest_xsave->region),
- vcpu->arch.pkru);
-}
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
{
+ /*
+ * Only copy state for features that are enabled for the guest. The
+ * state itself isn't problematic, but setting bits in the header for
+ * features that are supported in *this* host but not exposed to the
+ * guest can result in KVM_SET_XSAVE failing when live migrating to a
+ * compatible host without the features that are NOT exposed to the
+ * guest.
+ *
+ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+ * supported by the host.
+ */
+ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
+ XFEATURE_MASK_FPSSE;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- state, size, vcpu->arch.pkru);
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+ supported_xcr0, vcpu->arch.pkru);
+}
+
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
+{
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -12843,6 +12854,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
#endif
+ if (kvm_test_request(KVM_REQ_PMI, vcpu))
+ return true;
+
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))