Diffstat (limited to 'arch/arm64/kvm/sys_regs.c')
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 2049
1 file changed, 1347 insertions(+), 702 deletions(-)
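Before the diff itself, a note on the central mechanism it introduces: register accesses no longer walk the PURE_EL2_SYSREG/MAPPED_EL2_SYSREG macro chains. Instead, locate_register() fills a struct sr_loc whose attribute bits (SR_LOC_MEMORY, SR_LOC_LOADED, SR_LOC_MAPPED, SR_LOC_XLATED, SR_LOC_SPECIAL) tell vcpu_read_sys_reg()/vcpu_write_sys_reg() where the value currently lives. The freestanding sketch below models how those bits compose on the read path; it is plain userspace C with toy arrays standing in for the vCPU backing store and the physical CPU registers, not kernel code, and it deliberately omits details such as kvm_vcpu_apply_reg_masks().

/* Toy model of the sr_loc read path; not kernel code. */
#include <stdint.h>
#include <stdio.h>

enum sr_loc_attr {
	SR_LOC_MEMORY	= 0,		/* value lives in the backing store */
	SR_LOC_LOADED	= 1 << 0,	/* value lives in a CPU register */
	SR_LOC_MAPPED	= 1 << 1,	/* ...under a different register index */
	SR_LOC_XLATED	= 1 << 2,	/* ...and was translated on write */
	SR_LOC_SPECIAL	= 1 << 3,	/* needs bespoke handling (CNTHCTL_EL2) */
};

struct sr_loc {
	enum sr_loc_attr loc;
	int map_reg;			/* stand-in for enum vcpu_sysreg */
	uint64_t (*xlate)(uint64_t);
};

/* Stand-ins for the vCPU backing store and the physical CPU. */
static uint64_t vcpu_mem[2] = { 0x1111, 0x2222 };
static uint64_t cpu_regs[2] = { 0xaaaa, 0xbbbb };

static uint64_t toy_read_sys_reg(int reg, const struct sr_loc *loc)
{
	if (loc->loc & SR_LOC_LOADED) {
		int map_reg = (loc->loc & SR_LOC_MAPPED) ? loc->map_reg : reg;

		/*
		 * An XLATED register was translated before being written to
		 * the CPU, so the authoritative untranslated value is the
		 * in-memory copy; only read back untranslated registers.
		 */
		if (!(loc->loc & SR_LOC_XLATED))
			return cpu_regs[map_reg];
	}
	return vcpu_mem[reg];	/* SR_LOC_MEMORY, or translated fallback */
}

int main(void)
{
	struct sr_loc in_mem = { .loc = SR_LOC_MEMORY };
	struct sr_loc mapped = { .loc = SR_LOC_LOADED | SR_LOC_MAPPED,
				 .map_reg = 1 };

	printf("memory-backed: %#llx\n",
	       (unsigned long long)toy_read_sys_reg(0, &in_mem));
	printf("mapped/loaded: %#llx\n",
	       (unsigned long long)toy_read_sys_reg(0, &mapped));
	return 0;
}

The key property mirrored from the patch: a register marked SR_LOC_XLATED is written to the CPU in translated form, but the untranslated value is always kept in (and read back from) memory, so no reverse translation is ever needed.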
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 83c6b4a07ef5..c8fd7c6a12a1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -17,6 +17,7 @@ #include <linux/mm.h> #include <linux/printk.h> #include <linux/uaccess.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/arm_pmuv3.h> #include <asm/cacheflush.h> @@ -81,44 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, "sys_reg write to read-only register"); } -#define PURE_EL2_SYSREG(el2) \ - case el2: { \ - *el1r = el2; \ - return true; \ +enum sr_loc_attr { + SR_LOC_MEMORY = 0, /* Register definitely in memory */ + SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */ + SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */ + SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */ + SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */ +}; + +struct sr_loc { + enum sr_loc_attr loc; + enum vcpu_sysreg map_reg; + u64 (*xlate)(u64); +}; + +static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg) +{ + switch (reg) { + case SCTLR_EL1: + case CPACR_EL1: + case TTBR0_EL1: + case TTBR1_EL1: + case TCR_EL1: + case TCR2_EL1: + case PIR_EL1: + case PIRE0_EL1: + case POR_EL1: + case ESR_EL1: + case AFSR0_EL1: + case AFSR1_EL1: + case FAR_EL1: + case MAIR_EL1: + case VBAR_EL1: + case CONTEXTIDR_EL1: + case AMAIR_EL1: + case CNTKCTL_EL1: + case ELR_EL1: + case SPSR_EL1: + case ZCR_EL1: + case SCTLR2_EL1: + /* + * EL1 registers which have an ELx2 mapping are loaded if + * we're not in hypervisor context. + */ + return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED; + + case TPIDR_EL0: + case TPIDRRO_EL0: + case TPIDR_EL1: + case PAR_EL1: + case DACR32_EL2: + case IFSR32_EL2: + case DBGVCR32_EL2: + /* These registers are always loaded, no matter what */ + return SR_LOC_LOADED; + + default: + /* Non-mapped EL2 registers are by definition in memory. 
*/ + return SR_LOC_MEMORY; } +} -#define MAPPED_EL2_SYSREG(el2, el1, fn) \ - case el2: { \ - *xlate = fn; \ - *el1r = el1; \ - return true; \ +static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg, + enum vcpu_sysreg map_reg, + u64 (*xlate)(u64), + struct sr_loc *loc) +{ + if (!is_hyp_ctxt(vcpu)) { + loc->loc = SR_LOC_MEMORY; + return; + } + + loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED; + loc->map_reg = map_reg; + + WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY); + + if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) { + loc->loc |= SR_LOC_XLATED; + loc->xlate = xlate; } +} -static bool get_el2_to_el1_mapping(unsigned int reg, - unsigned int *el1r, u64 (**xlate)(u64)) +#define MAPPED_EL2_SYSREG(r, m, t) \ + case r: { \ + locate_mapped_el2_register(vcpu, r, m, t, loc); \ + break; \ + } + +static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, + struct sr_loc *loc) { + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) { + loc->loc = SR_LOC_MEMORY; + return; + } + switch (reg) { - PURE_EL2_SYSREG( VPIDR_EL2 ); - PURE_EL2_SYSREG( VMPIDR_EL2 ); - PURE_EL2_SYSREG( ACTLR_EL2 ); - PURE_EL2_SYSREG( HCR_EL2 ); - PURE_EL2_SYSREG( MDCR_EL2 ); - PURE_EL2_SYSREG( HSTR_EL2 ); - PURE_EL2_SYSREG( HACR_EL2 ); - PURE_EL2_SYSREG( VTTBR_EL2 ); - PURE_EL2_SYSREG( VTCR_EL2 ); - PURE_EL2_SYSREG( RVBAR_EL2 ); - PURE_EL2_SYSREG( TPIDR_EL2 ); - PURE_EL2_SYSREG( HPFAR_EL2 ); - PURE_EL2_SYSREG( HCRX_EL2 ); - PURE_EL2_SYSREG( HFGRTR_EL2 ); - PURE_EL2_SYSREG( HFGWTR_EL2 ); - PURE_EL2_SYSREG( HFGITR_EL2 ); - PURE_EL2_SYSREG( HDFGRTR_EL2 ); - PURE_EL2_SYSREG( HDFGWTR_EL2 ); - PURE_EL2_SYSREG( HAFGRTR_EL2 ); - PURE_EL2_SYSREG( CNTVOFF_EL2 ); - PURE_EL2_SYSREG( CNTHCTL_EL2 ); MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, translate_sctlr_el2_to_sctlr_el1 ); MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1, @@ -141,128 +203,192 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); + case CNTHCTL_EL2: + /* CNTHCTL_EL2 is super special, until we support NV2.1 */ + loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ? 
+ SR_LOC_SPECIAL : SR_LOC_MEMORY); + break; default: - return false; + loc->loc = locate_direct_register(vcpu, reg); } } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static u64 read_sr_from_cpu(enum vcpu_sysreg reg) { u64 val = 0x8badf00d8badf00d; - u64 (*xlate)(u64) = NULL; - unsigned int el1r; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_read; + switch (reg) { + case SCTLR_EL1: val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: val = read_sysreg_s(SYS_POR_EL12); break; + case ESR_EL1: val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: val = read_sysreg_s(SYS_SPSR_EL12); break; + case ZCR_EL1: val = read_sysreg_s(SYS_ZCR_EL12); break; + case SCTLR2_EL1: val = read_sysreg_s(SYS_SCTLR2_EL12); break; + case TPIDR_EL0: val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: val = read_sysreg_s(SYS_TPIDR_EL1); break; + case PAR_EL1: val = read_sysreg_par(); break; + case DACR32_EL2: val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_read; + return val; +} + +static void write_sr_to_cpu(enum vcpu_sysreg reg, u64 val) +{ + switch (reg) { + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, 
SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; + case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + struct sr_loc loc = {}; + + locate_register(vcpu, reg, &loc); + + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); + + if (loc.loc & SR_LOC_SPECIAL) { + u64 val; + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); /* - * CNTHCTL_EL2 requires some special treatment to - * account for the bits that can be set via CNTKCTL_EL1. + * CNTHCTL_EL2 requires some special treatment to account + * for the bits that can be set via CNTKCTL_EL1 when E2H==1. */ switch (reg) { case CNTHCTL_EL2: - if (vcpu_el2_e2h_is_set(vcpu)) { - val = read_sysreg_el1(SYS_CNTKCTL); - val &= CNTKCTL_VALID_BITS; - val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; - return val; - } - break; + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; + return val; + default: + WARN_ON_ONCE(1); } + } - /* - * If this register does not have an EL1 counterpart, - * then read the stored EL2 version. - */ - if (reg == el1r) - goto memory_read; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; - /* - * If we have a non-VHE guest and that the sysreg - * requires translation to be used at EL1, use the - * in-memory copy instead. - */ - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - goto memory_read; + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* Get the current version of the EL1 counterpart. */ - WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); - if (reg >= __SANITISED_REG_START__) - val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - - return val; - } + if (!(loc.loc & SR_LOC_XLATED)) { + u64 val = read_sr_from_cpu(map_reg); - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_read; + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - if (__vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; + return val; + } + } -memory_read: return __vcpu_sys_reg(vcpu, reg); } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg) { - u64 (*xlate)(u64) = NULL; - unsigned int el1r; + struct sr_loc loc = {}; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_write; + locate_register(vcpu, reg, &loc); - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_write; + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); - /* - * Always store a copy of the write to memory to avoid having - * to reverse-translate virtual EL2 system registers for a - * non-VHE guest hypervisor. 
- */ - __vcpu_sys_reg(vcpu, reg) = val; + if (loc.loc & SR_LOC_SPECIAL) { + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); switch (reg) { case CNTHCTL_EL2: /* - * If E2H=0, CNHTCTL_EL2 is a pure shadow register. - * Otherwise, some of the bits are backed by + * If E2H=1, some of the bits are backed by * CNTKCTL_EL1, while the rest is kept in memory. * Yes, this is fun stuff. */ - if (vcpu_el2_e2h_is_set(vcpu)) - write_sysreg_el1(val, SYS_CNTKCTL); - return; + write_sysreg_el1(val, SYS_CNTKCTL); + break; + default: + WARN_ON_ONCE(1); } + } - /* No EL1 counterpart? We're done here.? */ - if (reg == el1r) - return; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; + u64 xlated_val; - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - val = xlate(val); + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - /* Redirect this to the EL1 version of the register. */ - WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r)); - return; - } + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_write; + if (loc.loc & SR_LOC_XLATED) + xlated_val = loc.xlate(val); + else + xlated_val = val; - if (__vcpu_write_sys_reg_to_cpu(val, reg)) - return; + write_sr_to_cpu(map_reg, xlated_val); + + /* + * Fall through to write the backing store anyway, which + * allows translated registers to be directly read without a + * reverse translation. + */ + } -memory_write: - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -531,7 +657,27 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; + if (p->Op1 == 4) { /* ICC_SRE_EL2 */ + p->regval = KVM_ICC_SRE_EL2; + } else { /* ICC_SRE_EL1 */ + p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; + } + + return true; +} + +static bool access_gic_dir(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_has_gicv3(vcpu->kvm)) + return undef_access(vcpu, p, r); + + if (!p->is_write) + return undef_access(vcpu, p, r); + + vgic_v3_deactivate(vcpu, p->regval); + return true; } @@ -570,17 +716,10 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 oslsr; - if (!p->is_write) return read_from_write_only(vcpu, p, r); - /* Forward the OSLK bit to OSLSR */ - oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK; - if (p->regval & OSLAR_EL1_OSLK) - oslsr |= OSLSR_EL1_OSLK; - - __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; + kvm_debug_handle_oslar(vcpu, p->regval); return true; } @@ -605,7 +744,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -621,43 +760,13 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, } } -/* - * We want to avoid world-switching all the DBG registers all the - * time: - * - * - If we've touched any debug register, it is likely that we're - * going to touch more of them. It then makes sense to disable the - * traps and start doing the save/restore dance - * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is - * then mandatory to save/restore the registers, as the guest - * depends on them. 
- * - * For this, we use a DIRTY bit, indicating the guest has modified the - * debug registers, used as follow: - * - * On guest entry: - * - If the dirty bit is set (because we're coming back from trapping), - * disable the traps, save host registers, restore guest registers. - * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), - * set the dirty bit, disable the traps, save host registers, - * restore guest registers. - * - Otherwise, enable the traps - * - * On guest exit: - * - If the dirty bit is set, save guest registers, restore host - * registers and clear the dirty bit. This ensure that the host can - * now use the debug registers. - */ static bool trap_debug_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { access_rw(vcpu, p, r); - if (p->is_write) - vcpu_set_flag(vcpu, DEBUG_DIRTY); - - trace_trap_reg(__func__, r->reg, p->is_write, p->regval); + kvm_debug_set_guest_ownership(vcpu); return true; } @@ -666,9 +775,6 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * * A 32 bit write to a debug register leave top bits alone * A 32 bit read from a debug register only returns the bottom bits - * - * All writes will set the DEBUG_DIRTY flag to ensure the hyp code - * switches between host and guest values in future. */ static void reg_to_dbg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -683,8 +789,6 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu, val &= ~mask; val |= (p->regval & (mask >> shift)) << shift; *dbg_reg = val; - - vcpu_set_flag(vcpu, DEBUG_DIRTY); } static void dbg_to_reg(struct kvm_vcpu *vcpu, @@ -698,152 +802,79 @@ static void dbg_to_reg(struct kvm_vcpu *vcpu, p->regval = (*dbg_reg & mask) >> shift; } -static bool trap_bvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static u64 *demux_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; - - if (p->is_write) - reg_to_dbg(vcpu, p, rd, dbg_reg); - else - dbg_to_reg(vcpu, p, rd, dbg_reg); - - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); + struct kvm_guest_debug_arch *dbg = &vcpu->arch.vcpu_debug_state; - return true; -} - -static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val; - return 0; -} - -static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 *val) -{ - *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; - return 0; + switch (rd->Op2) { + case 0b100: + return &dbg->dbg_bvr[rd->CRm]; + case 0b101: + return &dbg->dbg_bcr[rd->CRm]; + case 0b110: + return &dbg->dbg_wvr[rd->CRm]; + case 0b111: + return &dbg->dbg_wcr[rd->CRm]; + default: + KVM_BUG_ON(1, vcpu->kvm); + return NULL; + } } -static u64 reset_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static bool trap_dbg_wb_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; - return rd->val; -} + u64 *reg = demux_wb_reg(vcpu, rd); -static bool trap_bcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; + if (!reg) + return false; if (p->is_write) - reg_to_dbg(vcpu, p, rd, dbg_reg); + reg_to_dbg(vcpu, p, rd, reg); else - dbg_to_reg(vcpu, p, rd, dbg_reg); - - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); + dbg_to_reg(vcpu, p, rd, reg); + 
kvm_debug_set_guest_ownership(vcpu); return true; } -static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val; - return 0; -} - -static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 *val) -{ - *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; - return 0; -} - -static u64 reset_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static int set_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + u64 val) { - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; - return rd->val; -} - -static bool trap_wvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; - - if (p->is_write) - reg_to_dbg(vcpu, p, rd, dbg_reg); - else - dbg_to_reg(vcpu, p, rd, dbg_reg); - - trace_trap_reg(__func__, rd->CRm, p->is_write, - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); - - return true; -} + u64 *reg = demux_wb_reg(vcpu, rd); -static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val; - return 0; -} + if (!reg) + return -EINVAL; -static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 *val) -{ - *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; + *reg = val; return 0; } -static u64 reset_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val; - return rd->val; -} - -static bool trap_wcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static int get_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + u64 *val) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; + u64 *reg = demux_wb_reg(vcpu, rd); - if (p->is_write) - reg_to_dbg(vcpu, p, rd, dbg_reg); - else - dbg_to_reg(vcpu, p, rd, dbg_reg); - - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); - - return true; -} + if (!reg) + return -EINVAL; -static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val; + *val = *reg; return 0; } -static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 *val) +static u64 reset_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; - return 0; -} + u64 *reg = demux_wb_reg(vcpu, rd); -static u64 reset_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; + /* + * Bail early if we couldn't find storage for the register, the + * KVM_BUG_ON() in demux_wb_reg() will prevent this VM from ever + * being run. 
+ */ + if (!reg) + return 0; + + *reg = rd->val; return rd->val; } @@ -881,6 +912,12 @@ static u64 reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return mpidr; } +static unsigned int hidden_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return REG_HIDDEN; +} + static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { @@ -893,13 +930,13 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mask = BIT(ARMV8_PMU_CYCLE_IDX); - u8 n = vcpu->kvm->arch.pmcr_n; + u8 n = vcpu->kvm->arch.nr_pmu_counters; if (n) mask |= GENMASK(n - 1, 0); reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= mask; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask); return __vcpu_sys_reg(vcpu, r->reg); } @@ -907,7 +944,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -919,7 +956,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return 0; reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -927,7 +964,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK); return __vcpu_sys_reg(vcpu, r->reg); } @@ -943,7 +980,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * The value of PMCR.N field is included when the * vCPU register is read via kvm_vcpu_read_pmcr(). 
*/ - __vcpu_sys_reg(vcpu, r->reg) = pmcr; + __vcpu_assign_sys_reg(vcpu, r->reg, pmcr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -1015,7 +1052,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval); else /* return PMSELR.SEL field */ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) @@ -1075,6 +1112,22 @@ static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, return 0; } +static int set_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + u64 idx; + + if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0) + /* PMCCNTR_EL0 */ + idx = ARMV8_PMU_CYCLE_IDX; + else + /* PMEVCNTRn_EL0 */ + idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); + + kvm_pmu_set_counter_value_user(vcpu, idx, val); + return 0; +} + static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1166,25 +1219,10 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 val) { - bool set; - - val &= kvm_pmu_accessible_counter_mask(vcpu); - - switch (r->reg) { - case PMOVSSET_EL0: - /* CRm[1] being set indicates a SET register, and CLR otherwise */ - set = r->CRm & 2; - break; - default: - /* Op2[0] being set indicates a SET register, and CLR otherwise */ - set = r->Op2 & 1; - break; - } + u64 mask = kvm_pmu_accessible_counter_mask(vcpu); - if (set) - __vcpu_sys_reg(vcpu, r->reg) |= val; - else - __vcpu_sys_reg(vcpu, r->reg) &= ~val; + __vcpu_assign_sys_reg(vcpu, r->reg, val & mask); + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; } @@ -1208,16 +1246,14 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, mask = kvm_pmu_accessible_counter_mask(vcpu); if (p->is_write) { val = p->regval & mask; - if (r->Op2 & 0x1) { + if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; - kvm_pmu_enable_counter_mask(vcpu, val); - kvm_vcpu_pmu_restore_guest(vcpu); - } else { + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val); + else /* accessing PMCNTENCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; - kvm_pmu_disable_counter_mask(vcpu, val); - } + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val); + + kvm_pmu_reprogram_counter_mask(vcpu, val); } else { p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } @@ -1238,10 +1274,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val); else /* accessing PMINTENCLR_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val); } else { p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } @@ -1260,10 +1296,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask)); else /* accessing PMOVSCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask)); } else { p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } @@ -1294,8 +1330,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, 
struct sys_reg_params *p, if (!vcpu_mode_priv(vcpu)) return undef_access(vcpu, p, r); - __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = - p->regval & ARMV8_PMU_USERENR_MASK; + __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0, + (p->regval & ARMV8_PMU_USERENR_MASK)); } else { p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; @@ -1325,8 +1361,9 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, * with the existing KVM behavior. */ if (!kvm_vm_has_ran_once(kvm) && + !vcpu_has_nv(vcpu) && new_n <= kvm_arm_pmu_get_max_counters(kvm)) - kvm->arch.pmcr_n = new_n; + kvm->arch.nr_pmu_counters = new_n; mutex_unlock(&kvm->arch.config_lock); @@ -1345,20 +1382,26 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + return 0; } /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ - trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ + get_dbg_wb_reg, set_dbg_wb_reg }, \ { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ - trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ + get_dbg_wb_reg, set_dbg_wb_reg }, \ { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ - trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ + get_dbg_wb_reg, set_dbg_wb_reg }, \ { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ - trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ + get_dbg_wb_reg, set_dbg_wb_reg } #define PMU_SYS_REG(name) \ SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \ @@ -1368,6 +1411,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, #define PMU_PMEVCNTR_EL0(n) \ { PMU_SYS_REG(PMEVCNTRn_EL0(n)), \ .reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \ + .set_user = set_pmu_evcntr, \ .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ @@ -1412,26 +1456,146 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, switch (reg) { case SYS_CNTP_TVAL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HPTIMER; + else + tmr = TIMER_PTIMER; + treg = TIMER_REG_TVAL; + break; + + case SYS_CNTV_TVAL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HVTIMER; + else + tmr = TIMER_VTIMER; + treg = TIMER_REG_TVAL; + break; + case SYS_AARCH32_CNTP_TVAL: + case SYS_CNTP_TVAL_EL02: tmr = TIMER_PTIMER; treg = TIMER_REG_TVAL; break; + + case SYS_CNTV_TVAL_EL02: + tmr = TIMER_VTIMER; + treg = TIMER_REG_TVAL; + break; + + case SYS_CNTHP_TVAL_EL2: + tmr = TIMER_HPTIMER; + treg = TIMER_REG_TVAL; + break; + + case SYS_CNTHV_TVAL_EL2: + tmr = TIMER_HVTIMER; + treg = TIMER_REG_TVAL; + break; + case SYS_CNTP_CTL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HPTIMER; + else + tmr = TIMER_PTIMER; + treg = TIMER_REG_CTL; + break; + + case SYS_CNTV_CTL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HVTIMER; + else + tmr = TIMER_VTIMER; + treg = TIMER_REG_CTL; + break; + case SYS_AARCH32_CNTP_CTL: + case SYS_CNTP_CTL_EL02: tmr = TIMER_PTIMER; treg = TIMER_REG_CTL; break; + + case SYS_CNTV_CTL_EL02: + tmr = TIMER_VTIMER; + treg = TIMER_REG_CTL; + break; + + case SYS_CNTHP_CTL_EL2: + tmr = TIMER_HPTIMER; + treg = 
TIMER_REG_CTL; + break; + + case SYS_CNTHV_CTL_EL2: + tmr = TIMER_HVTIMER; + treg = TIMER_REG_CTL; + break; + case SYS_CNTP_CVAL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HPTIMER; + else + tmr = TIMER_PTIMER; + treg = TIMER_REG_CVAL; + break; + + case SYS_CNTV_CVAL_EL0: + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + tmr = TIMER_HVTIMER; + else + tmr = TIMER_VTIMER; + treg = TIMER_REG_CVAL; + break; + case SYS_AARCH32_CNTP_CVAL: + case SYS_CNTP_CVAL_EL02: tmr = TIMER_PTIMER; treg = TIMER_REG_CVAL; break; + + case SYS_CNTV_CVAL_EL02: + tmr = TIMER_VTIMER; + treg = TIMER_REG_CVAL; + break; + + case SYS_CNTHP_CVAL_EL2: + tmr = TIMER_HPTIMER; + treg = TIMER_REG_CVAL; + break; + + case SYS_CNTHV_CVAL_EL2: + tmr = TIMER_HVTIMER; + treg = TIMER_REG_CVAL; + break; + case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: + if (is_hyp_ctxt(vcpu)) + tmr = TIMER_HPTIMER; + else + tmr = TIMER_PTIMER; + treg = TIMER_REG_CNT; + break; + case SYS_AARCH32_CNTPCT: + case SYS_AARCH32_CNTPCTSS: tmr = TIMER_PTIMER; treg = TIMER_REG_CNT; break; + + case SYS_CNTVCT_EL0: + case SYS_CNTVCTSS_EL0: + if (is_hyp_ctxt(vcpu)) + tmr = TIMER_HVTIMER; + else + tmr = TIMER_VTIMER; + treg = TIMER_REG_CNT; + break; + + case SYS_AARCH32_CNTVCT: + case SYS_AARCH32_CNTVCTSS: + tmr = TIMER_VTIMER; + treg = TIMER_REG_CNT; + break; + default: print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); return undef_access(vcpu, p, r); @@ -1445,6 +1609,49 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; + } + + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + +static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } + + return 0; +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -1550,6 +1757,7 @@ static u8 pmuver_to_perfmon(u8 pmuver) } static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); /* Read a sanitised cpufeature ID register by sys_reg_desc */ @@ -1572,50 +1780,51 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = sanitise_id_aa64pfr0_el1(vcpu, val); break; case SYS_ID_AA64PFR1_EL1: - if (!kvm_has_mte(vcpu->kvm)) - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= 
~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val = sanitise_id_aa64pfr1_el1(vcpu, val); break; case SYS_ID_AA64PFR2_EL1: - /* We only expose FPMR */ - val &= ID_AA64PFR2_EL1_FPMR; + val &= ID_AA64PFR2_EL1_FPMR | + (kvm_has_mte(vcpu->kvm) ? + ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY : + 0); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); + val &= ~(ID_AA64ISAR1_EL1_APA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_GPI); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); - if (!cpus_have_final_cap(ARM64_HAS_WFXT)) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~(ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3); + if (!cpus_have_final_cap(ARM64_HAS_WFXT) || + has_broken_cntvoff()) + val &= ~ID_AA64ISAR2_EL1_WFxT; + break; + case SYS_ID_AA64ISAR3_EL1: + val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_LSFE | + ID_AA64ISAR3_EL1_FAMINMAX; break; case SYS_ID_AA64MMFR2_EL1: val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; + val &= ~ID_AA64MMFR2_EL1_NV; break; case SYS_ID_AA64MMFR3_EL1: - val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE | - ID_AA64MMFR3_EL1_S1PIE; + val &= ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | + ID_AA64MMFR3_EL1_S1POE | + ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: - val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + val &= ~ID_MMFR4_EL1_CCIDX; break; } + if (vcpu_has_nv(vcpu)) + val = limit_nv_id_reg(vcpu->kvm, id, val); + return val; } @@ -1642,15 +1851,24 @@ static bool is_feature_id_reg(u32 encoding) * Return true if the register's (Op0, Op1, CRn, CRm, Op2) is * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID * registers KVM maintains on a per-VM basis. + * + * Additionally, the implementation ID registers and CTR_EL0 are handled as + * per-VM registers. 
*/ static inline bool is_vm_ftr_id_reg(u32 id) { - if (id == SYS_CTR_EL0) + switch (id) { + case SYS_CTR_EL0: + case SYS_MIDR_EL1: + case SYS_REVIDR_EL1: + case SYS_AIDR_EL1: return true; + default: + return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && + sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && + sys_reg_CRm(id) < 8); - return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && - sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && - sys_reg_CRm(id) < 8); + } } static inline bool is_vcpu_ftr_id_reg(u32 id) @@ -1764,7 +1982,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP); } - if (kvm_vgic_global_state.type == VGIC_V3) { + if (vgic_is_v3(vcpu->kvm)) { val &= ~ID_AA64PFR0_EL1_GIC_MASK; val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); } @@ -1781,15 +1999,30 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } -#define ID_REG_LIMIT_FIELD_ENUM(val, reg, field, limit) \ -({ \ - u64 __f_val = FIELD_GET(reg##_##field##_MASK, val); \ - (val) &= ~reg##_##field##_MASK; \ - (val) |= FIELD_PREP(reg##_##field##_MASK, \ - min(__f_val, \ - (u64)SYS_FIELD_VALUE(reg, field, limit))); \ - (val); \ -}) +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + if (!kvm_has_mte(vcpu->kvm)) { + val &= ~ID_AA64PFR1_EL1_MTE; + val &= ~ID_AA64PFR1_EL1_MTE_frac; + } + + if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && + SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) + val &= ~ID_AA64PFR1_EL1_RAS_frac; + + val &= ~ID_AA64PFR1_EL1_SME; + val &= ~ID_AA64PFR1_EL1_RNDR_trap; + val &= ~ID_AA64PFR1_EL1_NMI; + val &= ~ID_AA64PFR1_EL1_GCS; + val &= ~ID_AA64PFR1_EL1_THE; + val &= ~ID_AA64PFR1_EL1_MTEX; + val &= ~ID_AA64PFR1_EL1_PFAR; + val &= ~ID_AA64PFR1_EL1_MPAM_frac; + + return val; +} static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { @@ -1806,9 +2039,32 @@ static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) /* Hide SPE from guests */ val &= ~ID_AA64DFR0_EL1_PMSVer_MASK; + /* Hide BRBE from guests */ + val &= ~ID_AA64DFR0_EL1_BRBE_MASK; + return val; } +/* + * Older versions of KVM erroneously claim support for FEAT_DoubleLock with + * NV-enabled VMs on unsupporting hardware. Silently ignore the incorrect + * value if it is consistent with the bug. 
+ */ +static bool ignore_feat_doublelock(struct kvm_vcpu *vcpu, u64 val) +{ + u8 host, user; + + if (!vcpu_has_nv(vcpu)) + return false; + + host = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, + read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1)); + user = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, val); + + return host == ID_AA64DFR0_EL1_DoubleLock_NI && + user == ID_AA64DFR0_EL1_DoubleLock_IMP; +} + static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) @@ -1840,18 +2096,25 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP) return -EINVAL; + if (ignore_feat_doublelock(vcpu, val)) { + val &= ~ID_AA64DFR0_EL1_DoubleLock; + val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI); + } + return set_id_reg(vcpu, rd, val); } static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - u8 perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); + u8 perfmon; u64 val = read_sanitised_ftr_reg(SYS_ID_DFR0_EL1); val &= ~ID_DFR0_EL1_PerfMon_MASK; - if (kvm_vcpu_has_pmu(vcpu)) + if (kvm_vcpu_has_pmu(vcpu)) { + perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); val |= SYS_FIELD_PREP(ID_DFR0_EL1, PerfMon, perfmon); + } val = ID_REG_LIMIT_FIELD_ENUM(val, ID_DFR0_EL1, CopDbg, Debugv8p8); @@ -1905,6 +2168,20 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */ + if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) || + !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) || + (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) + return -EINVAL; + + /* + * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then + * we support GICv3. Fail attempts to do anything but set that to IMP. + */ + if (vgic_is_v3_compat(vcpu->kvm) && + FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } @@ -1913,11 +2190,78 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, { u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); u64 mpam_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK; + u8 mte = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE, hw_val); + u8 user_mte_frac = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE_frac, user_val); + u8 hw_mte_frac = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE_frac, hw_val); /* See set_id_aa64pfr0_el1 for comment about MPAM */ if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + /* + * Previously MTE_frac was hidden from guest. However, if the + * hardware supports MTE2 but not MTE_ASYM_FAULT then a value + * of 0 for this field indicates that the hardware supports + * MTE_ASYNC. Whereas, 0xf indicates MTE_ASYNC is not supported. + * + * As KVM must accept values from KVM provided by user-space, + * when ID_AA64PFR1_EL1.MTE is 2 allow user-space to set + * ID_AA64PFR1_EL1.MTE_frac to 0. However, ignore it to avoid + * incorrectly claiming hardware support for MTE_ASYNC in the + * guest. + */ + + if (mte == ID_AA64PFR1_EL1_MTE_MTE2 && + hw_mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI && + user_mte_frac == ID_AA64PFR1_EL1_MTE_frac_ASYNC) { + user_val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + user_val |= hw_val & ID_AA64PFR1_EL1_MTE_frac_MASK; + } + + return set_id_reg(vcpu, rd, user_val); +} + +/* + * Allow userspace to de-feature a stage-2 translation granule but prevent it + * from claiming the impossible. 
+ */ +#define tgran2_val_allowed(tg, safe, user) \ +({ \ + u8 __s = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, safe); \ + u8 __u = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, user); \ + \ + __s == __u || __u == ID_AA64MMFR0_EL1_##tg##_NI; \ +}) + +static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd); + + if (!vcpu_has_nv(vcpu)) + return set_id_reg(vcpu, rd, user_val); + + if (!tgran2_val_allowed(TGRAN4_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN16_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN64_2, sanitized_val, user_val)) + return -EINVAL; + + return set_id_reg(vcpu, rd, user_val); +} + +static int set_id_aa64mmfr2_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + u64 nv_mask = ID_AA64MMFR2_EL1_NV_MASK; + + /* + * We made the mistake to expose the now deprecated NV field, + * so allow userspace to write it, but silently ignore it. + */ + if ((hw_val & nv_mask) == (user_val & nv_mask)) + user_val &= ~nv_mask; + return set_id_reg(vcpu, rd, user_val); } @@ -2111,7 +2455,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (kvm_has_mte(vcpu->kvm)) clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); - __vcpu_sys_reg(vcpu, r->reg) = clidr; + __vcpu_assign_sys_reg(vcpu, r->reg, clidr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -2125,7 +2469,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -2210,27 +2554,34 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG(name, acc, rst, v) { \ - SYS_DESC(SYS_##name), \ - .access = acc, \ - .reset = rst, \ - .reg = name, \ - .visibility = el2_visibility, \ - .val = v, \ -} - -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + +#define EL2_REG(name, acc, rst, v) \ + EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) + #define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v) +#define EL2_REG_VNCR_FILT(name, vis) \ + EL2_REG_FILTERED(name, bad_vncr_trap, reset_val, 0, vis) +#define EL2_REG_VNCR_GICv3(name) \ + EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + arch_timer_get_user, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. @@ -2242,44 +2593,44 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, * from userspace. 
*/ -#define ID_DESC(name) \ - SYS_DESC(SYS_##name), \ +#define ID_DESC_DEFAULT_CALLBACKS \ .access = access_id_reg, \ - .get_user = get_id_reg \ - -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define ID_SANITISED(name) { \ - ID_DESC(name), \ + .get_user = get_id_reg, \ .set_user = set_id_reg, \ .visibility = id_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ - .val = 0, \ -} + .reset = kvm_read_sanitised_id_reg + +#define ID_DESC(name) \ + SYS_DESC(SYS_##name), \ + ID_DESC_DEFAULT_CALLBACKS /* sys_reg_desc initialiser for known cpufeature ID registers */ -#define AA32_ID_SANITISED(name) { \ +#define ID_SANITISED(name) { \ ID_DESC(name), \ - .set_user = set_id_reg, \ - .visibility = aa32_id_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ .val = 0, \ } /* sys_reg_desc initialiser for writable ID registers */ #define ID_WRITABLE(name, mask) { \ ID_DESC(name), \ - .set_user = set_id_reg, \ - .visibility = id_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ .val = mask, \ } +/* + * 32bit ID regs are fully writable when the guest is 32bit + * capable. Nothing in the KVM code should rely on 32bit features + * anyway, only 64bit, so let the VMM do its worse. + */ +#define AA32_ID_WRITABLE(name) { \ + ID_DESC(name), \ + .visibility = aa32_id_visibility, \ + .val = GENMASK(31, 0), \ +} + /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ #define ID_FILTERED(sysreg, name, mask) { \ ID_DESC(sysreg), \ .set_user = set_##name, \ - .visibility = id_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ .val = (mask), \ } @@ -2289,12 +2640,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, * (1 <= crm < 8, 0 <= Op2 < 8). */ #define ID_UNALLOCATED(crm, op2) { \ + .name = "S3_0_0_" #crm "_" #op2, \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ - .access = access_id_reg, \ - .get_user = get_id_reg, \ - .set_user = set_id_reg, \ + ID_DESC_DEFAULT_CALLBACKS, \ .visibility = raz_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ .val = 0, \ } @@ -2305,9 +2654,7 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, */ #define ID_HIDDEN(name) { \ ID_DESC(name), \ - .set_user = set_id_reg, \ .visibility = raz_visibility, \ - .reset = kvm_read_sanitised_id_reg, \ .val = 0, \ } @@ -2316,7 +2663,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SP_EL1); @@ -2340,7 +2687,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); @@ -2352,7 +2699,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1); @@ -2366,7 +2713,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) val |= HCR_E2H; - return __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + + return __vcpu_sys_reg(vcpu, r->reg); } static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, @@ -2383,6 +2732,31 @@ static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, 
sve_visibility); } +static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (el2_visibility(vcpu, rd) == 0 && + kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (kvm_has_sctlr2(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, sctlr2_visibility); +} + static bool access_zcr_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2391,17 +2765,65 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); + return true; +} + +static bool access_gic_vtr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = kvm_get_guest_vtr_el2(); + + return true; +} + +static bool access_gic_misr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = vgic_v3_get_misr(vcpu); + + return true; +} + +static bool access_gic_eisr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = vgic_v3_get_eisr(vcpu); + + return true; +} + +static bool access_gic_elrsr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = vgic_v3_get_elrsr(vcpu); + return true; } @@ -2435,6 +2857,26 @@ static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, tcr2_visibility); } +static unsigned int fgt2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (el2_visibility(vcpu, rd) == 0 && + kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, FGT2)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int fgt_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (el2_visibility(vcpu, rd) == 0 && + kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, IMP)) + return 0; + + return REG_HIDDEN; +} + static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -2450,6 +2892,202 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + +static bool access_mdcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 hpmn, val, old = __vcpu_sys_reg(vcpu, MDCR_EL2); + + if (!p->is_write) { + p->regval = old; + return true; + } + + val = p->regval; + hpmn = FIELD_GET(MDCR_EL2_HPMN, val); + 
+ /* + * If HPMN is out of bounds, limit it to what we actually + * support. This matches the UNKNOWN definition of the field + * in that case, and keeps the emulation simple. Sort of. + */ + if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { + hpmn = vcpu->kvm->arch.nr_pmu_counters; + u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN); + } + + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); + + /* + * Request a reload of the PMU to enable/disable the counters + * affected by HPME. + */ + if ((old ^ val) & MDCR_EL2_HPME) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + + return true; +} + +static bool access_ras(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct kvm *kvm = vcpu->kvm; + + switch(reg_to_encoding(r)) { + case SYS_ERXPFGCDN_EL1: + case SYS_ERXPFGCTL_EL1: + case SYS_ERXPFGF_EL1: + case SYS_ERXMISC2_EL1: + case SYS_ERXMISC3_EL1: + if (!(kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) || + (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) && + kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)))) { + kvm_inject_undefined(vcpu); + return false; + } + break; + default: + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + } + + return trap_raz_wi(vcpu, p, r); +} + +/* + * For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and + * AIDR_EL1 as "invariant" registers, meaning userspace cannot change them. + * The values made visible to userspace were the register values of the boot + * CPU. + * + * At the same time, reads from these registers at EL1 previously were not + * trapped, allowing the guest to read the actual hardware value. On big-little + * machines, this means the VM can see different values depending on where a + * given vCPU got scheduled. + * + * These registers are now trapped as collateral damage from SME, and what + * follows attempts to give a user / guest view consistent with the existing + * ABI. + */ +static bool access_imp_id_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + /* + * Return the VM-scoped implementation ID register values if userspace + * has made them writable. + */ + if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &vcpu->kvm->arch.flags)) + return access_id_reg(vcpu, p, r); + + /* + * Otherwise, fall back to the old behavior of returning the value of + * the current CPU. 
+ */ + switch (reg_to_encoding(r)) { + case SYS_REVIDR_EL1: + p->regval = read_sysreg(revidr_el1); + break; + case SYS_AIDR_EL1: + p->regval = read_sysreg(aidr_el1); + break; + default: + WARN_ON_ONCE(1); + } + + return true; +} + +static u64 __ro_after_init boot_cpu_midr_val; +static u64 __ro_after_init boot_cpu_revidr_val; +static u64 __ro_after_init boot_cpu_aidr_val; + +static void init_imp_id_regs(void) +{ + boot_cpu_midr_val = read_sysreg(midr_el1); + boot_cpu_revidr_val = read_sysreg(revidr_el1); + boot_cpu_aidr_val = read_sysreg(aidr_el1); +} + +static u64 reset_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + switch (reg_to_encoding(r)) { + case SYS_MIDR_EL1: + return boot_cpu_midr_val; + case SYS_REVIDR_EL1: + return boot_cpu_revidr_val; + case SYS_AIDR_EL1: + return boot_cpu_aidr_val; + default: + KVM_BUG_ON(1, vcpu->kvm); + return 0; + } +} + +static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + struct kvm *kvm = vcpu->kvm; + u64 expected; + + guard(mutex)(&kvm->arch.config_lock); + + expected = read_id_reg(vcpu, r); + if (expected == val) + return 0; + + if (!test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags)) + return -EINVAL; + + /* + * Once the VM has started the ID registers are immutable. Reject the + * write if userspace tries to change it. + */ + if (kvm_vm_has_ran_once(kvm)) + return -EBUSY; + + /* + * Any value is allowed for the implementation ID registers so long as + * it is within the writable mask. + */ + if ((val & r->val) != val) + return -EINVAL; + + kvm_set_vm_id_reg(kvm, reg_to_encoding(r), val); + return 0; +} + +#define IMPLEMENTATION_ID(reg, mask) { \ + SYS_DESC(SYS_##reg), \ + .access = access_imp_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_imp_id_reg, \ + .reset = reset_imp_id_reg, \ + .val = mask, \ + } + +static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters); + return vcpu->kvm->arch.nr_pmu_counters; +} + /* * Architected system registers. 
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2498,7 +3136,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 }, + IMPLEMENTATION_ID(MIDR_EL1, GENMASK_ULL(31, 0)), { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, + IMPLEMENTATION_ID(REVIDR_EL1, GENMASK_ULL(63, 0)), /* * ID regs: all ID_SANITISED() entries here must have corresponding @@ -2507,40 +3147,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - AA32_ID_SANITISED(ID_PFR0_EL1), - AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_WRITABLE(ID_PFR0_EL1), + AA32_ID_WRITABLE(ID_PFR1_EL1), { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK | - ID_DFR0_EL1_CopDbg_MASK, }, + .val = GENMASK(31, 0) }, ID_HIDDEN(ID_AFR0_EL1), - AA32_ID_SANITISED(ID_MMFR0_EL1), - AA32_ID_SANITISED(ID_MMFR1_EL1), - AA32_ID_SANITISED(ID_MMFR2_EL1), - AA32_ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_WRITABLE(ID_MMFR0_EL1), + AA32_ID_WRITABLE(ID_MMFR1_EL1), + AA32_ID_WRITABLE(ID_MMFR2_EL1), + AA32_ID_WRITABLE(ID_MMFR3_EL1), /* CRm=2 */ - AA32_ID_SANITISED(ID_ISAR0_EL1), - AA32_ID_SANITISED(ID_ISAR1_EL1), - AA32_ID_SANITISED(ID_ISAR2_EL1), - AA32_ID_SANITISED(ID_ISAR3_EL1), - AA32_ID_SANITISED(ID_ISAR4_EL1), - AA32_ID_SANITISED(ID_ISAR5_EL1), - AA32_ID_SANITISED(ID_MMFR4_EL1), - AA32_ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_WRITABLE(ID_ISAR0_EL1), + AA32_ID_WRITABLE(ID_ISAR1_EL1), + AA32_ID_WRITABLE(ID_ISAR2_EL1), + AA32_ID_WRITABLE(ID_ISAR3_EL1), + AA32_ID_WRITABLE(ID_ISAR4_EL1), + AA32_ID_WRITABLE(ID_ISAR5_EL1), + AA32_ID_WRITABLE(ID_MMFR4_EL1), + AA32_ID_WRITABLE(ID_ISAR6_EL1), /* CRm=3 */ - AA32_ID_SANITISED(MVFR0_EL1), - AA32_ID_SANITISED(MVFR1_EL1), - AA32_ID_SANITISED(MVFR2_EL1), + AA32_ID_WRITABLE(MVFR0_EL1), + AA32_ID_WRITABLE(MVFR1_EL1), + AA32_ID_WRITABLE(MVFR2_EL1), ID_UNALLOCATED(3,3), - AA32_ID_SANITISED(ID_PFR2_EL1), + AA32_ID_WRITABLE(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - AA32_ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_WRITABLE(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -2549,12 +3188,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SVE | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, ~(ID_AA64PFR1_EL1_PFAR | - ID_AA64PFR1_EL1_DF2 | ID_AA64PFR1_EL1_MTEX | ID_AA64PFR1_EL1_THE | ID_AA64PFR1_EL1_GCS | @@ -2564,9 +3201,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_SME | ID_AA64PFR1_EL1_RES0 | ID_AA64PFR1_EL1_MPAM_frac | - ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), - ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR), + ID_WRITABLE(ID_AA64PFR2_EL1, + ID_AA64PFR2_EL1_FPMR | + ID_AA64PFR2_EL1_MTEFAR | + ID_AA64PFR2_EL1_MTESTOREONLY), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_HIDDEN(ID_AA64SMFR0_EL1), @@ -2608,33 +3247,34 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_WRITABLE(ID_AA64ISAR2_EL1, ~(ID_AA64ISAR2_EL1_RES0 | ID_AA64ISAR2_EL1_APA3 | ID_AA64ISAR2_EL1_GPA3)), - ID_UNALLOCATED(6,3), + ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT | + ID_AA64ISAR3_EL1_LSFE | + ID_AA64ISAR3_EL1_FAMINMAX)), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), ID_UNALLOCATED(6,6), ID_UNALLOCATED(6,7), /* CRm=7 
*/ - ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 | - ID_AA64MMFR0_EL1_TGRAN4_2 | - ID_AA64MMFR0_EL1_TGRAN64_2 | - ID_AA64MMFR0_EL1_TGRAN16_2)), + ID_FILTERED(ID_AA64MMFR0_EL1, id_aa64mmfr0_el1, + ~(ID_AA64MMFR0_EL1_RES0 | + ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | - ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_TWED | ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_VH | ID_AA64MMFR1_EL1_VMIDBits)), - ID_WRITABLE(ID_AA64MMFR2_EL1, ~(ID_AA64MMFR2_EL1_RES0 | + ID_FILTERED(ID_AA64MMFR2_EL1, + id_aa64mmfr2_el1, ~(ID_AA64MMFR2_EL1_RES0 | ID_AA64MMFR2_EL1_EVT | ID_AA64MMFR2_EL1_FWB | ID_AA64MMFR2_EL1_IDS | ID_AA64MMFR2_EL1_NV | ID_AA64MMFR2_EL1_CCIDX)), ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | ID_AA64MMFR3_EL1_S1PIE | ID_AA64MMFR3_EL1_S1POE)), - ID_SANITISED(ID_AA64MMFR4_EL1), + ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac), ID_UNALLOCATED(7,5), ID_UNALLOCATED(7,6), ID_UNALLOCATED(7,7), @@ -2642,6 +3282,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0, + .visibility = sctlr2_visibility }, MTE_REG(RGSR_EL1), MTE_REG(GCR_EL1), @@ -2671,14 +3313,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, - { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, + { SYS_DESC(SYS_ERRIDR_EL1), access_ras }, + { SYS_DESC(SYS_ERRSELR_EL1), access_ras }, + { SYS_DESC(SYS_ERXFR_EL1), access_ras }, + { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, + { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, + { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGF_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCTL_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCDN_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC2_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC3_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), @@ -2697,6 +3344,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMBLIMITR_EL1), undef_access }, { SYS_DESC(SYS_PMBPTR_EL1), undef_access }, { SYS_DESC(SYS_PMBSR_EL1), undef_access }, + { SYS_DESC(SYS_PMSDSFR_EL1), undef_access }, /* PMBIDR_EL1 is not trapped */ { PMU_SYS_REG(PMINTENSET_EL1), @@ -2740,7 +3388,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, { SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, { SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, - { SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, + { SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir }, { SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi }, @@ -2768,6 +3416,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, { SYS_DESC(SYS_CCSIDR2_EL1), 
undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, + IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)), { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, ID_FILTERED(CTR_EL0, ctr_el0, CTR_EL0_DIC_MASK | @@ -2804,7 +3453,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(PMCCNTR_EL0), .access = access_pmu_evcntr, .reset = reset_unknown, - .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr}, + .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr, + .set_user = set_pmu_evcntr }, { PMU_SYS_REG(PMXEVTYPER_EL0), .access = access_pmu_evtyper, .reset = NULL }, { PMU_SYS_REG(PMXEVCNTR_EL0), @@ -2900,11 +3550,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), + TIMER_REG(CNTP_CVAL_EL0, NULL), + + { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -2981,12 +3639,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0), EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), + EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0, + sctlr2_el2_visibility), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), - EL2_REG(MDCR_EL2, access_rw, reset_val, 0), + EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), - EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), - EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0), + EL2_REG_VNCR_FILT(HFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HFGWTR_EL2, fgt_visibility), EL2_REG_VNCR(HFGITR_EL2, reset_val, 0), EL2_REG_VNCR(HACR_EL2, reset_val, 0), @@ -3002,11 +3662,18 @@ static const struct sys_reg_desc sys_reg_descs[] = { tcr2_el2_visibility), EL2_REG_VNCR(VTTBR_EL2, reset_val, 0), EL2_REG_VNCR(VTCR_EL2, reset_val, 0), + EL2_REG_FILTERED(VNCR_EL2, bad_vncr_trap, reset_val, 0, + vncr_el2_visibility), { SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 }, - EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0), - EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0), - EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0), + EL2_REG_VNCR_FILT(HDFGRTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HDFGWTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HFGRTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HFGWTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HDFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HDFGWTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HAFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HFGITR2_EL2, fgt2_visibility), EL2_REG_REDIR(SPSR_EL2, reset_val, 0), EL2_REG_REDIR(ELR_EL2, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, @@ -3021,6 +3688,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), 
EL2_REG_REDIR(ESR_EL2, reset_val, 0), + EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0), { SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 }, EL2_REG_REDIR(FAR_EL2, reset_val, 0), @@ -3047,19 +3715,68 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MPAMVPM7_EL2), undef_access }, EL2_REG(VBAR_EL2, access_rw, reset_val, 0), - EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_RVBAR_EL2), undef_access }, { SYS_DESC(SYS_RMR_EL2), undef_access }, - - EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0), + EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0), + + EL2_REG_VNCR_GICv3(ICH_AP0R0_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R1_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R2_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R3_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R0_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R1_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R2_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R3_EL2), + + { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre }, + + EL2_REG_VNCR_GICv3(ICH_HCR_EL2), + { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr }, + { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr }, + { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr }, + { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr }, + EL2_REG_VNCR_GICv3(ICH_VMCR_EL2), + + EL2_REG_VNCR_GICv3(ICH_LR0_EL2), + EL2_REG_VNCR_GICv3(ICH_LR1_EL2), + EL2_REG_VNCR_GICv3(ICH_LR2_EL2), + EL2_REG_VNCR_GICv3(ICH_LR3_EL2), + EL2_REG_VNCR_GICv3(ICH_LR4_EL2), + EL2_REG_VNCR_GICv3(ICH_LR5_EL2), + EL2_REG_VNCR_GICv3(ICH_LR6_EL2), + EL2_REG_VNCR_GICv3(ICH_LR7_EL2), + EL2_REG_VNCR_GICv3(ICH_LR8_EL2), + EL2_REG_VNCR_GICv3(ICH_LR9_EL2), + EL2_REG_VNCR_GICv3(ICH_LR10_EL2), + EL2_REG_VNCR_GICv3(ICH_LR11_EL2), + EL2_REG_VNCR_GICv3(ICH_LR12_EL2), + EL2_REG_VNCR_GICv3(ICH_LR13_EL2), + EL2_REG_VNCR_GICv3(ICH_LR14_EL2), + EL2_REG_VNCR_GICv3(ICH_LR15_EL2), EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), + + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, + { SYS_DESC(SYS_CNTP_TVAL_EL02), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CTL_EL02), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CVAL_EL02), access_arch_timer }, + + { SYS_DESC(SYS_CNTV_TVAL_EL02), access_arch_timer }, + { SYS_DESC(SYS_CNTV_CTL_EL02), access_arch_timer }, + { SYS_DESC(SYS_CNTV_CVAL_EL02), access_arch_timer }, + EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; @@ -3068,7 +3785,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); - __kvm_at_s1e01(vcpu, op, p->regval); + if (__kvm_at_s1e01(vcpu, op, p->regval)) + return false; return true; } @@ -3085,7 +3803,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; } - __kvm_at_s1e2(vcpu, op, p->regval); + if (__kvm_at_s1e2(vcpu, op, p->regval)) + return false; return true; } @@ -3095,7 +3814,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); - __kvm_at_s12(vcpu, op, p->regval); + if (__kvm_at_s12(vcpu, op, p->regval)) + return false; return true; } @@ -3237,8 +3957,7 @@ static bool handle_ripas2e1is(struct kvm_vcpu 
*vcpu, struct sys_reg_params *p, { u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); - u64 base, range, tg, num, scale; - int shift; + u64 base, range; if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) return undef_access(vcpu, p, r); @@ -3248,26 +3967,7 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * of the guest's S2 (different base granule size, for example), we * decide to ignore TTL and only use the described range. */ - tg = FIELD_GET(GENMASK(47, 46), p->regval); - scale = FIELD_GET(GENMASK(45, 44), p->regval); - num = FIELD_GET(GENMASK(43, 39), p->regval); - base = p->regval & GENMASK(36, 0); - - switch(tg) { - case 1: - shift = 12; - break; - case 2: - shift = 14; - break; - case 3: - default: /* IMPDEF: handle tg==0 as 64k */ - shift = 16; - break; - } - - base <<= shift; - range = __TLBI_RANGE_PAGES(num, scale) << shift; + base = decode_range_tlbi(p->regval, &range, NULL); kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), &(union tlbi_info) { @@ -3333,11 +4033,22 @@ static void s2_mmu_tlbi_s1e1(struct kvm_s2_mmu *mmu, WARN_ON(__kvm_tlbi_s1e2(mmu, info->va.addr, info->va.encoding)); } +static bool handle_tlbi_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + + if (!kvm_supported_tlbi_s1e2_op(vcpu, sys_encoding)) + return undef_access(vcpu, p, r); + + kvm_handle_s1e2_tlbi(vcpu, sys_encoding, p->regval); + return true; +} + static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); - u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); /* * If we're here, this is because we've trapped on a EL1 TLBI @@ -3348,6 +4059,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * - HCR_EL2.E2H == 0 : a non-VHE guest * - HCR_EL2.{E2H,TGE} == { 1, 0 } : a VHE guest in guest mode * + * Another possibility is that we are invalidating the EL2 context + * using EL1 instructions, but that we landed here because we need + * additional invalidation for structures that are not held in the + * CPU TLBs (such as the VNCR pseudo-TLB and its EL2 mapping). In + * that case, we are guaranteed that HCR_EL2.{E2H,TGE} == { 1, 1 } + * as we don't allow an NV-capable L1 in a nVHE configuration. + * * We don't expect these helpers to ever be called when running * in a vEL1 context. 
*/ @@ -3357,7 +4075,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding)) return undef_access(vcpu, p, r); - kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), + if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) { + kvm_handle_s1e2_tlbi(vcpu, sys_encoding, p->regval); + return true; + } + + kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, + get_vmid(__vcpu_sys_reg(vcpu, VTTBR_EL2)), &(union tlbi_info) { .va = { .addr = p->regval, @@ -3479,16 +4203,21 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is), - SYS_INSN(TLBI_ALLE2OS, undef_access), - SYS_INSN(TLBI_VAE2OS, undef_access), + SYS_INSN(TLBI_ALLE2OS, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2OS, handle_tlbi_el2), SYS_INSN(TLBI_ALLE1OS, handle_alle1is), - SYS_INSN(TLBI_VALE2OS, undef_access), + SYS_INSN(TLBI_VALE2OS, handle_tlbi_el2), SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is), - SYS_INSN(TLBI_RVAE2IS, undef_access), - SYS_INSN(TLBI_RVALE2IS, undef_access), + SYS_INSN(TLBI_RVAE2IS, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2IS, handle_tlbi_el2), + SYS_INSN(TLBI_ALLE2IS, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2IS, handle_tlbi_el2), SYS_INSN(TLBI_ALLE1IS, handle_alle1is), + + SYS_INSN(TLBI_VALE2IS, handle_tlbi_el2), + SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), SYS_INSN(TLBI_IPAS2E1OS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is), @@ -3498,11 +4227,17 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is), SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is), - SYS_INSN(TLBI_RVAE2OS, undef_access), - SYS_INSN(TLBI_RVALE2OS, undef_access), - SYS_INSN(TLBI_RVAE2, undef_access), - SYS_INSN(TLBI_RVALE2, undef_access), + SYS_INSN(TLBI_RVAE2OS, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2OS, handle_tlbi_el2), + SYS_INSN(TLBI_RVAE2, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2, handle_tlbi_el2), + SYS_INSN(TLBI_ALLE2, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2, handle_tlbi_el2), + SYS_INSN(TLBI_ALLE1, handle_alle1is), + + SYS_INSN(TLBI_VALE2, handle_tlbi_el2), + SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), SYS_INSN(TLBI_IPAS2E1ISNXS, handle_ipas2e1is), @@ -3510,19 +4245,19 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is), - SYS_INSN(TLBI_ALLE2OSNXS, undef_access), - SYS_INSN(TLBI_VAE2OSNXS, undef_access), + SYS_INSN(TLBI_ALLE2OSNXS, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2OSNXS, handle_tlbi_el2), SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is), - SYS_INSN(TLBI_VALE2OSNXS, undef_access), + SYS_INSN(TLBI_VALE2OSNXS, handle_tlbi_el2), SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is), - SYS_INSN(TLBI_RVAE2ISNXS, undef_access), - SYS_INSN(TLBI_RVALE2ISNXS, undef_access), - SYS_INSN(TLBI_ALLE2ISNXS, undef_access), - SYS_INSN(TLBI_VAE2ISNXS, undef_access), + SYS_INSN(TLBI_RVAE2ISNXS, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2ISNXS, handle_tlbi_el2), + SYS_INSN(TLBI_ALLE2ISNXS, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2ISNXS, handle_tlbi_el2), SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is), - SYS_INSN(TLBI_VALE2ISNXS, undef_access), + SYS_INSN(TLBI_VALE2ISNXS, handle_tlbi_el2), SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is), SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is), @@ -3532,14 +4267,14 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_IPAS2LE1NXS, 
handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is), SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is), - SYS_INSN(TLBI_RVAE2OSNXS, undef_access), - SYS_INSN(TLBI_RVALE2OSNXS, undef_access), - SYS_INSN(TLBI_RVAE2NXS, undef_access), - SYS_INSN(TLBI_RVALE2NXS, undef_access), - SYS_INSN(TLBI_ALLE2NXS, undef_access), - SYS_INSN(TLBI_VAE2NXS, undef_access), + SYS_INSN(TLBI_RVAE2OSNXS, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2OSNXS, handle_tlbi_el2), + SYS_INSN(TLBI_RVAE2NXS, handle_tlbi_el2), + SYS_INSN(TLBI_RVALE2NXS, handle_tlbi_el2), + SYS_INSN(TLBI_ALLE2NXS, handle_tlbi_el2), + SYS_INSN(TLBI_VAE2NXS, handle_tlbi_el2), SYS_INSN(TLBI_ALLE1NXS, handle_alle1is), - SYS_INSN(TLBI_VALE2NXS, undef_access), + SYS_INSN(TLBI_VALE2NXS, handle_tlbi_el2), SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is), }; @@ -3571,18 +4306,20 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, * None of the other registers share their location, so treat them as * if they were 64bit. */ -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ - /* DBGWCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } - -#define DBGBXVR(n) \ - { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), \ + trap_dbg_wb_reg, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_dbg_wb_reg, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_dbg_wb_reg, NULL, n }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_dbg_wb_reg, NULL, n } + +#define DBGBXVR(n) \ + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), \ + trap_dbg_wb_reg, NULL, n } /* * Trapped cp14 registers. 
We generally ignore most of the external @@ -3779,7 +4516,7 @@ static const struct sys_reg_desc cp15_regs[] = { { CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, { CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, { CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, - { CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, + { CP15_SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir }, { CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, { CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access }, { CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access }, @@ -3879,18 +4616,20 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ + { SYS_DESC(SYS_AARCH32_CNTVCT), access_arch_timer }, { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTVCTSS), access_arch_timer }, }; static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, - bool is_32) + bool reset_check) { unsigned int i; for (i = 0; i < n; i++) { - if (!is_32 && table[i].reg && !table[i].reset) { + if (reset_check && table[i].reg && !table[i].reset) { kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n", &table[i], i, table[i].name); return false; @@ -4085,7 +4824,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params) return true; kvm_pr_unimpl("Unhandled cp10 register %s: %u\n", - params->is_write ? "write" : "read", reg_id); + str_write_read(params->is_write), reg_id); return false; } @@ -4201,9 +4940,13 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) * Certain AArch32 ID registers are handled by rerouting to the AArch64 * system register table. Registers in the ID range where CRm=0 are * excluded from this scheme as they do not trivially map into AArch64 - * system register encodings. + * system register encodings, except for AIDR/REVIDR. */ - if (params.Op1 == 0 && params.CRn == 0 && params.CRm) + if (params.Op1 == 0 && params.CRn == 0 && + (params.CRm || params.Op2 == 6 /* REVIDR */)) + return kvm_emulate_cp15_id_reg(vcpu, ¶ms); + if (params.Op1 == 1 && params.CRn == 0 && + params.CRm == 0 && params.Op2 == 7 /* AIDR */) return kvm_emulate_cp15_id_reg(vcpu, ¶ms); return kvm_handle_cp_32(vcpu, ¶ms, cp15_regs, ARRAY_SIZE(cp15_regs)); @@ -4396,9 +5139,15 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) reset_vcpu_ftr_id_reg(vcpu, r); else r->reset(vcpu, r); + + if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) + __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0); } set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); + + if (kvm_vcpu_has_pmu(vcpu)) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); } /** @@ -4504,65 +5253,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id, return r; } -/* - * These are the invariant sys_reg registers: we let the guest see the - * host versions of these, so they're part of the guest state. - * - * A future CPU may provide a mechanism to present different values to - * the guest, or a future kvm may trap them. 
- */ - -#define FUNCTION_INVARIANT(reg) \ - static u64 reset_##reg(struct kvm_vcpu *v, \ - const struct sys_reg_desc *r) \ - { \ - ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ - return ((struct sys_reg_desc *)r)->val; \ - } - -FUNCTION_INVARIANT(midr_el1) -FUNCTION_INVARIANT(revidr_el1) -FUNCTION_INVARIANT(aidr_el1) - -/* ->val is filled in by kvm_sys_reg_table_init() */ -static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = { - { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 }, - { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 }, - { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 }, -}; - -static int get_invariant_sys_reg(u64 id, u64 __user *uaddr) -{ - const struct sys_reg_desc *r; - - r = get_reg_by_id(id, invariant_sys_regs, - ARRAY_SIZE(invariant_sys_regs)); - if (!r) - return -ENOENT; - - return put_user(r->val, uaddr); -} - -static int set_invariant_sys_reg(u64 id, u64 __user *uaddr) -{ - const struct sys_reg_desc *r; - u64 val; - - r = get_reg_by_id(id, invariant_sys_regs, - ARRAY_SIZE(invariant_sys_regs)); - if (!r) - return -ENOENT; - - if (get_user(val, uaddr)) - return -EFAULT; - - /* This is what we mean by invariant: you can't change it. */ - if (r->val != val) - return -EINVAL; - - return 0; -} - static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) { u32 val; @@ -4616,15 +5306,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -4644,15 +5347,10 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(unsigned long)reg->addr; - int err; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_get(vcpu, reg->id, uaddr); - err = get_invariant_sys_reg(reg->id, uaddr); - if (err != -ENOENT) - return err; - return kvm_sys_reg_get_user(vcpu, reg, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); } @@ -4662,13 +5360,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -4678,7 +5377,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); ret = 0; } @@ -4688,15 +5387,10 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { 
void __user *uaddr = (void __user *)(unsigned long)reg->addr; - int err; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_set(vcpu, reg->id, uaddr); - err = set_invariant_sys_reg(reg->id, uaddr); - if (err != -ENOENT) - return err; - return kvm_sys_reg_set_user(vcpu, reg, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); } @@ -4733,10 +5427,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; @@ -4785,23 +5492,14 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) { - return ARRAY_SIZE(invariant_sys_regs) - + num_demux_regs() + return num_demux_regs() + walk_sys_regs(vcpu, (u64 __user *)NULL); } int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - unsigned int i; int err; - /* Then give them all the invariant registers' indices. */ - for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { - if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) - return -EFAULT; - uindices++; - } - err = walk_sys_regs(vcpu, uindices); if (err < 0) return err; @@ -4897,80 +5595,18 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) mutex_lock(&kvm->arch.config_lock); vcpu_set_hcr(vcpu); vcpu_set_ich_hcr(vcpu); - - if (cpus_have_final_cap(ARM64_HAS_HCX)) { - /* - * In general, all HCRX_EL2 bits are gated by a feature. - * The only reason we can set SMPME without checking any - * feature is that its effects are not directly observable - * from the guest. 
- */ - vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME; - - if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) - vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); - - if (kvm_has_tcr2(kvm)) - vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En; - - if (kvm_has_fpmr(kvm)) - vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM; - } + vcpu_set_hcrx(vcpu); if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags)) goto out; - kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 | - HFGxTR_EL2_nMAIR2_EL1 | - HFGxTR_EL2_nS2POR_EL1 | - HFGxTR_EL2_nACCDATA_EL1 | - HFGxTR_EL2_nSMPRI_EL1_MASK | - HFGxTR_EL2_nTPIDR2_EL0_MASK); - - if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) - kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1OS| - HFGITR_EL2_TLBIRVALE1OS | - HFGITR_EL2_TLBIRVAAE1OS | - HFGITR_EL2_TLBIRVAE1OS | - HFGITR_EL2_TLBIVAALE1OS | - HFGITR_EL2_TLBIVALE1OS | - HFGITR_EL2_TLBIVAAE1OS | - HFGITR_EL2_TLBIASIDE1OS | - HFGITR_EL2_TLBIVAE1OS | - HFGITR_EL2_TLBIVMALLE1OS); - - if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) - kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1 | - HFGITR_EL2_TLBIRVALE1 | - HFGITR_EL2_TLBIRVAAE1 | - HFGITR_EL2_TLBIRVAE1 | - HFGITR_EL2_TLBIRVAALE1IS| - HFGITR_EL2_TLBIRVALE1IS | - HFGITR_EL2_TLBIRVAAE1IS | - HFGITR_EL2_TLBIRVAE1IS | - HFGITR_EL2_TLBIRVAALE1OS| - HFGITR_EL2_TLBIRVALE1OS | - HFGITR_EL2_TLBIRVAAE1OS | - HFGITR_EL2_TLBIRVAE1OS); - - if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) - kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A; - - if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2)) - kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP | - HFGITR_EL2_ATS1E1WP); - - if (!kvm_has_s1pie(kvm)) - kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | - HFGxTR_EL2_nPIR_EL1); - - if (!kvm_has_s1poe(kvm)) - kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 | - HFGxTR_EL2_nPOR_EL0); - - if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) - kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 | - HAFGRTR_EL2_RES1); + compute_fgu(kvm, HFGRTR_GROUP); + compute_fgu(kvm, HFGITR_GROUP); + compute_fgu(kvm, HDFGRTR_GROUP); + compute_fgu(kvm, HAFGRTR_GROUP); + compute_fgu(kvm, HFGRTR2_GROUP); + compute_fgu(kvm, HFGITR2_GROUP); + compute_fgu(kvm, HDFGRTR2_GROUP); set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags); out: @@ -4991,15 +5627,21 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && - irqchip_in_kernel(kvm) && - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; + /* + * This hacks into the ID registers, so only perform it when the + * first vcpu runs, or the kvm_set_vm_id_reg() helper will scream. 
+	 */
+	if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
+		u64 val;
+
+		val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
+		kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
+		val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
+		kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
 	}
 
 	if (vcpu_has_nv(vcpu)) {
-		int ret = kvm_init_nv_sysregs(kvm);
+		int ret = kvm_init_nv_sysregs(vcpu);
 		if (ret)
 			return ret;
 	}
@@ -5009,28 +5651,31 @@ int __init kvm_sys_reg_table_init(void)
 {
+	const struct sys_reg_desc *gicv3_regs;
 	bool valid = true;
-	unsigned int i;
+	unsigned int i, sz;
 	int ret = 0;
 
 	/* Make sure tables are unique and in order. */
-	valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
-	valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
-	valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
-	valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
-	valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
-	valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
+	valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), true);
+	valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), false);
+	valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), false);
+	valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), false);
+	valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), false);
 	valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
 
+	gicv3_regs = vgic_v3_get_sysreg_table(&sz);
+	valid &= check_sysreg_table(gicv3_regs, sz, false);
+
 	if (!valid)
 		return -EINVAL;
 
-	/* We abuse the reset function to overwrite the table itself. */
-	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
-		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+	init_imp_id_regs();
 
 	ret = populate_nv_trap_config();
 
+	check_feature_map();
+
 	for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++)
 		ret = populate_sysreg_config(sys_reg_descs + i, i);
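The handle_ripas2e1is() hunk above folds the open-coded range-TLBI operand decode into a decode_range_tlbi() helper. Below is a minimal standalone sketch of that decode, reconstructed from the removed lines; the GENMASK/FIELD_GET/__TLBI_RANGE_PAGES helpers are reimplemented here so the sketch builds in userspace, and the test value in main() is illustrative, not taken from the patch.

	/* Standalone sketch of the range-TLBI operand decode removed above. */
	#include <stdint.h>
	#include <stdio.h>

	#define GENMASK64(h, l)		(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
	#define FIELD_GET64(mask, val)	(((val) & (mask)) >> __builtin_ctzll(mask))

	/* Pages covered by a range invalidation: (NUM + 1) << (5 * SCALE + 1) */
	#define TLBI_RANGE_PAGES(num, scale)	\
		((uint64_t)((num) + 1) << (5 * (scale) + 1))

	static uint64_t decode_range_tlbi(uint64_t regval, uint64_t *range)
	{
		uint64_t tg    = FIELD_GET64(GENMASK64(47, 46), regval);
		uint64_t scale = FIELD_GET64(GENMASK64(45, 44), regval);
		uint64_t num   = FIELD_GET64(GENMASK64(43, 39), regval);
		uint64_t base  = regval & GENMASK64(36, 0);
		int shift;

		switch (tg) {
		case 1:	shift = 12; break;	/* 4kB granule */
		case 2:	shift = 14; break;	/* 16kB granule */
		case 3:
		default: shift = 16; break;	/* IMPDEF: handle TG==0 as 64kB */
		}

		*range = TLBI_RANGE_PAGES(num, scale) << shift;
		return base << shift;
	}

	int main(void)
	{
		uint64_t range;
		/* TG=1 (4kB), SCALE=0, NUM=0: smallest range, 2 pages at VA 0 */
		uint64_t base = decode_range_tlbi(1ULL << 46, &range);

		printf("base=%#llx range=%#llx\n",
		       (unsigned long long)base, (unsigned long long)range);
		return 0;
	}

TG selects the translation granule, SCALE/NUM encode the page count as (NUM + 1) << (5 * SCALE + 1), and the 37-bit base is scaled up by the granule size, which is why the helper needs nothing beyond the operand itself.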

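With the invariant_sys_regs machinery removed and MIDR_EL1/REVIDR_EL1/AIDR_EL1 exposed through IMPLEMENTATION_ID() and set_imp_id_reg(), userspace can give a VM its own implementation ID values. The following is a hedged sketch of the VMM side, assuming KVM_CAP_ARM_WRITABLE_IMP_ID_REGS is the capability that sets KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS; error handling is trimmed and the MIDR value is made up.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* MIDR_EL1 is op0=3, op1=0, CRn=0, CRm=0, op2=0 */
	#define MIDR_EL1_ID	ARM64_SYS_REG(3, 0, 0, 0, 0)

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int vm = ioctl(kvm, KVM_CREATE_VM, 0);

		/* Assumed capability name; enable it before any vCPU exists */
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_WRITABLE_IMP_ID_REGS,
		};
		if (ioctl(vm, KVM_ENABLE_CAP, &cap))
			perror("KVM_ENABLE_CAP");

		int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
		struct kvm_vcpu_init init;

		ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
		ioctl(vcpu, KVM_ARM_VCPU_INIT, &init);

		/* Made-up MIDR; only bits inside the writable mask may be set */
		uint64_t midr = 0x410fd0f0;
		struct kvm_one_reg reg = {
			.id = MIDR_EL1_ID,
			.addr = (uintptr_t)&midr,
		};
		if (ioctl(vcpu, KVM_SET_ONE_REG, &reg))
			perror("KVM_SET_ONE_REG(MIDR_EL1)");

		return 0;
	}

Per set_imp_id_reg() above, the write must land before the VM first runs (-EBUSY otherwise), only bits inside the descriptor's writable mask are accepted (GENMASK_ULL(31, 0) for MIDR_EL1), and without the capability anything other than the exact reset value gets -EINVAL.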