summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig5
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/arch_timer.c7
-rw-r--r--arch/arm64/kvm/arm.c111
-rw-r--r--arch/arm64/kvm/at.c194
-rw-r--r--arch/arm64/kvm/config.c1085
-rw-r--r--arch/arm64/kvm/emulate-nested.c614
-rw-r--r--arch/arm64/kvm/handle_exit.c90
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h70
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h177
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h14
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h16
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/memory.h58
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h4
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp.lds.S2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c523
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c97
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c126
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c14
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c6
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c60
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c70
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c28
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c4
-rw-r--r--arch/arm64/kvm/hypercalls.c13
-rw-r--r--arch/arm64/kvm/mmu.c72
-rw-r--r--arch/arm64/kvm/nested.c1128
-rw-r--r--arch/arm64/kvm/pkvm.c225
-rw-r--r--arch/arm64/kvm/pmu-emul.c254
-rw-r--r--arch/arm64/kvm/pmu.c10
-rw-r--r--arch/arm64/kvm/ptdump.c4
-rw-r--r--arch/arm64/kvm/reset.c5
-rw-r--r--arch/arm64/kvm/sys_regs.c757
-rw-r--r--arch/arm64/kvm/sys_regs.h10
-rw-r--r--arch/arm64/kvm/trace_arm.h6
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c224
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c29
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c39
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c29
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3-nested.c406
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c46
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c35
-rw-r--r--arch/arm64/kvm/vgic/vgic.c38
-rw-r--r--arch/arm64/kvm/vgic/vgic.h39
53 files changed, 5222 insertions, 1612 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index ead632ad01b4..713248f240e0 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
- depends on AS_HAS_ARMV8_4
select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_MMU_NOTIFIER
@@ -71,8 +70,8 @@ config PTDUMP_STAGE2_DEBUGFS
depends on KVM
depends on DEBUG_KERNEL
depends on DEBUG_FS
- depends on GENERIC_PTDUMP
- select PTDUMP_CORE
+ depends on ARCH_HAS_PTDUMP
+ select PTDUMP
default n
help
Say Y here if you want to show the stage-2 kernel pagetables
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3cf7adb2b503..7c329e01c557 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -14,7 +14,7 @@ CFLAGS_sys_regs.o += -Wno-override-init
CFLAGS_handle_exit.o += -Wno-override-init
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
- inject_fault.o va_layout.o handle_exit.o \
+ inject_fault.o va_layout.o handle_exit.o config.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \
@@ -23,7 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
- vgic/vgic-its.o vgic/vgic-debug.o
+ vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 70802e4c91cf..5133dcbfe9f7 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1070,8 +1070,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
else
ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
- hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- ctxt->hrtimer.function = kvm_hrtimer_expire;
+ hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
switch (timerid) {
case TIMER_PTIMER:
@@ -1098,8 +1097,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
timer_set_offset(vcpu_ptimer(vcpu), 0);
}
- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- timer->bg_timer.function = kvm_bg_timer_expire;
+ hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_HARD);
}
void kvm_timer_init_vm(struct kvm *kvm)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 0160b4924351..36cfcffb40d8 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -125,6 +125,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->slots_lock);
break;
+ case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
+ mutex_lock(&kvm->lock);
+ if (!kvm->created_vcpus) {
+ r = 0;
+ set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
+ }
+ mutex_unlock(&kvm->lock);
+ break;
default:
break;
}
@@ -313,6 +321,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SYSTEM_SUSPEND:
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_COUNTER_OFFSET:
+ case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -359,6 +368,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_EL1_32BIT:
r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1);
break;
+ case KVM_CAP_ARM_EL2:
+ r = cpus_have_final_cap(ARM64_HAS_NESTED_VIRT);
+ break;
+ case KVM_CAP_ARM_EL2_E2H0:
+ r = cpus_have_final_cap(ARM64_HAS_HCR_NV1);
+ break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = get_num_brps();
break;
@@ -366,7 +381,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = get_num_wrps();
break;
case KVM_CAP_ARM_PMU_V3:
- r = kvm_arm_support_pmu_v3();
+ r = kvm_supports_guest_pmuv3();
break;
case KVM_CAP_ARM_INJECT_SERROR_ESR:
r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
@@ -466,7 +481,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return kvm_share_hyp(vcpu, vcpu + 1);
+ err = kvm_share_hyp(vcpu, vcpu + 1);
+ if (err)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -586,8 +605,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
nommu:
vcpu->cpu = cpu;
- kvm_vgic_load(vcpu);
+ /*
+ * The timer must be loaded before the vgic to correctly set up physical
+ * interrupt deactivation in nested state (e.g. timer interrupt).
+ */
kvm_timer_vcpu_load(vcpu);
+ kvm_vgic_load(vcpu);
kvm_vcpu_load_debug(vcpu);
if (has_vhe())
kvm_vcpu_load_vhe(vcpu);
@@ -825,6 +848,16 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (vcpu_has_nv(vcpu)) {
+ ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+ if (ret)
+ return ret;
+
+ ret = kvm_vgic_vcpu_nv_init(vcpu);
+ if (ret)
+ return ret;
+ }
+
/*
* This needs to happen after any restriction has been applied
* to the feature set.
@@ -835,14 +868,20 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- ret = kvm_arm_pmu_v3_enable(vcpu);
- if (ret)
- return ret;
+ if (kvm_vcpu_has_pmu(vcpu)) {
+ ret = kvm_arm_pmu_v3_enable(vcpu);
+ if (ret)
+ return ret;
+ }
if (is_protected_kvm_enabled()) {
ret = pkvm_create_hyp_vm(kvm);
if (ret)
return ret;
+
+ ret = pkvm_create_hyp_vcpu(vcpu);
+ if (ret)
+ return ret;
}
mutex_lock(&kvm->arch.config_lock);
@@ -1148,7 +1187,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
- kvm_pmu_flush_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -1167,7 +1207,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
- kvm_pmu_sync_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_sync_hwstate(vcpu);
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
@@ -1197,7 +1238,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* that the vgic can properly sample the updated state of the
* interrupt line.
*/
- kvm_pmu_sync_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_sync_hwstate(vcpu);
/*
* Sync the vgic state before syncing the timer state because
@@ -1386,7 +1428,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
- if (!kvm_arm_support_pmu_v3())
+ if (!kvm_supports_guest_pmuv3())
clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
if (!system_supports_sve())
@@ -2307,6 +2349,13 @@ static int __init init_subsystems(void)
goto out;
}
+ if (kvm_mode == KVM_MODE_NV &&
+ !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
+ kvm_err("NV support requires GICv3, giving up\n");
+ err = -EINVAL;
+ goto out;
+ }
+
/*
* Init HYP architected timer support
*/
@@ -2411,6 +2460,19 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+ /* Propagate the FGT state to the the nVHE side */
+ kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks;
+ kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks;
+ kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks;
+ kvm_nvhe_sym(hdfgrtr_masks) = hdfgrtr_masks;
+ kvm_nvhe_sym(hdfgwtr_masks) = hdfgwtr_masks;
+ kvm_nvhe_sym(hafgrtr_masks) = hafgrtr_masks;
+ kvm_nvhe_sym(hfgrtr2_masks) = hfgrtr2_masks;
+ kvm_nvhe_sym(hfgwtr2_masks) = hfgwtr2_masks;
+ kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks;
+ kvm_nvhe_sym(hdfgrtr2_masks)= hdfgrtr2_masks;
+ kvm_nvhe_sym(hdfgwtr2_masks)= hdfgwtr2_masks;
+
/*
* Flush entire BSS since part of its data containing init symbols is read
* while the MMU is off.
@@ -2565,6 +2627,13 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start),
+ kvm_ksym_ref(__hyp_data_end), PAGE_HYP);
+ if (err) {
+ kvm_err("Cannot map .hyp.data section\n");
+ goto out_err;
+ }
+
err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
@@ -2704,16 +2773,19 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
return irqchip_in_kernel(kvm);
}
-bool kvm_arch_has_irq_bypass(void)
-{
- return true;
-}
-
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+ /*
+ * The only thing we have a chance of directly-injecting is LPIs. Maybe
+ * one day...
+ */
+ if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+ return 0;
return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
&irqfd->irq_entry);
@@ -2723,6 +2795,10 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+ if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+ return;
kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
&irqfd->irq_entry);
@@ -2803,11 +2879,12 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;
- kvm_info("%s%sVHE mode initialized successfully\n",
+ kvm_info("%s%sVHE%s mode initialized successfully\n",
in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
"Protected " : "Hyp "),
in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
- "h" : "n"));
+ "h" : "n"),
+ cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": "");
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 3a96c96816e9..a25be111cd8f 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -10,61 +10,11 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-enum trans_regime {
- TR_EL10,
- TR_EL20,
- TR_EL2,
-};
-
-struct s1_walk_info {
- u64 baddr;
- enum trans_regime regime;
- unsigned int max_oa_bits;
- unsigned int pgshift;
- unsigned int txsz;
- int sl;
- bool hpd;
- bool e0poe;
- bool poe;
- bool pan;
- bool be;
- bool s2;
-};
-
-struct s1_walk_result {
- union {
- struct {
- u64 desc;
- u64 pa;
- s8 level;
- u8 APTable;
- bool UXNTable;
- bool PXNTable;
- bool uwxn;
- bool uov;
- bool ur;
- bool uw;
- bool ux;
- bool pwxn;
- bool pov;
- bool pr;
- bool pw;
- bool px;
- };
- struct {
- u8 fst;
- bool ptw;
- bool s2;
- };
- };
- bool failed;
-};
-
-static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
+static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
wr->fst = fst;
- wr->ptw = ptw;
- wr->s2 = s2;
+ wr->ptw = s1ptw;
+ wr->s2 = s1ptw;
wr->failed = true;
}
@@ -145,20 +95,15 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
}
}
-static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
+static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
unsigned int stride, x;
- bool va55, tbi, lva, as_el0;
+ bool va55, tbi, lva;
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
- wi->regime = compute_translation_regime(vcpu, op);
- as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
- wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
- (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
-
va55 = va & BIT(55);
if (wi->regime == TR_EL2 && va55)
@@ -319,7 +264,7 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
/* R_BNDVG and following statements */
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
- as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
+ wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
goto transfault_l0;
/* AArch64.S1StartLevel() */
@@ -345,11 +290,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
return 0;
addrsz: /* Address Size Fault level 0 */
- fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
return -EFAULT;
transfault_l0: /* Translation Fault level 0 */
- fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
return -EFAULT;
}
@@ -380,13 +325,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (ret) {
fail_s1_walk(wr,
(s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
- true, true);
+ true);
return ret;
}
if (!kvm_s2_trans_readable(&s2_trans)) {
fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
- true, true);
+ true);
return -EPERM;
}
@@ -396,8 +341,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
if (ret) {
- fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
- true, false);
+ fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
return ret;
}
@@ -457,6 +401,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (check_output_size(desc & GENMASK(47, va_bottom), wi))
goto addrsz;
+ if (!(desc & PTE_AF)) {
+ fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
+ return -EACCES;
+ }
+
va_bottom += contiguous_bit_shift(desc, wi, level);
wr->failed = false;
@@ -465,13 +414,40 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->pa = desc & GENMASK(47, va_bottom);
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
+ wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
+ if (wr->nG) {
+ u64 asid_ttbr, tcr;
+
+ switch (wi->regime) {
+ case TR_EL10:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
+ asid_ttbr = ((tcr & TCR_A1) ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL1));
+ break;
+ case TR_EL20:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ asid_ttbr = ((tcr & TCR_A1) ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+ break;
+ default:
+ BUG();
+ }
+
+ wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+ !(tcr & TCR_ASID16))
+ wr->asid &= GENMASK(7, 0);
+ }
+
return 0;
addrsz:
- fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
return -EINVAL;
transfault:
- fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
return -ENOENT;
}
@@ -488,7 +464,6 @@ struct mmu_config {
u64 sctlr;
u64 vttbr;
u64 vtcr;
- u64 hcr;
};
static void __mmu_config_save(struct mmu_config *config)
@@ -511,13 +486,10 @@ static void __mmu_config_save(struct mmu_config *config)
config->sctlr = read_sysreg_el1(SYS_SCTLR);
config->vttbr = read_sysreg(vttbr_el2);
config->vtcr = read_sysreg(vtcr_el2);
- config->hcr = read_sysreg(hcr_el2);
}
static void __mmu_config_restore(struct mmu_config *config)
{
- write_sysreg(config->hcr, hcr_el2);
-
/*
* ARM errata 1165522 and 1530923 require TGE to be 1 before
* we update the guest state.
@@ -1090,22 +1062,22 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
break;
}
- if (pov_perms & ~POE_RXW)
+ if (pov_perms & ~POE_RWX)
pov_perms = POE_NONE;
if (wi->poe && wr->pov) {
wr->pr &= pov_perms & POE_R;
- wr->px &= pov_perms & POE_X;
wr->pw &= pov_perms & POE_W;
+ wr->px &= pov_perms & POE_X;
}
- if (uov_perms & ~POE_RXW)
+ if (uov_perms & ~POE_RWX)
uov_perms = POE_NONE;
if (wi->e0poe && wr->uov) {
wr->ur &= uov_perms & POE_R;
- wr->ux &= uov_perms & POE_X;
wr->uw &= uov_perms & POE_W;
+ wr->ux &= uov_perms & POE_X;
}
}
@@ -1155,7 +1127,12 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
bool perm_fail = false;
int ret, idx;
- ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
+ wi.regime = compute_translation_regime(vcpu, op);
+ wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
+ wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
+ (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
+
+ ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
if (ret)
goto compute_par;
@@ -1198,7 +1175,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
if (perm_fail)
- fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
+ fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
return compute_par_s1(vcpu, &wr, wi.regime);
@@ -1210,7 +1187,8 @@ compute_par:
* If the translation is unsuccessful, the value may only contain
* PAR_EL1.F, and cannot be taken at face value. It isn't an
* indication of the translation having failed, only that the fast
- * path did not succeed, *unless* it indicates a S1 permission fault.
+ * path did not succeed, *unless* it indicates a S1 permission or
+ * access fault.
*/
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
@@ -1266,8 +1244,8 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
__load_stage2(mmu, mmu->arch);
skip_mmu_switch:
- /* Clear TGE, enable S2 translation, we're rolling */
- write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
+ /* Temporarily switch back to guest context */
+ write_sysreg_hcr(vcpu->arch.hcr_el2);
isb();
switch (op) {
@@ -1299,6 +1277,8 @@ skip_mmu_switch:
if (!fail)
par = read_sysreg_par();
+ write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
+
if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
__mmu_config_restore(&config);
@@ -1313,19 +1293,29 @@ static bool par_check_s1_perm_fault(u64 par)
!(par & SYS_PAR_EL1_S));
}
+static bool par_check_s1_access_fault(u64 par)
+{
+ u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
+
+ return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
+ !(par & SYS_PAR_EL1_S));
+}
+
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
/*
- * If PAR_EL1 reports that AT failed on a S1 permission fault, we
- * know for sure that the PTW was able to walk the S1 tables and
- * there's nothing else to do.
+ * If PAR_EL1 reports that AT failed on a S1 permission or access
+ * fault, we know for sure that the PTW was able to walk the S1
+ * tables and there's nothing else to do.
*
* If AT failed for any other reason, then we must walk the guest S1
* to emulate the instruction.
*/
- if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
+ if ((par & SYS_PAR_EL1_F) &&
+ !par_check_s1_perm_fault(par) &&
+ !par_check_s1_access_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
@@ -1350,7 +1340,7 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
if (!vcpu_el2_e2h_is_set(vcpu))
val |= HCR_NV | HCR_NV1;
- write_sysreg(val, hcr_el2);
+ write_sysreg_hcr(val);
isb();
par = SYS_PAR_EL1_F;
@@ -1375,7 +1365,7 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
if (!fail)
par = read_sysreg_par();
- write_sysreg(hcr, hcr_el2);
+ write_sysreg_hcr(hcr);
isb();
}
@@ -1444,3 +1434,31 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
+
+/*
+ * Translate a VA for a given EL in a given translation regime, with
+ * or without PAN. This requires wi->{regime, as_el0, pan} to be
+ * set. The rest of the wi and wr should be 0-initialised.
+ */
+int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va)
+{
+ int ret;
+
+ ret = setup_s1_walk(vcpu, wi, wr, va);
+ if (ret)
+ return ret;
+
+ if (wr->level == S1_MMU_DISABLED) {
+ wr->ur = wr->uw = wr->ux = true;
+ wr->pr = wr->pw = wr->px = true;
+ } else {
+ ret = walk_s1(vcpu, wi, wr, va);
+ if (ret)
+ return ret;
+
+ compute_s1_permissions(vcpu, wi, wr);
+ }
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
new file mode 100644
index 000000000000..54911a93b001
--- /dev/null
+++ b/arch/arm64/kvm/config.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/sysreg.h>
+
+struct reg_bits_to_feat_map {
+ u64 bits;
+
+#define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */
+#define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */
+#define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */
+ unsigned long flags;
+
+ union {
+ struct {
+ u8 regidx;
+ u8 shift;
+ u8 width;
+ bool sign;
+ s8 lo_lim;
+ };
+ bool (*match)(struct kvm *);
+ bool (*fval)(struct kvm *, u64 *);
+ };
+};
+
+#define __NEEDS_FEAT_3(m, f, id, fld, lim) \
+ { \
+ .bits = (m), \
+ .flags = (f), \
+ .regidx = IDREG_IDX(SYS_ ## id), \
+ .shift = id ##_## fld ## _SHIFT, \
+ .width = id ##_## fld ## _WIDTH, \
+ .sign = id ##_## fld ## _SIGNED, \
+ .lo_lim = id ##_## fld ##_## lim \
+ }
+
+#define __NEEDS_FEAT_2(m, f, fun, dummy) \
+ { \
+ .bits = (m), \
+ .flags = (f) | CALL_FUNC, \
+ .fval = (fun), \
+ }
+
+#define __NEEDS_FEAT_1(m, f, fun) \
+ { \
+ .bits = (m), \
+ .flags = (f) | CALL_FUNC, \
+ .match = (fun), \
+ }
+
+#define NEEDS_FEAT_FLAG(m, f, ...) \
+ CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, __VA_ARGS__)
+
+#define NEEDS_FEAT_FIXED(m, ...) \
+ NEEDS_FEAT_FLAG(m, FIXED_VALUE, __VA_ARGS__, 0)
+
+#define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__)
+
+#define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP
+#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2
+#define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP
+#define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP
+#define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP
+#define FEAT_PMUv3p9 ID_AA64DFR0_EL1, PMUVer, V3P9
+#define FEAT_TRBE ID_AA64DFR0_EL1, TraceBuffer, IMP
+#define FEAT_TRBEv1p1 ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1
+#define FEAT_DoubleLock ID_AA64DFR0_EL1, DoubleLock, IMP
+#define FEAT_TRF ID_AA64DFR0_EL1, TraceFilt, IMP
+#define FEAT_AA32EL0 ID_AA64PFR0_EL1, EL0, AARCH32
+#define FEAT_AA32EL1 ID_AA64PFR0_EL1, EL1, AARCH32
+#define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP
+#define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP
+#define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP
+#define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP
+#define FEAT_S1POE ID_AA64MMFR3_EL1, S1POE, IMP
+#define FEAT_S1PIE ID_AA64MMFR3_EL1, S1PIE, IMP
+#define FEAT_THE ID_AA64PFR1_EL1, THE, IMP
+#define FEAT_SME ID_AA64PFR1_EL1, SME, IMP
+#define FEAT_GCS ID_AA64PFR1_EL1, GCS, IMP
+#define FEAT_LS64 ID_AA64ISAR1_EL1, LS64, LS64
+#define FEAT_LS64_V ID_AA64ISAR1_EL1, LS64, LS64_V
+#define FEAT_LS64_ACCDATA ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA
+#define FEAT_RAS ID_AA64PFR0_EL1, RAS, IMP
+#define FEAT_RASv2 ID_AA64PFR0_EL1, RAS, V2
+#define FEAT_GICv3 ID_AA64PFR0_EL1, GIC, IMP
+#define FEAT_LOR ID_AA64MMFR1_EL1, LO, IMP
+#define FEAT_SPEv1p4 ID_AA64DFR0_EL1, PMSVer, V1P4
+#define FEAT_SPEv1p5 ID_AA64DFR0_EL1, PMSVer, V1P5
+#define FEAT_ATS1A ID_AA64ISAR2_EL1, ATS1A, IMP
+#define FEAT_SPECRES2 ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX
+#define FEAT_SPECRES ID_AA64ISAR1_EL1, SPECRES, IMP
+#define FEAT_TLBIRANGE ID_AA64ISAR0_EL1, TLB, RANGE
+#define FEAT_TLBIOS ID_AA64ISAR0_EL1, TLB, OS
+#define FEAT_PAN2 ID_AA64MMFR1_EL1, PAN, PAN2
+#define FEAT_DPB2 ID_AA64ISAR1_EL1, DPB, DPB2
+#define FEAT_AMUv1 ID_AA64PFR0_EL1, AMU, IMP
+#define FEAT_AMUv1p1 ID_AA64PFR0_EL1, AMU, V1P1
+#define FEAT_CMOW ID_AA64MMFR1_EL1, CMOW, IMP
+#define FEAT_D128 ID_AA64MMFR3_EL1, D128, IMP
+#define FEAT_DoubleFault2 ID_AA64PFR1_EL1, DF2, IMP
+#define FEAT_FPMR ID_AA64PFR2_EL1, FPMR, IMP
+#define FEAT_MOPS ID_AA64ISAR2_EL1, MOPS, IMP
+#define FEAT_NMI ID_AA64PFR1_EL1, NMI, IMP
+#define FEAT_SCTLR2 ID_AA64MMFR3_EL1, SCTLRX, IMP
+#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP
+#define FEAT_TCR2 ID_AA64MMFR3_EL1, TCRX, IMP
+#define FEAT_XS ID_AA64ISAR1_EL1, XS, IMP
+#define FEAT_EVT ID_AA64MMFR2_EL1, EVT, IMP
+#define FEAT_EVT_TTLBxS ID_AA64MMFR2_EL1, EVT, TTLBxS
+#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2
+#define FEAT_RME ID_AA64PFR0_EL1, RME, IMP
+#define FEAT_MPAM ID_AA64PFR0_EL1, MPAM, 1
+#define FEAT_S2FWB ID_AA64MMFR2_EL1, FWB, IMP
+#define FEAT_TME ID_AA64ISAR0_EL1, TME, IMP
+#define FEAT_TWED ID_AA64MMFR1_EL1, TWED, IMP
+#define FEAT_E2H0 ID_AA64MMFR4_EL1, E2H0, IMP
+#define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP
+#define FEAT_PoPS ID_AA64MMFR4_EL1, PoPS, IMP
+#define FEAT_PFAR ID_AA64PFR1_EL1, PFAR, IMP
+#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, PMUVer, V3P9
+#define FEAT_PMUv3_SS ID_AA64DFR0_EL1, PMSS, IMP
+#define FEAT_SEBEP ID_AA64DFR0_EL1, SEBEP, IMP
+#define FEAT_EBEP ID_AA64DFR1_EL1, EBEP, IMP
+#define FEAT_ITE ID_AA64DFR1_EL1, ITE, IMP
+#define FEAT_PMUv3_ICNTR ID_AA64DFR1_EL1, PMICNTR, IMP
+#define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP
+#define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP
+#define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP
+
+static bool not_feat_aa64el3(struct kvm *kvm)
+{
+ return !kvm_has_feat(kvm, FEAT_AA64EL3);
+}
+
+static bool feat_nv2(struct kvm *kvm)
+{
+ return ((kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY) &&
+ kvm_has_feat_enum(kvm, ID_AA64MMFR2_EL1, NV, NI)) ||
+ kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2));
+}
+
+static bool feat_nv2_e2h0_ni(struct kvm *kvm)
+{
+ return feat_nv2(kvm) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
+static bool feat_rasv1p1(struct kvm *kvm)
+{
+ return (kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) ||
+ (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) &&
+ kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)));
+}
+
+static bool feat_csv2_2_csv2_1p2(struct kvm *kvm)
+{
+ return (kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) ||
+ (kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2) &&
+ kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, CSV2, IMP)));
+}
+
+static bool feat_pauth(struct kvm *kvm)
+{
+ return kvm_has_pauth(kvm, PAuth);
+}
+
+static bool feat_pauth_lr(struct kvm *kvm)
+{
+ return kvm_has_pauth(kvm, PAuth_LR);
+}
+
+static bool feat_aderr(struct kvm *kvm)
+{
+ return (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ADERR, FEAT_ADERR) &&
+ kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SDERR, FEAT_ADERR));
+}
+
+static bool feat_anerr(struct kvm *kvm)
+{
+ return (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ANERR, FEAT_ANERR) &&
+ kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SNERR, FEAT_ANERR));
+}
+
+static bool feat_sme_smps(struct kvm *kvm)
+{
+ /*
+ * Revists this if KVM ever supports SME -- this really should
+ * look at the guest's view of SMIDR_EL1. Funnily enough, this
+ * is not captured in the JSON file, but only as a note in the
+ * ARM ARM.
+ */
+ return (kvm_has_feat(kvm, FEAT_SME) &&
+ (read_sysreg_s(SYS_SMIDR_EL1) & SMIDR_EL1_SMPS));
+}
+
+static bool feat_spe_fds(struct kvm *kvm)
+{
+ /*
+ * Revists this if KVM ever supports SPE -- this really should
+ * look at the guest's view of PMSIDR_EL1.
+ */
+ return (kvm_has_feat(kvm, FEAT_SPEv1p4) &&
+ (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS));
+}
+
+static bool feat_trbe_mpam(struct kvm *kvm)
+{
+ /*
+ * Revists this if KVM ever supports both MPAM and TRBE --
+ * this really should look at the guest's view of TRBIDR_EL1.
+ */
+ return (kvm_has_feat(kvm, FEAT_TRBE) &&
+ kvm_has_feat(kvm, FEAT_MPAM) &&
+ (read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM));
+}
+
+static bool feat_ebep_pmuv3_ss(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, FEAT_PMUv3_SS);
+}
+
+static bool compute_hcr_rw(struct kvm *kvm, u64 *bits)
+{
+ /* This is purely academic: AArch32 and NV are mutually exclusive */
+ if (bits) {
+ if (kvm_has_feat(kvm, FEAT_AA32EL1))
+ *bits &= ~HCR_EL2_RW;
+ else
+ *bits |= HCR_EL2_RW;
+ }
+
+ return true;
+}
+
+static bool compute_hcr_e2h(struct kvm *kvm, u64 *bits)
+{
+ if (bits) {
+ if (kvm_has_feat(kvm, FEAT_E2H0))
+ *bits &= ~HCR_EL2_E2H;
+ else
+ *bits |= HCR_EL2_E2H;
+ }
+
+ return true;
+}
+
+static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = {
+ NEEDS_FEAT(HFGRTR_EL2_nAMAIR2_EL1 |
+ HFGRTR_EL2_nMAIR2_EL1,
+ FEAT_AIE),
+ NEEDS_FEAT(HFGRTR_EL2_nS2POR_EL1, FEAT_S2POE),
+ NEEDS_FEAT(HFGRTR_EL2_nPOR_EL1 |
+ HFGRTR_EL2_nPOR_EL0,
+ FEAT_S1POE),
+ NEEDS_FEAT(HFGRTR_EL2_nPIR_EL1 |
+ HFGRTR_EL2_nPIRE0_EL1,
+ FEAT_S1PIE),
+ NEEDS_FEAT(HFGRTR_EL2_nRCWMASK_EL1, FEAT_THE),
+ NEEDS_FEAT(HFGRTR_EL2_nTPIDR2_EL0 |
+ HFGRTR_EL2_nSMPRI_EL1,
+ FEAT_SME),
+ NEEDS_FEAT(HFGRTR_EL2_nGCS_EL1 |
+ HFGRTR_EL2_nGCS_EL0,
+ FEAT_GCS),
+ NEEDS_FEAT(HFGRTR_EL2_nACCDATA_EL1, FEAT_LS64_ACCDATA),
+ NEEDS_FEAT(HFGRTR_EL2_ERXADDR_EL1 |
+ HFGRTR_EL2_ERXMISCn_EL1 |
+ HFGRTR_EL2_ERXSTATUS_EL1 |
+ HFGRTR_EL2_ERXCTLR_EL1 |
+ HFGRTR_EL2_ERXFR_EL1 |
+ HFGRTR_EL2_ERRSELR_EL1 |
+ HFGRTR_EL2_ERRIDR_EL1,
+ FEAT_RAS),
+ NEEDS_FEAT(HFGRTR_EL2_ERXPFGCDN_EL1 |
+ HFGRTR_EL2_ERXPFGCTL_EL1 |
+ HFGRTR_EL2_ERXPFGF_EL1,
+ feat_rasv1p1),
+ NEEDS_FEAT(HFGRTR_EL2_ICC_IGRPENn_EL1, FEAT_GICv3),
+ NEEDS_FEAT(HFGRTR_EL2_SCXTNUM_EL0 |
+ HFGRTR_EL2_SCXTNUM_EL1,
+ feat_csv2_2_csv2_1p2),
+ NEEDS_FEAT(HFGRTR_EL2_LORSA_EL1 |
+ HFGRTR_EL2_LORN_EL1 |
+ HFGRTR_EL2_LORID_EL1 |
+ HFGRTR_EL2_LOREA_EL1 |
+ HFGRTR_EL2_LORC_EL1,
+ FEAT_LOR),
+ NEEDS_FEAT(HFGRTR_EL2_APIBKey |
+ HFGRTR_EL2_APIAKey |
+ HFGRTR_EL2_APGAKey |
+ HFGRTR_EL2_APDBKey |
+ HFGRTR_EL2_APDAKey,
+ feat_pauth),
+ NEEDS_FEAT_FLAG(HFGRTR_EL2_VBAR_EL1 |
+ HFGRTR_EL2_TTBR1_EL1 |
+ HFGRTR_EL2_TTBR0_EL1 |
+ HFGRTR_EL2_TPIDR_EL0 |
+ HFGRTR_EL2_TPIDRRO_EL0 |
+ HFGRTR_EL2_TPIDR_EL1 |
+ HFGRTR_EL2_TCR_EL1 |
+ HFGRTR_EL2_SCTLR_EL1 |
+ HFGRTR_EL2_REVIDR_EL1 |
+ HFGRTR_EL2_PAR_EL1 |
+ HFGRTR_EL2_MPIDR_EL1 |
+ HFGRTR_EL2_MIDR_EL1 |
+ HFGRTR_EL2_MAIR_EL1 |
+ HFGRTR_EL2_ISR_EL1 |
+ HFGRTR_EL2_FAR_EL1 |
+ HFGRTR_EL2_ESR_EL1 |
+ HFGRTR_EL2_DCZID_EL0 |
+ HFGRTR_EL2_CTR_EL0 |
+ HFGRTR_EL2_CSSELR_EL1 |
+ HFGRTR_EL2_CPACR_EL1 |
+ HFGRTR_EL2_CONTEXTIDR_EL1|
+ HFGRTR_EL2_CLIDR_EL1 |
+ HFGRTR_EL2_CCSIDR_EL1 |
+ HFGRTR_EL2_AMAIR_EL1 |
+ HFGRTR_EL2_AIDR_EL1 |
+ HFGRTR_EL2_AFSR1_EL1 |
+ HFGRTR_EL2_AFSR0_EL1,
+ NEVER_FGU, FEAT_AA64EL1),
+};
+
+static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = {
+ NEEDS_FEAT(HFGWTR_EL2_nAMAIR2_EL1 |
+ HFGWTR_EL2_nMAIR2_EL1,
+ FEAT_AIE),
+ NEEDS_FEAT(HFGWTR_EL2_nS2POR_EL1, FEAT_S2POE),
+ NEEDS_FEAT(HFGWTR_EL2_nPOR_EL1 |
+ HFGWTR_EL2_nPOR_EL0,
+ FEAT_S1POE),
+ NEEDS_FEAT(HFGWTR_EL2_nPIR_EL1 |
+ HFGWTR_EL2_nPIRE0_EL1,
+ FEAT_S1PIE),
+ NEEDS_FEAT(HFGWTR_EL2_nRCWMASK_EL1, FEAT_THE),
+ NEEDS_FEAT(HFGWTR_EL2_nTPIDR2_EL0 |
+ HFGWTR_EL2_nSMPRI_EL1,
+ FEAT_SME),
+ NEEDS_FEAT(HFGWTR_EL2_nGCS_EL1 |
+ HFGWTR_EL2_nGCS_EL0,
+ FEAT_GCS),
+ NEEDS_FEAT(HFGWTR_EL2_nACCDATA_EL1, FEAT_LS64_ACCDATA),
+ NEEDS_FEAT(HFGWTR_EL2_ERXADDR_EL1 |
+ HFGWTR_EL2_ERXMISCn_EL1 |
+ HFGWTR_EL2_ERXSTATUS_EL1 |
+ HFGWTR_EL2_ERXCTLR_EL1 |
+ HFGWTR_EL2_ERRSELR_EL1,
+ FEAT_RAS),
+ NEEDS_FEAT(HFGWTR_EL2_ERXPFGCDN_EL1 |
+ HFGWTR_EL2_ERXPFGCTL_EL1,
+ feat_rasv1p1),
+ NEEDS_FEAT(HFGWTR_EL2_ICC_IGRPENn_EL1, FEAT_GICv3),
+ NEEDS_FEAT(HFGWTR_EL2_SCXTNUM_EL0 |
+ HFGWTR_EL2_SCXTNUM_EL1,
+ feat_csv2_2_csv2_1p2),
+ NEEDS_FEAT(HFGWTR_EL2_LORSA_EL1 |
+ HFGWTR_EL2_LORN_EL1 |
+ HFGWTR_EL2_LOREA_EL1 |
+ HFGWTR_EL2_LORC_EL1,
+ FEAT_LOR),
+ NEEDS_FEAT(HFGWTR_EL2_APIBKey |
+ HFGWTR_EL2_APIAKey |
+ HFGWTR_EL2_APGAKey |
+ HFGWTR_EL2_APDBKey |
+ HFGWTR_EL2_APDAKey,
+ feat_pauth),
+ NEEDS_FEAT_FLAG(HFGWTR_EL2_VBAR_EL1 |
+ HFGWTR_EL2_TTBR1_EL1 |
+ HFGWTR_EL2_TTBR0_EL1 |
+ HFGWTR_EL2_TPIDR_EL0 |
+ HFGWTR_EL2_TPIDRRO_EL0 |
+ HFGWTR_EL2_TPIDR_EL1 |
+ HFGWTR_EL2_TCR_EL1 |
+ HFGWTR_EL2_SCTLR_EL1 |
+ HFGWTR_EL2_PAR_EL1 |
+ HFGWTR_EL2_MAIR_EL1 |
+ HFGWTR_EL2_FAR_EL1 |
+ HFGWTR_EL2_ESR_EL1 |
+ HFGWTR_EL2_CSSELR_EL1 |
+ HFGWTR_EL2_CPACR_EL1 |
+ HFGWTR_EL2_CONTEXTIDR_EL1|
+ HFGWTR_EL2_AMAIR_EL1 |
+ HFGWTR_EL2_AFSR1_EL1 |
+ HFGWTR_EL2_AFSR0_EL1,
+ NEVER_FGU, FEAT_AA64EL1),
+};
+
+static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = {
+ NEEDS_FEAT(HDFGRTR_EL2_PMBIDR_EL1 |
+ HDFGRTR_EL2_PMSLATFR_EL1 |
+ HDFGRTR_EL2_PMSIRR_EL1 |
+ HDFGRTR_EL2_PMSIDR_EL1 |
+ HDFGRTR_EL2_PMSICR_EL1 |
+ HDFGRTR_EL2_PMSFCR_EL1 |
+ HDFGRTR_EL2_PMSEVFR_EL1 |
+ HDFGRTR_EL2_PMSCR_EL1 |
+ HDFGRTR_EL2_PMBSR_EL1 |
+ HDFGRTR_EL2_PMBPTR_EL1 |
+ HDFGRTR_EL2_PMBLIMITR_EL1,
+ FEAT_SPE),
+ NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE),
+ NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA |
+ HDFGRTR_EL2_nBRBCTL |
+ HDFGRTR_EL2_nBRBIDR,
+ FEAT_BRBE),
+ NEEDS_FEAT(HDFGRTR_EL2_TRCVICTLR |
+ HDFGRTR_EL2_TRCSTATR |
+ HDFGRTR_EL2_TRCSSCSRn |
+ HDFGRTR_EL2_TRCSEQSTR |
+ HDFGRTR_EL2_TRCPRGCTLR |
+ HDFGRTR_EL2_TRCOSLSR |
+ HDFGRTR_EL2_TRCIMSPECn |
+ HDFGRTR_EL2_TRCID |
+ HDFGRTR_EL2_TRCCNTVRn |
+ HDFGRTR_EL2_TRCCLAIM |
+ HDFGRTR_EL2_TRCAUXCTLR |
+ HDFGRTR_EL2_TRCAUTHSTATUS |
+ HDFGRTR_EL2_TRC,
+ FEAT_TRC_SR),
+ NEEDS_FEAT(HDFGRTR_EL2_PMCEIDn_EL0 |
+ HDFGRTR_EL2_PMUSERENR_EL0 |
+ HDFGRTR_EL2_PMMIR_EL1 |
+ HDFGRTR_EL2_PMSELR_EL0 |
+ HDFGRTR_EL2_PMOVS |
+ HDFGRTR_EL2_PMINTEN |
+ HDFGRTR_EL2_PMCNTEN |
+ HDFGRTR_EL2_PMCCNTR_EL0 |
+ HDFGRTR_EL2_PMCCFILTR_EL0 |
+ HDFGRTR_EL2_PMEVTYPERn_EL0 |
+ HDFGRTR_EL2_PMEVCNTRn_EL0,
+ FEAT_PMUv3),
+ NEEDS_FEAT(HDFGRTR_EL2_TRBTRG_EL1 |
+ HDFGRTR_EL2_TRBSR_EL1 |
+ HDFGRTR_EL2_TRBPTR_EL1 |
+ HDFGRTR_EL2_TRBMAR_EL1 |
+ HDFGRTR_EL2_TRBLIMITR_EL1 |
+ HDFGRTR_EL2_TRBIDR_EL1 |
+ HDFGRTR_EL2_TRBBASER_EL1,
+ FEAT_TRBE),
+ NEEDS_FEAT_FLAG(HDFGRTR_EL2_OSDLR_EL1, NEVER_FGU,
+ FEAT_DoubleLock),
+ NEEDS_FEAT_FLAG(HDFGRTR_EL2_OSECCR_EL1 |
+ HDFGRTR_EL2_OSLSR_EL1 |
+ HDFGRTR_EL2_DBGPRCR_EL1 |
+ HDFGRTR_EL2_DBGAUTHSTATUS_EL1|
+ HDFGRTR_EL2_DBGCLAIM |
+ HDFGRTR_EL2_MDSCR_EL1 |
+ HDFGRTR_EL2_DBGWVRn_EL1 |
+ HDFGRTR_EL2_DBGWCRn_EL1 |
+ HDFGRTR_EL2_DBGBVRn_EL1 |
+ HDFGRTR_EL2_DBGBCRn_EL1,
+ NEVER_FGU, FEAT_AA64EL1)
+};
+
+static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = {
+ NEEDS_FEAT(HDFGWTR_EL2_PMSLATFR_EL1 |
+ HDFGWTR_EL2_PMSIRR_EL1 |
+ HDFGWTR_EL2_PMSICR_EL1 |
+ HDFGWTR_EL2_PMSFCR_EL1 |
+ HDFGWTR_EL2_PMSEVFR_EL1 |
+ HDFGWTR_EL2_PMSCR_EL1 |
+ HDFGWTR_EL2_PMBSR_EL1 |
+ HDFGWTR_EL2_PMBPTR_EL1 |
+ HDFGWTR_EL2_PMBLIMITR_EL1,
+ FEAT_SPE),
+ NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE),
+ NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA |
+ HDFGWTR_EL2_nBRBCTL,
+ FEAT_BRBE),
+ NEEDS_FEAT(HDFGWTR_EL2_TRCVICTLR |
+ HDFGWTR_EL2_TRCSSCSRn |
+ HDFGWTR_EL2_TRCSEQSTR |
+ HDFGWTR_EL2_TRCPRGCTLR |
+ HDFGWTR_EL2_TRCOSLAR |
+ HDFGWTR_EL2_TRCIMSPECn |
+ HDFGWTR_EL2_TRCCNTVRn |
+ HDFGWTR_EL2_TRCCLAIM |
+ HDFGWTR_EL2_TRCAUXCTLR |
+ HDFGWTR_EL2_TRC,
+ FEAT_TRC_SR),
+ NEEDS_FEAT(HDFGWTR_EL2_PMUSERENR_EL0 |
+ HDFGWTR_EL2_PMCR_EL0 |
+ HDFGWTR_EL2_PMSWINC_EL0 |
+ HDFGWTR_EL2_PMSELR_EL0 |
+ HDFGWTR_EL2_PMOVS |
+ HDFGWTR_EL2_PMINTEN |
+ HDFGWTR_EL2_PMCNTEN |
+ HDFGWTR_EL2_PMCCNTR_EL0 |
+ HDFGWTR_EL2_PMCCFILTR_EL0 |
+ HDFGWTR_EL2_PMEVTYPERn_EL0 |
+ HDFGWTR_EL2_PMEVCNTRn_EL0,
+ FEAT_PMUv3),
+ NEEDS_FEAT(HDFGWTR_EL2_TRBTRG_EL1 |
+ HDFGWTR_EL2_TRBSR_EL1 |
+ HDFGWTR_EL2_TRBPTR_EL1 |
+ HDFGWTR_EL2_TRBMAR_EL1 |
+ HDFGWTR_EL2_TRBLIMITR_EL1 |
+ HDFGWTR_EL2_TRBBASER_EL1,
+ FEAT_TRBE),
+ NEEDS_FEAT_FLAG(HDFGWTR_EL2_OSDLR_EL1,
+ NEVER_FGU, FEAT_DoubleLock),
+ NEEDS_FEAT_FLAG(HDFGWTR_EL2_OSECCR_EL1 |
+ HDFGWTR_EL2_OSLAR_EL1 |
+ HDFGWTR_EL2_DBGPRCR_EL1 |
+ HDFGWTR_EL2_DBGCLAIM |
+ HDFGWTR_EL2_MDSCR_EL1 |
+ HDFGWTR_EL2_DBGWVRn_EL1 |
+ HDFGWTR_EL2_DBGWCRn_EL1 |
+ HDFGWTR_EL2_DBGBVRn_EL1 |
+ HDFGWTR_EL2_DBGBCRn_EL1,
+ NEVER_FGU, FEAT_AA64EL1),
+ NEEDS_FEAT(HDFGWTR_EL2_TRFCR_EL1, FEAT_TRF),
+};
+
+
+static const struct reg_bits_to_feat_map hfgitr_feat_map[] = {
+ NEEDS_FEAT(HFGITR_EL2_PSBCSYNC, FEAT_SPEv1p5),
+ NEEDS_FEAT(HFGITR_EL2_ATS1E1A, FEAT_ATS1A),
+ NEEDS_FEAT(HFGITR_EL2_COSPRCTX, FEAT_SPECRES2),
+ NEEDS_FEAT(HFGITR_EL2_nGCSEPP |
+ HFGITR_EL2_nGCSSTR_EL1 |
+ HFGITR_EL2_nGCSPUSHM_EL1,
+ FEAT_GCS),
+ NEEDS_FEAT(HFGITR_EL2_nBRBIALL |
+ HFGITR_EL2_nBRBINJ,
+ FEAT_BRBE),
+ NEEDS_FEAT(HFGITR_EL2_CPPRCTX |
+ HFGITR_EL2_DVPRCTX |
+ HFGITR_EL2_CFPRCTX,
+ FEAT_SPECRES),
+ NEEDS_FEAT(HFGITR_EL2_TLBIRVAALE1 |
+ HFGITR_EL2_TLBIRVALE1 |
+ HFGITR_EL2_TLBIRVAAE1 |
+ HFGITR_EL2_TLBIRVAE1 |
+ HFGITR_EL2_TLBIRVAALE1IS |
+ HFGITR_EL2_TLBIRVALE1IS |
+ HFGITR_EL2_TLBIRVAAE1IS |
+ HFGITR_EL2_TLBIRVAE1IS |
+ HFGITR_EL2_TLBIRVAALE1OS |
+ HFGITR_EL2_TLBIRVALE1OS |
+ HFGITR_EL2_TLBIRVAAE1OS |
+ HFGITR_EL2_TLBIRVAE1OS,
+ FEAT_TLBIRANGE),
+ NEEDS_FEAT(HFGITR_EL2_TLBIVAALE1OS |
+ HFGITR_EL2_TLBIVALE1OS |
+ HFGITR_EL2_TLBIVAAE1OS |
+ HFGITR_EL2_TLBIASIDE1OS |
+ HFGITR_EL2_TLBIVAE1OS |
+ HFGITR_EL2_TLBIVMALLE1OS,
+ FEAT_TLBIOS),
+ NEEDS_FEAT(HFGITR_EL2_ATS1E1WP |
+ HFGITR_EL2_ATS1E1RP,
+ FEAT_PAN2),
+ NEEDS_FEAT(HFGITR_EL2_DCCVADP, FEAT_DPB2),
+ NEEDS_FEAT_FLAG(HFGITR_EL2_DCCVAC |
+ HFGITR_EL2_SVC_EL1 |
+ HFGITR_EL2_SVC_EL0 |
+ HFGITR_EL2_ERET |
+ HFGITR_EL2_TLBIVAALE1 |
+ HFGITR_EL2_TLBIVALE1 |
+ HFGITR_EL2_TLBIVAAE1 |
+ HFGITR_EL2_TLBIASIDE1 |
+ HFGITR_EL2_TLBIVAE1 |
+ HFGITR_EL2_TLBIVMALLE1 |
+ HFGITR_EL2_TLBIVAALE1IS |
+ HFGITR_EL2_TLBIVALE1IS |
+ HFGITR_EL2_TLBIVAAE1IS |
+ HFGITR_EL2_TLBIASIDE1IS |
+ HFGITR_EL2_TLBIVAE1IS |
+ HFGITR_EL2_TLBIVMALLE1IS|
+ HFGITR_EL2_ATS1E0W |
+ HFGITR_EL2_ATS1E0R |
+ HFGITR_EL2_ATS1E1W |
+ HFGITR_EL2_ATS1E1R |
+ HFGITR_EL2_DCZVA |
+ HFGITR_EL2_DCCIVAC |
+ HFGITR_EL2_DCCVAP |
+ HFGITR_EL2_DCCVAU |
+ HFGITR_EL2_DCCISW |
+ HFGITR_EL2_DCCSW |
+ HFGITR_EL2_DCISW |
+ HFGITR_EL2_DCIVAC |
+ HFGITR_EL2_ICIVAU |
+ HFGITR_EL2_ICIALLU |
+ HFGITR_EL2_ICIALLUIS,
+ NEVER_FGU, FEAT_AA64EL1),
+};
+
+static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = {
+ NEEDS_FEAT(HAFGRTR_EL2_AMEVTYPER115_EL0 |
+ HAFGRTR_EL2_AMEVTYPER114_EL0 |
+ HAFGRTR_EL2_AMEVTYPER113_EL0 |
+ HAFGRTR_EL2_AMEVTYPER112_EL0 |
+ HAFGRTR_EL2_AMEVTYPER111_EL0 |
+ HAFGRTR_EL2_AMEVTYPER110_EL0 |
+ HAFGRTR_EL2_AMEVTYPER19_EL0 |
+ HAFGRTR_EL2_AMEVTYPER18_EL0 |
+ HAFGRTR_EL2_AMEVTYPER17_EL0 |
+ HAFGRTR_EL2_AMEVTYPER16_EL0 |
+ HAFGRTR_EL2_AMEVTYPER15_EL0 |
+ HAFGRTR_EL2_AMEVTYPER14_EL0 |
+ HAFGRTR_EL2_AMEVTYPER13_EL0 |
+ HAFGRTR_EL2_AMEVTYPER12_EL0 |
+ HAFGRTR_EL2_AMEVTYPER11_EL0 |
+ HAFGRTR_EL2_AMEVTYPER10_EL0 |
+ HAFGRTR_EL2_AMEVCNTR115_EL0 |
+ HAFGRTR_EL2_AMEVCNTR114_EL0 |
+ HAFGRTR_EL2_AMEVCNTR113_EL0 |
+ HAFGRTR_EL2_AMEVCNTR112_EL0 |
+ HAFGRTR_EL2_AMEVCNTR111_EL0 |
+ HAFGRTR_EL2_AMEVCNTR110_EL0 |
+ HAFGRTR_EL2_AMEVCNTR19_EL0 |
+ HAFGRTR_EL2_AMEVCNTR18_EL0 |
+ HAFGRTR_EL2_AMEVCNTR17_EL0 |
+ HAFGRTR_EL2_AMEVCNTR16_EL0 |
+ HAFGRTR_EL2_AMEVCNTR15_EL0 |
+ HAFGRTR_EL2_AMEVCNTR14_EL0 |
+ HAFGRTR_EL2_AMEVCNTR13_EL0 |
+ HAFGRTR_EL2_AMEVCNTR12_EL0 |
+ HAFGRTR_EL2_AMEVCNTR11_EL0 |
+ HAFGRTR_EL2_AMEVCNTR10_EL0 |
+ HAFGRTR_EL2_AMCNTEN1 |
+ HAFGRTR_EL2_AMCNTEN0 |
+ HAFGRTR_EL2_AMEVCNTR03_EL0 |
+ HAFGRTR_EL2_AMEVCNTR02_EL0 |
+ HAFGRTR_EL2_AMEVCNTR01_EL0 |
+ HAFGRTR_EL2_AMEVCNTR00_EL0,
+ FEAT_AMUv1),
+};
+
+static const struct reg_bits_to_feat_map hfgitr2_feat_map[] = {
+ NEEDS_FEAT(HFGITR2_EL2_nDCCIVAPS, FEAT_PoPS),
+ NEEDS_FEAT(HFGITR2_EL2_TSBCSYNC, FEAT_TRBEv1p1)
+};
+
+static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = {
+ NEEDS_FEAT(HFGRTR2_EL2_nPFAR_EL1, FEAT_PFAR),
+ NEEDS_FEAT(HFGRTR2_EL2_nERXGSR_EL1, FEAT_RASv2),
+ NEEDS_FEAT(HFGRTR2_EL2_nACTLRALIAS_EL1 |
+ HFGRTR2_EL2_nACTLRMASK_EL1 |
+ HFGRTR2_EL2_nCPACRALIAS_EL1 |
+ HFGRTR2_EL2_nCPACRMASK_EL1 |
+ HFGRTR2_EL2_nSCTLR2MASK_EL1 |
+ HFGRTR2_EL2_nSCTLRALIAS2_EL1 |
+ HFGRTR2_EL2_nSCTLRALIAS_EL1 |
+ HFGRTR2_EL2_nSCTLRMASK_EL1 |
+ HFGRTR2_EL2_nTCR2ALIAS_EL1 |
+ HFGRTR2_EL2_nTCR2MASK_EL1 |
+ HFGRTR2_EL2_nTCRALIAS_EL1 |
+ HFGRTR2_EL2_nTCRMASK_EL1,
+ FEAT_SRMASK),
+ NEEDS_FEAT(HFGRTR2_EL2_nRCWSMASK_EL1, FEAT_THE),
+};
+
+static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = {
+ NEEDS_FEAT(HFGWTR2_EL2_nPFAR_EL1, FEAT_PFAR),
+ NEEDS_FEAT(HFGWTR2_EL2_nACTLRALIAS_EL1 |
+ HFGWTR2_EL2_nACTLRMASK_EL1 |
+ HFGWTR2_EL2_nCPACRALIAS_EL1 |
+ HFGWTR2_EL2_nCPACRMASK_EL1 |
+ HFGWTR2_EL2_nSCTLR2MASK_EL1 |
+ HFGWTR2_EL2_nSCTLRALIAS2_EL1 |
+ HFGWTR2_EL2_nSCTLRALIAS_EL1 |
+ HFGWTR2_EL2_nSCTLRMASK_EL1 |
+ HFGWTR2_EL2_nTCR2ALIAS_EL1 |
+ HFGWTR2_EL2_nTCR2MASK_EL1 |
+ HFGWTR2_EL2_nTCRALIAS_EL1 |
+ HFGWTR2_EL2_nTCRMASK_EL1,
+ FEAT_SRMASK),
+ NEEDS_FEAT(HFGWTR2_EL2_nRCWSMASK_EL1, FEAT_THE),
+};
+
+static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = {
+ NEEDS_FEAT(HDFGRTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss),
+ NEEDS_FEAT(HDFGRTR2_EL2_nTRCITECR_EL1, FEAT_ITE),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMICFILTR_EL0 |
+ HDFGRTR2_EL2_nPMICNTR_EL0,
+ FEAT_PMUv3_ICNTR),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, FEAT_PMUv3p9),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMSSCR_EL1 |
+ HDFGRTR2_EL2_nPMSSDATA,
+ FEAT_PMUv3_SS),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMIAR_EL1, FEAT_SEBEP),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMSDSFR_EL1, feat_spe_fds),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMBMAR_EL1, FEAT_SPE_nVM),
+ NEEDS_FEAT(HDFGRTR2_EL2_nSPMACCESSR_EL1 |
+ HDFGRTR2_EL2_nSPMCNTEN |
+ HDFGRTR2_EL2_nSPMCR_EL0 |
+ HDFGRTR2_EL2_nSPMDEVAFF_EL1 |
+ HDFGRTR2_EL2_nSPMEVCNTRn_EL0 |
+ HDFGRTR2_EL2_nSPMEVTYPERn_EL0|
+ HDFGRTR2_EL2_nSPMID |
+ HDFGRTR2_EL2_nSPMINTEN |
+ HDFGRTR2_EL2_nSPMOVS |
+ HDFGRTR2_EL2_nSPMSCR_EL1 |
+ HDFGRTR2_EL2_nSPMSELR_EL0,
+ FEAT_SPMU),
+ NEEDS_FEAT(HDFGRTR2_EL2_nMDSTEPOP_EL1, FEAT_STEP2),
+ NEEDS_FEAT(HDFGRTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam),
+};
+
+static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = {
+ NEEDS_FEAT(HDFGWTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss),
+ NEEDS_FEAT(HDFGWTR2_EL2_nTRCITECR_EL1, FEAT_ITE),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMICFILTR_EL0 |
+ HDFGWTR2_EL2_nPMICNTR_EL0,
+ FEAT_PMUv3_ICNTR),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMUACR_EL1 |
+ HDFGWTR2_EL2_nPMZR_EL0,
+ FEAT_PMUv3p9),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMSSCR_EL1, FEAT_PMUv3_SS),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMIAR_EL1, FEAT_SEBEP),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMSDSFR_EL1, feat_spe_fds),
+ NEEDS_FEAT(HDFGWTR2_EL2_nPMBMAR_EL1, FEAT_SPE_nVM),
+ NEEDS_FEAT(HDFGWTR2_EL2_nSPMACCESSR_EL1 |
+ HDFGWTR2_EL2_nSPMCNTEN |
+ HDFGWTR2_EL2_nSPMCR_EL0 |
+ HDFGWTR2_EL2_nSPMEVCNTRn_EL0 |
+ HDFGWTR2_EL2_nSPMEVTYPERn_EL0|
+ HDFGWTR2_EL2_nSPMINTEN |
+ HDFGWTR2_EL2_nSPMOVS |
+ HDFGWTR2_EL2_nSPMSCR_EL1 |
+ HDFGWTR2_EL2_nSPMSELR_EL0,
+ FEAT_SPMU),
+ NEEDS_FEAT(HDFGWTR2_EL2_nMDSTEPOP_EL1, FEAT_STEP2),
+ NEEDS_FEAT(HDFGWTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam),
+};
+
+static const struct reg_bits_to_feat_map hcrx_feat_map[] = {
+ NEEDS_FEAT(HCRX_EL2_PACMEn, feat_pauth_lr),
+ NEEDS_FEAT(HCRX_EL2_EnFPM, FEAT_FPMR),
+ NEEDS_FEAT(HCRX_EL2_GCSEn, FEAT_GCS),
+ NEEDS_FEAT(HCRX_EL2_EnIDCP128, FEAT_SYSREG128),
+ NEEDS_FEAT(HCRX_EL2_EnSDERR, feat_aderr),
+ NEEDS_FEAT(HCRX_EL2_TMEA, FEAT_DoubleFault2),
+ NEEDS_FEAT(HCRX_EL2_EnSNERR, feat_anerr),
+ NEEDS_FEAT(HCRX_EL2_D128En, FEAT_D128),
+ NEEDS_FEAT(HCRX_EL2_PTTWI, FEAT_THE),
+ NEEDS_FEAT(HCRX_EL2_SCTLR2En, FEAT_SCTLR2),
+ NEEDS_FEAT(HCRX_EL2_TCR2En, FEAT_TCR2),
+ NEEDS_FEAT(HCRX_EL2_MSCEn |
+ HCRX_EL2_MCE2,
+ FEAT_MOPS),
+ NEEDS_FEAT(HCRX_EL2_CMOW, FEAT_CMOW),
+ NEEDS_FEAT(HCRX_EL2_VFNMI |
+ HCRX_EL2_VINMI |
+ HCRX_EL2_TALLINT,
+ FEAT_NMI),
+ NEEDS_FEAT(HCRX_EL2_SMPME, feat_sme_smps),
+ NEEDS_FEAT(HCRX_EL2_FGTnXS |
+ HCRX_EL2_FnXS,
+ FEAT_XS),
+ NEEDS_FEAT(HCRX_EL2_EnASR, FEAT_LS64_V),
+ NEEDS_FEAT(HCRX_EL2_EnALS, FEAT_LS64),
+ NEEDS_FEAT(HCRX_EL2_EnAS0, FEAT_LS64_ACCDATA),
+};
+
+static const struct reg_bits_to_feat_map hcr_feat_map[] = {
+ NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0),
+ NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw),
+ NEEDS_FEAT(HCR_EL2_HCD, not_feat_aa64el3),
+ NEEDS_FEAT(HCR_EL2_AMO |
+ HCR_EL2_BSU |
+ HCR_EL2_CD |
+ HCR_EL2_DC |
+ HCR_EL2_FB |
+ HCR_EL2_FMO |
+ HCR_EL2_ID |
+ HCR_EL2_IMO |
+ HCR_EL2_MIOCNCE |
+ HCR_EL2_PTW |
+ HCR_EL2_SWIO |
+ HCR_EL2_TACR |
+ HCR_EL2_TDZ |
+ HCR_EL2_TGE |
+ HCR_EL2_TID1 |
+ HCR_EL2_TID2 |
+ HCR_EL2_TID3 |
+ HCR_EL2_TIDCP |
+ HCR_EL2_TPCP |
+ HCR_EL2_TPU |
+ HCR_EL2_TRVM |
+ HCR_EL2_TSC |
+ HCR_EL2_TSW |
+ HCR_EL2_TTLB |
+ HCR_EL2_TVM |
+ HCR_EL2_TWE |
+ HCR_EL2_TWI |
+ HCR_EL2_VF |
+ HCR_EL2_VI |
+ HCR_EL2_VM |
+ HCR_EL2_VSE,
+ FEAT_AA64EL1),
+ NEEDS_FEAT(HCR_EL2_AMVOFFEN, FEAT_AMUv1p1),
+ NEEDS_FEAT(HCR_EL2_EnSCXT, feat_csv2_2_csv2_1p2),
+ NEEDS_FEAT(HCR_EL2_TICAB |
+ HCR_EL2_TID4 |
+ HCR_EL2_TOCU,
+ FEAT_EVT),
+ NEEDS_FEAT(HCR_EL2_TTLBIS |
+ HCR_EL2_TTLBOS,
+ FEAT_EVT_TTLBxS),
+ NEEDS_FEAT(HCR_EL2_TLOR, FEAT_LOR),
+ NEEDS_FEAT(HCR_EL2_ATA |
+ HCR_EL2_DCT |
+ HCR_EL2_TID5,
+ FEAT_MTE2),
+ NEEDS_FEAT(HCR_EL2_AT | /* Ignore the original FEAT_NV */
+ HCR_EL2_NV2 |
+ HCR_EL2_NV,
+ feat_nv2),
+ NEEDS_FEAT(HCR_EL2_NV1, feat_nv2_e2h0_ni), /* Missing from JSON */
+ NEEDS_FEAT(HCR_EL2_API |
+ HCR_EL2_APK,
+ feat_pauth),
+ NEEDS_FEAT(HCR_EL2_TEA |
+ HCR_EL2_TERR,
+ FEAT_RAS),
+ NEEDS_FEAT(HCR_EL2_FIEN, feat_rasv1p1),
+ NEEDS_FEAT(HCR_EL2_GPF, FEAT_RME),
+ NEEDS_FEAT(HCR_EL2_FWB, FEAT_S2FWB),
+ NEEDS_FEAT(HCR_EL2_TME, FEAT_TME),
+ NEEDS_FEAT(HCR_EL2_TWEDEL |
+ HCR_EL2_TWEDEn,
+ FEAT_TWED),
+ NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
+};
+
+static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
+ int map_size, u64 res0, const char *str)
+{
+ u64 mask = 0;
+
+ for (int i = 0; i < map_size; i++)
+ mask |= map[i].bits;
+
+ if (mask != ~res0)
+ kvm_err("Undefined %s behaviour, bits %016llx\n",
+ str, mask ^ ~res0);
+}
+
+void __init check_feature_map(void)
+{
+ check_feat_map(hfgrtr_feat_map, ARRAY_SIZE(hfgrtr_feat_map),
+ hfgrtr_masks.res0, hfgrtr_masks.str);
+ check_feat_map(hfgwtr_feat_map, ARRAY_SIZE(hfgwtr_feat_map),
+ hfgwtr_masks.res0, hfgwtr_masks.str);
+ check_feat_map(hfgitr_feat_map, ARRAY_SIZE(hfgitr_feat_map),
+ hfgitr_masks.res0, hfgitr_masks.str);
+ check_feat_map(hdfgrtr_feat_map, ARRAY_SIZE(hdfgrtr_feat_map),
+ hdfgrtr_masks.res0, hdfgrtr_masks.str);
+ check_feat_map(hdfgwtr_feat_map, ARRAY_SIZE(hdfgwtr_feat_map),
+ hdfgwtr_masks.res0, hdfgwtr_masks.str);
+ check_feat_map(hafgrtr_feat_map, ARRAY_SIZE(hafgrtr_feat_map),
+ hafgrtr_masks.res0, hafgrtr_masks.str);
+ check_feat_map(hcrx_feat_map, ARRAY_SIZE(hcrx_feat_map),
+ __HCRX_EL2_RES0, "HCRX_EL2");
+ check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map),
+ HCR_EL2_RES0, "HCR_EL2");
+}
+
+static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
+{
+ u64 regval = kvm->arch.id_regs[map->regidx];
+ u64 regfld = (regval >> map->shift) & GENMASK(map->width - 1, 0);
+
+ if (map->sign) {
+ s64 sfld = sign_extend64(regfld, map->width - 1);
+ s64 slim = sign_extend64(map->lo_lim, map->width - 1);
+ return sfld >= slim;
+ } else {
+ return regfld >= map->lo_lim;
+ }
+}
+
+static u64 __compute_fixed_bits(struct kvm *kvm,
+ const struct reg_bits_to_feat_map *map,
+ int map_size,
+ u64 *fixed_bits,
+ unsigned long require,
+ unsigned long exclude)
+{
+ u64 val = 0;
+
+ for (int i = 0; i < map_size; i++) {
+ bool match;
+
+ if ((map[i].flags & require) != require)
+ continue;
+
+ if (map[i].flags & exclude)
+ continue;
+
+ if (map[i].flags & CALL_FUNC)
+ match = (map[i].flags & FIXED_VALUE) ?
+ map[i].fval(kvm, fixed_bits) :
+ map[i].match(kvm);
+ else
+ match = idreg_feat_match(kvm, &map[i]);
+
+ if (!match || (map[i].flags & FIXED_VALUE))
+ val |= map[i].bits;
+ }
+
+ return val;
+}
+
+static u64 compute_res0_bits(struct kvm *kvm,
+ const struct reg_bits_to_feat_map *map,
+ int map_size,
+ unsigned long require,
+ unsigned long exclude)
+{
+ return __compute_fixed_bits(kvm, map, map_size, NULL,
+ require, exclude | FIXED_VALUE);
+}
+
+static u64 compute_fixed_bits(struct kvm *kvm,
+ const struct reg_bits_to_feat_map *map,
+ int map_size,
+ u64 *fixed_bits,
+ unsigned long require,
+ unsigned long exclude)
+{
+ return __compute_fixed_bits(kvm, map, map_size, fixed_bits,
+ require | FIXED_VALUE, exclude);
+}
+
+void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
+{
+ u64 val = 0;
+
+ switch (fgt) {
+ case HFGRTR_GROUP:
+ val |= compute_res0_bits(kvm, hfgrtr_feat_map,
+ ARRAY_SIZE(hfgrtr_feat_map),
+ 0, NEVER_FGU);
+ val |= compute_res0_bits(kvm, hfgwtr_feat_map,
+ ARRAY_SIZE(hfgwtr_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HFGITR_GROUP:
+ val |= compute_res0_bits(kvm, hfgitr_feat_map,
+ ARRAY_SIZE(hfgitr_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HDFGRTR_GROUP:
+ val |= compute_res0_bits(kvm, hdfgrtr_feat_map,
+ ARRAY_SIZE(hdfgrtr_feat_map),
+ 0, NEVER_FGU);
+ val |= compute_res0_bits(kvm, hdfgwtr_feat_map,
+ ARRAY_SIZE(hdfgwtr_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HAFGRTR_GROUP:
+ val |= compute_res0_bits(kvm, hafgrtr_feat_map,
+ ARRAY_SIZE(hafgrtr_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HFGRTR2_GROUP:
+ val |= compute_res0_bits(kvm, hfgrtr2_feat_map,
+ ARRAY_SIZE(hfgrtr2_feat_map),
+ 0, NEVER_FGU);
+ val |= compute_res0_bits(kvm, hfgwtr2_feat_map,
+ ARRAY_SIZE(hfgwtr2_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HFGITR2_GROUP:
+ val |= compute_res0_bits(kvm, hfgitr2_feat_map,
+ ARRAY_SIZE(hfgitr2_feat_map),
+ 0, NEVER_FGU);
+ break;
+ case HDFGRTR2_GROUP:
+ val |= compute_res0_bits(kvm, hdfgrtr2_feat_map,
+ ARRAY_SIZE(hdfgrtr2_feat_map),
+ 0, NEVER_FGU);
+ val |= compute_res0_bits(kvm, hdfgwtr2_feat_map,
+ ARRAY_SIZE(hdfgwtr2_feat_map),
+ 0, NEVER_FGU);
+ break;
+ default:
+ BUG();
+ }
+
+ kvm->arch.fgu[fgt] = val;
+}
+
+void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1)
+{
+ u64 fixed = 0, mask;
+
+ switch (reg) {
+ case HFGRTR_EL2:
+ *res0 = compute_res0_bits(kvm, hfgrtr_feat_map,
+ ARRAY_SIZE(hfgrtr_feat_map), 0, 0);
+ *res0 |= hfgrtr_masks.res0;
+ *res1 = HFGRTR_EL2_RES1;
+ break;
+ case HFGWTR_EL2:
+ *res0 = compute_res0_bits(kvm, hfgwtr_feat_map,
+ ARRAY_SIZE(hfgwtr_feat_map), 0, 0);
+ *res0 |= hfgwtr_masks.res0;
+ *res1 = HFGWTR_EL2_RES1;
+ break;
+ case HFGITR_EL2:
+ *res0 = compute_res0_bits(kvm, hfgitr_feat_map,
+ ARRAY_SIZE(hfgitr_feat_map), 0, 0);
+ *res0 |= hfgitr_masks.res0;
+ *res1 = HFGITR_EL2_RES1;
+ break;
+ case HDFGRTR_EL2:
+ *res0 = compute_res0_bits(kvm, hdfgrtr_feat_map,
+ ARRAY_SIZE(hdfgrtr_feat_map), 0, 0);
+ *res0 |= hdfgrtr_masks.res0;
+ *res1 = HDFGRTR_EL2_RES1;
+ break;
+ case HDFGWTR_EL2:
+ *res0 = compute_res0_bits(kvm, hdfgwtr_feat_map,
+ ARRAY_SIZE(hdfgwtr_feat_map), 0, 0);
+ *res0 |= hdfgwtr_masks.res0;
+ *res1 = HDFGWTR_EL2_RES1;
+ break;
+ case HAFGRTR_EL2:
+ *res0 = compute_res0_bits(kvm, hafgrtr_feat_map,
+ ARRAY_SIZE(hafgrtr_feat_map), 0, 0);
+ *res0 |= hafgrtr_masks.res0;
+ *res1 = HAFGRTR_EL2_RES1;
+ break;
+ case HFGRTR2_EL2:
+ *res0 = compute_res0_bits(kvm, hfgrtr2_feat_map,
+ ARRAY_SIZE(hfgrtr2_feat_map), 0, 0);
+ *res0 |= hfgrtr2_masks.res0;
+ *res1 = HFGRTR2_EL2_RES1;
+ break;
+ case HFGWTR2_EL2:
+ *res0 = compute_res0_bits(kvm, hfgwtr2_feat_map,
+ ARRAY_SIZE(hfgwtr2_feat_map), 0, 0);
+ *res0 |= hfgwtr2_masks.res0;
+ *res1 = HFGWTR2_EL2_RES1;
+ break;
+ case HFGITR2_EL2:
+ *res0 = compute_res0_bits(kvm, hfgitr2_feat_map,
+ ARRAY_SIZE(hfgitr2_feat_map), 0, 0);
+ *res0 |= hfgitr2_masks.res0;
+ *res1 = HFGITR2_EL2_RES1;
+ break;
+ case HDFGRTR2_EL2:
+ *res0 = compute_res0_bits(kvm, hdfgrtr2_feat_map,
+ ARRAY_SIZE(hdfgrtr2_feat_map), 0, 0);
+ *res0 |= hdfgrtr2_masks.res0;
+ *res1 = HDFGRTR2_EL2_RES1;
+ break;
+ case HDFGWTR2_EL2:
+ *res0 = compute_res0_bits(kvm, hdfgwtr2_feat_map,
+ ARRAY_SIZE(hdfgwtr2_feat_map), 0, 0);
+ *res0 |= hdfgwtr2_masks.res0;
+ *res1 = HDFGWTR2_EL2_RES1;
+ break;
+ case HCRX_EL2:
+ *res0 = compute_res0_bits(kvm, hcrx_feat_map,
+ ARRAY_SIZE(hcrx_feat_map), 0, 0);
+ *res0 |= __HCRX_EL2_RES0;
+ *res1 = __HCRX_EL2_RES1;
+ break;
+ case HCR_EL2:
+ mask = compute_fixed_bits(kvm, hcr_feat_map,
+ ARRAY_SIZE(hcr_feat_map), &fixed,
+ 0, 0);
+ *res0 = compute_res0_bits(kvm, hcr_feat_map,
+ ARRAY_SIZE(hcr_feat_map), 0, 0);
+ *res0 |= HCR_EL2_RES0 | (mask & ~fixed);
+ *res1 = HCR_EL2_RES1 | (mask & fixed);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ *res0 = *res1 = 0;
+ break;
+ }
+}
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 607d37bab70b..3a384e9660b8 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -412,26 +412,26 @@ static const struct trap_bits coarse_trap_bits[] = {
},
[CGT_ICH_HCR_TC] = {
.index = ICH_HCR_EL2,
- .value = ICH_HCR_TC,
- .mask = ICH_HCR_TC,
+ .value = ICH_HCR_EL2_TC,
+ .mask = ICH_HCR_EL2_TC,
.behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TALL0] = {
.index = ICH_HCR_EL2,
- .value = ICH_HCR_TALL0,
- .mask = ICH_HCR_TALL0,
+ .value = ICH_HCR_EL2_TALL0,
+ .mask = ICH_HCR_EL2_TALL0,
.behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TALL1] = {
.index = ICH_HCR_EL2,
- .value = ICH_HCR_TALL1,
- .mask = ICH_HCR_TALL1,
+ .value = ICH_HCR_EL2_TALL1,
+ .mask = ICH_HCR_EL2_TALL1,
.behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TDIR] = {
.index = ICH_HCR_EL2,
- .value = ICH_HCR_TDIR,
- .mask = ICH_HCR_TDIR,
+ .value = ICH_HCR_EL2_TDIR,
+ .mask = ICH_HCR_EL2_TDIR,
.behaviour = BEHAVE_FORWARD_RW,
},
};
@@ -622,6 +622,11 @@ struct encoding_to_trap_config {
const unsigned int line;
};
+/*
+ * WARNING: using ranges is a treacherous endeavour, as sysregs that
+ * are part of an architectural range are not necessarily contiguous
+ * in the [Op0,Op1,CRn,CRm,Ops] space. Tread carefully.
+ */
#define SR_RANGE_TRAP(sr_start, sr_end, trap_id) \
{ \
.encoding = sr_start, \
@@ -1279,98 +1284,128 @@ enum fg_filter_id {
__NR_FG_FILTER_IDS__
};
-#define SR_FGF(sr, g, b, p, f) \
- { \
- .encoding = sr, \
- .end = sr, \
- .tc = { \
+#define __FGT(g, b, p, f) \
+ { \
.fgt = g ## _GROUP, \
.bit = g ## _EL2_ ## b ## _SHIFT, \
.pol = p, \
.fgf = f, \
- }, \
+ }
+
+#define FGT(g, b, p) __FGT(g, b, p, __NO_FGF__)
+
+/*
+ * See the warning next to SR_RANGE_TRAP(), and apply the same
+ * level of caution.
+ */
+#define SR_FGF_RANGE(sr, e, g, b, p, f) \
+ { \
+ .encoding = sr, \
+ .end = e, \
+ .tc = __FGT(g, b, p, f), \
.line = __LINE__, \
}
-#define SR_FGT(sr, g, b, p) SR_FGF(sr, g, b, p, __NO_FGF__)
+#define SR_FGF(sr, g, b, p, f) SR_FGF_RANGE(sr, sr, g, b, p, f)
+#define SR_FGT(sr, g, b, p) SR_FGF_RANGE(sr, sr, g, b, p, __NO_FGF__)
+#define SR_FGT_RANGE(sr, end, g, b, p) \
+ SR_FGF_RANGE(sr, end, g, b, p, __NO_FGF__)
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
- SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0),
- SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0),
- SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0),
- SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0),
- SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0),
- SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
- SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
- SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0),
- SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
- SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
- SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0),
- SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0),
- SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0),
- SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0),
- SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
- SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1),
- SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1),
- SR_FGT(SYS_ERXPFGCTL_EL1, HFGxTR, ERXPFGCTL_EL1, 1),
- SR_FGT(SYS_ERXPFGF_EL1, HFGxTR, ERXPFGF_EL1, 1),
- SR_FGT(SYS_ERXMISC0_EL1, HFGxTR, ERXMISCn_EL1, 1),
- SR_FGT(SYS_ERXMISC1_EL1, HFGxTR, ERXMISCn_EL1, 1),
- SR_FGT(SYS_ERXMISC2_EL1, HFGxTR, ERXMISCn_EL1, 1),
- SR_FGT(SYS_ERXMISC3_EL1, HFGxTR, ERXMISCn_EL1, 1),
- SR_FGT(SYS_ERXSTATUS_EL1, HFGxTR, ERXSTATUS_EL1, 1),
- SR_FGT(SYS_ERXCTLR_EL1, HFGxTR, ERXCTLR_EL1, 1),
- SR_FGT(SYS_ERXFR_EL1, HFGxTR, ERXFR_EL1, 1),
- SR_FGT(SYS_ERRSELR_EL1, HFGxTR, ERRSELR_EL1, 1),
- SR_FGT(SYS_ERRIDR_EL1, HFGxTR, ERRIDR_EL1, 1),
- SR_FGT(SYS_ICC_IGRPEN0_EL1, HFGxTR, ICC_IGRPENn_EL1, 1),
- SR_FGT(SYS_ICC_IGRPEN1_EL1, HFGxTR, ICC_IGRPENn_EL1, 1),
- SR_FGT(SYS_VBAR_EL1, HFGxTR, VBAR_EL1, 1),
- SR_FGT(SYS_TTBR1_EL1, HFGxTR, TTBR1_EL1, 1),
- SR_FGT(SYS_TTBR0_EL1, HFGxTR, TTBR0_EL1, 1),
- SR_FGT(SYS_TPIDR_EL0, HFGxTR, TPIDR_EL0, 1),
- SR_FGT(SYS_TPIDRRO_EL0, HFGxTR, TPIDRRO_EL0, 1),
- SR_FGT(SYS_TPIDR_EL1, HFGxTR, TPIDR_EL1, 1),
- SR_FGT(SYS_TCR_EL1, HFGxTR, TCR_EL1, 1),
- SR_FGT(SYS_TCR2_EL1, HFGxTR, TCR_EL1, 1),
- SR_FGT(SYS_SCXTNUM_EL0, HFGxTR, SCXTNUM_EL0, 1),
- SR_FGT(SYS_SCXTNUM_EL1, HFGxTR, SCXTNUM_EL1, 1),
- SR_FGT(SYS_SCTLR_EL1, HFGxTR, SCTLR_EL1, 1),
- SR_FGT(SYS_REVIDR_EL1, HFGxTR, REVIDR_EL1, 1),
- SR_FGT(SYS_PAR_EL1, HFGxTR, PAR_EL1, 1),
- SR_FGT(SYS_MPIDR_EL1, HFGxTR, MPIDR_EL1, 1),
- SR_FGT(SYS_MIDR_EL1, HFGxTR, MIDR_EL1, 1),
- SR_FGT(SYS_MAIR_EL1, HFGxTR, MAIR_EL1, 1),
- SR_FGT(SYS_LORSA_EL1, HFGxTR, LORSA_EL1, 1),
- SR_FGT(SYS_LORN_EL1, HFGxTR, LORN_EL1, 1),
- SR_FGT(SYS_LORID_EL1, HFGxTR, LORID_EL1, 1),
- SR_FGT(SYS_LOREA_EL1, HFGxTR, LOREA_EL1, 1),
- SR_FGT(SYS_LORC_EL1, HFGxTR, LORC_EL1, 1),
- SR_FGT(SYS_ISR_EL1, HFGxTR, ISR_EL1, 1),
- SR_FGT(SYS_FAR_EL1, HFGxTR, FAR_EL1, 1),
- SR_FGT(SYS_ESR_EL1, HFGxTR, ESR_EL1, 1),
- SR_FGT(SYS_DCZID_EL0, HFGxTR, DCZID_EL0, 1),
- SR_FGT(SYS_CTR_EL0, HFGxTR, CTR_EL0, 1),
- SR_FGT(SYS_CSSELR_EL1, HFGxTR, CSSELR_EL1, 1),
- SR_FGT(SYS_CPACR_EL1, HFGxTR, CPACR_EL1, 1),
- SR_FGT(SYS_CONTEXTIDR_EL1, HFGxTR, CONTEXTIDR_EL1, 1),
- SR_FGT(SYS_CLIDR_EL1, HFGxTR, CLIDR_EL1, 1),
- SR_FGT(SYS_CCSIDR_EL1, HFGxTR, CCSIDR_EL1, 1),
- SR_FGT(SYS_APIBKEYLO_EL1, HFGxTR, APIBKey, 1),
- SR_FGT(SYS_APIBKEYHI_EL1, HFGxTR, APIBKey, 1),
- SR_FGT(SYS_APIAKEYLO_EL1, HFGxTR, APIAKey, 1),
- SR_FGT(SYS_APIAKEYHI_EL1, HFGxTR, APIAKey, 1),
- SR_FGT(SYS_APGAKEYLO_EL1, HFGxTR, APGAKey, 1),
- SR_FGT(SYS_APGAKEYHI_EL1, HFGxTR, APGAKey, 1),
- SR_FGT(SYS_APDBKEYLO_EL1, HFGxTR, APDBKey, 1),
- SR_FGT(SYS_APDBKEYHI_EL1, HFGxTR, APDBKey, 1),
- SR_FGT(SYS_APDAKEYLO_EL1, HFGxTR, APDAKey, 1),
- SR_FGT(SYS_APDAKEYHI_EL1, HFGxTR, APDAKey, 1),
- SR_FGT(SYS_AMAIR_EL1, HFGxTR, AMAIR_EL1, 1),
- SR_FGT(SYS_AIDR_EL1, HFGxTR, AIDR_EL1, 1),
- SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1),
- SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1),
+ SR_FGT(SYS_AMAIR2_EL1, HFGRTR, nAMAIR2_EL1, 0),
+ SR_FGT(SYS_MAIR2_EL1, HFGRTR, nMAIR2_EL1, 0),
+ SR_FGT(SYS_S2POR_EL1, HFGRTR, nS2POR_EL1, 0),
+ SR_FGT(SYS_POR_EL1, HFGRTR, nPOR_EL1, 0),
+ SR_FGT(SYS_POR_EL0, HFGRTR, nPOR_EL0, 0),
+ SR_FGT(SYS_PIR_EL1, HFGRTR, nPIR_EL1, 0),
+ SR_FGT(SYS_PIRE0_EL1, HFGRTR, nPIRE0_EL1, 0),
+ SR_FGT(SYS_RCWMASK_EL1, HFGRTR, nRCWMASK_EL1, 0),
+ SR_FGT(SYS_TPIDR2_EL0, HFGRTR, nTPIDR2_EL0, 0),
+ SR_FGT(SYS_SMPRI_EL1, HFGRTR, nSMPRI_EL1, 0),
+ SR_FGT(SYS_GCSCR_EL1, HFGRTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSPR_EL1, HFGRTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSCRE0_EL1, HFGRTR, nGCS_EL0, 0),
+ SR_FGT(SYS_GCSPR_EL0, HFGRTR, nGCS_EL0, 0),
+ SR_FGT(SYS_ACCDATA_EL1, HFGRTR, nACCDATA_EL1, 0),
+ SR_FGT(SYS_ERXADDR_EL1, HFGRTR, ERXADDR_EL1, 1),
+ SR_FGT(SYS_ERXPFGCDN_EL1, HFGRTR, ERXPFGCDN_EL1, 1),
+ SR_FGT(SYS_ERXPFGCTL_EL1, HFGRTR, ERXPFGCTL_EL1, 1),
+ SR_FGT(SYS_ERXPFGF_EL1, HFGRTR, ERXPFGF_EL1, 1),
+ SR_FGT(SYS_ERXMISC0_EL1, HFGRTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC1_EL1, HFGRTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC2_EL1, HFGRTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC3_EL1, HFGRTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXSTATUS_EL1, HFGRTR, ERXSTATUS_EL1, 1),
+ SR_FGT(SYS_ERXCTLR_EL1, HFGRTR, ERXCTLR_EL1, 1),
+ SR_FGT(SYS_ERXFR_EL1, HFGRTR, ERXFR_EL1, 1),
+ SR_FGT(SYS_ERRSELR_EL1, HFGRTR, ERRSELR_EL1, 1),
+ SR_FGT(SYS_ERRIDR_EL1, HFGRTR, ERRIDR_EL1, 1),
+ SR_FGT(SYS_ICC_IGRPEN0_EL1, HFGRTR, ICC_IGRPENn_EL1, 1),
+ SR_FGT(SYS_ICC_IGRPEN1_EL1, HFGRTR, ICC_IGRPENn_EL1, 1),
+ SR_FGT(SYS_VBAR_EL1, HFGRTR, VBAR_EL1, 1),
+ SR_FGT(SYS_TTBR1_EL1, HFGRTR, TTBR1_EL1, 1),
+ SR_FGT(SYS_TTBR0_EL1, HFGRTR, TTBR0_EL1, 1),
+ SR_FGT(SYS_TPIDR_EL0, HFGRTR, TPIDR_EL0, 1),
+ SR_FGT(SYS_TPIDRRO_EL0, HFGRTR, TPIDRRO_EL0, 1),
+ SR_FGT(SYS_TPIDR_EL1, HFGRTR, TPIDR_EL1, 1),
+ SR_FGT(SYS_TCR_EL1, HFGRTR, TCR_EL1, 1),
+ SR_FGT(SYS_TCR2_EL1, HFGRTR, TCR_EL1, 1),
+ SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1),
+ SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1),
+ SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1),
+ SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1),
+ SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1),
+ SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1),
+ SR_FGT(SYS_MIDR_EL1, HFGRTR, MIDR_EL1, 1),
+ SR_FGT(SYS_MAIR_EL1, HFGRTR, MAIR_EL1, 1),
+ SR_FGT(SYS_LORSA_EL1, HFGRTR, LORSA_EL1, 1),
+ SR_FGT(SYS_LORN_EL1, HFGRTR, LORN_EL1, 1),
+ SR_FGT(SYS_LORID_EL1, HFGRTR, LORID_EL1, 1),
+ SR_FGT(SYS_LOREA_EL1, HFGRTR, LOREA_EL1, 1),
+ SR_FGT(SYS_LORC_EL1, HFGRTR, LORC_EL1, 1),
+ SR_FGT(SYS_ISR_EL1, HFGRTR, ISR_EL1, 1),
+ SR_FGT(SYS_FAR_EL1, HFGRTR, FAR_EL1, 1),
+ SR_FGT(SYS_ESR_EL1, HFGRTR, ESR_EL1, 1),
+ SR_FGT(SYS_DCZID_EL0, HFGRTR, DCZID_EL0, 1),
+ SR_FGT(SYS_CTR_EL0, HFGRTR, CTR_EL0, 1),
+ SR_FGT(SYS_CSSELR_EL1, HFGRTR, CSSELR_EL1, 1),
+ SR_FGT(SYS_CPACR_EL1, HFGRTR, CPACR_EL1, 1),
+ SR_FGT(SYS_CONTEXTIDR_EL1, HFGRTR, CONTEXTIDR_EL1, 1),
+ SR_FGT(SYS_CLIDR_EL1, HFGRTR, CLIDR_EL1, 1),
+ SR_FGT(SYS_CCSIDR_EL1, HFGRTR, CCSIDR_EL1, 1),
+ SR_FGT(SYS_APIBKEYLO_EL1, HFGRTR, APIBKey, 1),
+ SR_FGT(SYS_APIBKEYHI_EL1, HFGRTR, APIBKey, 1),
+ SR_FGT(SYS_APIAKEYLO_EL1, HFGRTR, APIAKey, 1),
+ SR_FGT(SYS_APIAKEYHI_EL1, HFGRTR, APIAKey, 1),
+ SR_FGT(SYS_APGAKEYLO_EL1, HFGRTR, APGAKey, 1),
+ SR_FGT(SYS_APGAKEYHI_EL1, HFGRTR, APGAKey, 1),
+ SR_FGT(SYS_APDBKEYLO_EL1, HFGRTR, APDBKey, 1),
+ SR_FGT(SYS_APDBKEYHI_EL1, HFGRTR, APDBKey, 1),
+ SR_FGT(SYS_APDAKEYLO_EL1, HFGRTR, APDAKey, 1),
+ SR_FGT(SYS_APDAKEYHI_EL1, HFGRTR, APDAKey, 1),
+ SR_FGT(SYS_AMAIR_EL1, HFGRTR, AMAIR_EL1, 1),
+ SR_FGT(SYS_AIDR_EL1, HFGRTR, AIDR_EL1, 1),
+ SR_FGT(SYS_AFSR1_EL1, HFGRTR, AFSR1_EL1, 1),
+ SR_FGT(SYS_AFSR0_EL1, HFGRTR, AFSR0_EL1, 1),
+
+ /* HFGRTR2_EL2, HFGWTR2_EL2 */
+ SR_FGT(SYS_ACTLRALIAS_EL1, HFGRTR2, nACTLRALIAS_EL1, 0),
+ SR_FGT(SYS_ACTLRMASK_EL1, HFGRTR2, nACTLRMASK_EL1, 0),
+ SR_FGT(SYS_CPACRALIAS_EL1, HFGRTR2, nCPACRALIAS_EL1, 0),
+ SR_FGT(SYS_CPACRMASK_EL1, HFGRTR2, nCPACRMASK_EL1, 0),
+ SR_FGT(SYS_PFAR_EL1, HFGRTR2, nPFAR_EL1, 0),
+ SR_FGT(SYS_RCWSMASK_EL1, HFGRTR2, nRCWSMASK_EL1, 0),
+ SR_FGT(SYS_SCTLR2ALIAS_EL1, HFGRTR2, nSCTLRALIAS2_EL1, 0),
+ SR_FGT(SYS_SCTLR2MASK_EL1, HFGRTR2, nSCTLR2MASK_EL1, 0),
+ SR_FGT(SYS_SCTLRALIAS_EL1, HFGRTR2, nSCTLRALIAS_EL1, 0),
+ SR_FGT(SYS_SCTLRMASK_EL1, HFGRTR2, nSCTLRMASK_EL1, 0),
+ SR_FGT(SYS_TCR2ALIAS_EL1, HFGRTR2, nTCR2ALIAS_EL1, 0),
+ SR_FGT(SYS_TCR2MASK_EL1, HFGRTR2, nTCR2MASK_EL1, 0),
+ SR_FGT(SYS_TCRALIAS_EL1, HFGRTR2, nTCRALIAS_EL1, 0),
+ SR_FGT(SYS_TCRMASK_EL1, HFGRTR2, nTCRMASK_EL1, 0),
+ SR_FGT(SYS_ERXGSR_EL1, HFGRTR2, nERXGSR_EL1, 0),
+
/* HFGITR_EL2 */
SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1),
SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1),
@@ -1480,6 +1515,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_IC_IVAU, HFGITR, ICIVAU, 1),
SR_FGT(SYS_IC_IALLU, HFGITR, ICIALLU, 1),
SR_FGT(SYS_IC_IALLUIS, HFGITR, ICIALLUIS, 1),
+
+ /* HFGITR2_EL2 */
+ SR_FGT(SYS_DC_CIGDVAPS, HFGITR2, nDCCIVAPS, 0),
+ SR_FGT(SYS_DC_CIVAPS, HFGITR2, nDCCIVAPS, 0),
+
/* HDFGRTR_EL2 */
SR_FGT(SYS_PMBIDR_EL1, HDFGRTR, PMBIDR_EL1, 1),
SR_FGT(SYS_PMSNEVFR_EL1, HDFGRTR, nPMSNEVFR_EL1, 0),
@@ -1789,68 +1829,12 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_PMCNTENSET_EL0, HDFGRTR, PMCNTEN, 1),
SR_FGT(SYS_PMCCNTR_EL0, HDFGRTR, PMCCNTR_EL0, 1),
SR_FGT(SYS_PMCCFILTR_EL0, HDFGRTR, PMCCFILTR_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(0), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(1), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(2), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(3), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(4), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(5), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(6), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(7), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(8), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(9), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(10), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(11), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(12), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(13), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(14), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(15), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(16), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(17), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(18), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(19), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(20), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(21), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(22), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(23), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(24), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(25), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(26), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(27), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(28), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(29), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVTYPERn_EL0(30), HDFGRTR, PMEVTYPERn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(0), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(1), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(2), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(3), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(4), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(5), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(6), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(7), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(8), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(9), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(10), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(11), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(12), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(13), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(14), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(15), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(16), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(17), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(18), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(19), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(20), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(21), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(22), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(23), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(24), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(25), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(26), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(27), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(28), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(29), HDFGRTR, PMEVCNTRn_EL0, 1),
- SR_FGT(SYS_PMEVCNTRn_EL0(30), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT_RANGE(SYS_PMEVTYPERn_EL0(0),
+ SYS_PMEVTYPERn_EL0(30),
+ HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT_RANGE(SYS_PMEVCNTRn_EL0(0),
+ SYS_PMEVCNTRn_EL0(30),
+ HDFGRTR, PMEVCNTRn_EL0, 1),
SR_FGT(SYS_OSDLR_EL1, HDFGRTR, OSDLR_EL1, 1),
SR_FGT(SYS_OSECCR_EL1, HDFGRTR, OSECCR_EL1, 1),
SR_FGT(SYS_OSLSR_EL1, HDFGRTR, OSLSR_EL1, 1),
@@ -1928,6 +1912,59 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_DBGBCRn_EL1(13), HDFGRTR, DBGBCRn_EL1, 1),
SR_FGT(SYS_DBGBCRn_EL1(14), HDFGRTR, DBGBCRn_EL1, 1),
SR_FGT(SYS_DBGBCRn_EL1(15), HDFGRTR, DBGBCRn_EL1, 1),
+
+ /* HDFGRTR2_EL2 */
+ SR_FGT(SYS_MDSELR_EL1, HDFGRTR2, nMDSELR_EL1, 0),
+ SR_FGT(SYS_MDSTEPOP_EL1, HDFGRTR2, nMDSTEPOP_EL1, 0),
+ SR_FGT(SYS_PMCCNTSVR_EL1, HDFGRTR2, nPMSSDATA, 0),
+ SR_FGT_RANGE(SYS_PMEVCNTSVRn_EL1(0),
+ SYS_PMEVCNTSVRn_EL1(30),
+ HDFGRTR2, nPMSSDATA, 0),
+ SR_FGT(SYS_PMICNTSVR_EL1, HDFGRTR2, nPMSSDATA, 0),
+ SR_FGT(SYS_PMECR_EL1, HDFGRTR2, nPMECR_EL1, 0),
+ SR_FGT(SYS_PMIAR_EL1, HDFGRTR2, nPMIAR_EL1, 0),
+ SR_FGT(SYS_PMICFILTR_EL0, HDFGRTR2, nPMICFILTR_EL0, 0),
+ SR_FGT(SYS_PMICNTR_EL0, HDFGRTR2, nPMICNTR_EL0, 0),
+ SR_FGT(SYS_PMSSCR_EL1, HDFGRTR2, nPMSSCR_EL1, 0),
+ SR_FGT(SYS_PMUACR_EL1, HDFGRTR2, nPMUACR_EL1, 0),
+ SR_FGT(SYS_SPMACCESSR_EL1, HDFGRTR2, nSPMACCESSR_EL1, 0),
+ SR_FGT(SYS_SPMCFGR_EL1, HDFGRTR2, nSPMID, 0),
+ SR_FGT(SYS_SPMDEVARCH_EL1, HDFGRTR2, nSPMID, 0),
+ SR_FGT(SYS_SPMCGCRn_EL1(0), HDFGRTR2, nSPMID, 0),
+ SR_FGT(SYS_SPMCGCRn_EL1(1), HDFGRTR2, nSPMID, 0),
+ SR_FGT(SYS_SPMIIDR_EL1, HDFGRTR2, nSPMID, 0),
+ SR_FGT(SYS_SPMCNTENCLR_EL0, HDFGRTR2, nSPMCNTEN, 0),
+ SR_FGT(SYS_SPMCNTENSET_EL0, HDFGRTR2, nSPMCNTEN, 0),
+ SR_FGT(SYS_SPMCR_EL0, HDFGRTR2, nSPMCR_EL0, 0),
+ SR_FGT(SYS_SPMDEVAFF_EL1, HDFGRTR2, nSPMDEVAFF_EL1, 0),
+ /*
+ * We have up to 64 of these registers in ranges of 16, banked via
+ * SPMSELR_EL0.BANK. We're only concerned with the accessors here,
+ * not the architectural registers.
+ */
+ SR_FGT_RANGE(SYS_SPMEVCNTRn_EL0(0),
+ SYS_SPMEVCNTRn_EL0(15),
+ HDFGRTR2, nSPMEVCNTRn_EL0, 0),
+ SR_FGT_RANGE(SYS_SPMEVFILT2Rn_EL0(0),
+ SYS_SPMEVFILT2Rn_EL0(15),
+ HDFGRTR2, nSPMEVTYPERn_EL0, 0),
+ SR_FGT_RANGE(SYS_SPMEVFILTRn_EL0(0),
+ SYS_SPMEVFILTRn_EL0(15),
+ HDFGRTR2, nSPMEVTYPERn_EL0, 0),
+ SR_FGT_RANGE(SYS_SPMEVTYPERn_EL0(0),
+ SYS_SPMEVTYPERn_EL0(15),
+ HDFGRTR2, nSPMEVTYPERn_EL0, 0),
+ SR_FGT(SYS_SPMINTENCLR_EL1, HDFGRTR2, nSPMINTEN, 0),
+ SR_FGT(SYS_SPMINTENSET_EL1, HDFGRTR2, nSPMINTEN, 0),
+ SR_FGT(SYS_SPMOVSCLR_EL0, HDFGRTR2, nSPMOVS, 0),
+ SR_FGT(SYS_SPMOVSSET_EL0, HDFGRTR2, nSPMOVS, 0),
+ SR_FGT(SYS_SPMSCR_EL1, HDFGRTR2, nSPMSCR_EL1, 0),
+ SR_FGT(SYS_SPMSELR_EL0, HDFGRTR2, nSPMSELR_EL0, 0),
+ SR_FGT(SYS_TRCITECR_EL1, HDFGRTR2, nTRCITECR_EL1, 0),
+ SR_FGT(SYS_PMBMAR_EL1, HDFGRTR2, nPMBMAR_EL1, 0),
+ SR_FGT(SYS_PMSDSFR_EL1, HDFGRTR2, nPMSDSFR_EL1, 0),
+ SR_FGT(SYS_TRBMPAM_EL1, HDFGRTR2, nTRBMPAM_EL1, 0),
+
/*
* HDFGWTR_EL2
*
@@ -1938,12 +1975,19 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
* read-side mappings, and only the write-side mappings that
* differ from the read side, and the trap handler will pick
* the correct shadow register based on the access type.
+ *
+ * Same model applies to the FEAT_FGT2 registers.
*/
SR_FGT(SYS_TRFCR_EL1, HDFGWTR, TRFCR_EL1, 1),
SR_FGT(SYS_TRCOSLAR, HDFGWTR, TRCOSLAR, 1),
SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1),
SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1),
SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1),
+
+ /* HDFGWTR2_EL2 */
+ SR_FGT(SYS_PMZR_EL0, HDFGWTR2, nPMZR_EL0, 0),
+ SR_FGT(SYS_SPMZR_EL0, HDFGWTR2, nSPMEVCNTRn_EL0, 0),
+
/*
* HAFGRTR_EL2
*/
@@ -1989,6 +2033,20 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
};
+/*
+ * Additional FGTs that do not fire with ESR_EL2.EC==0x18. This table
+ * isn't used for exception routing, but only as a promise that the
+ * trap is handled somewhere else.
+ */
+static const union trap_config non_0x18_fgt[] __initconst = {
+ FGT(HFGITR, PSBCSYNC, 1),
+ FGT(HFGITR, nGCSSTR_EL1, 0),
+ FGT(HFGITR, SVC_EL1, 1),
+ FGT(HFGITR, SVC_EL0, 1),
+ FGT(HFGITR, ERET, 1),
+ FGT(HFGITR2, TSBCSYNC, 1),
+};
+
static union trap_config get_trap_config(u32 sysreg)
{
return (union trap_config) {
@@ -2033,6 +2091,130 @@ static u32 encoding_next(u32 encoding)
return sys_reg(op0 + 1, 0, 0, 0, 0);
}
+#define FGT_MASKS(__n, __m) \
+ struct fgt_masks __n = { .str = #__m, .res0 = __m, }
+
+FGT_MASKS(hfgrtr_masks, HFGRTR_EL2_RES0);
+FGT_MASKS(hfgwtr_masks, HFGWTR_EL2_RES0);
+FGT_MASKS(hfgitr_masks, HFGITR_EL2_RES0);
+FGT_MASKS(hdfgrtr_masks, HDFGRTR_EL2_RES0);
+FGT_MASKS(hdfgwtr_masks, HDFGWTR_EL2_RES0);
+FGT_MASKS(hafgrtr_masks, HAFGRTR_EL2_RES0);
+FGT_MASKS(hfgrtr2_masks, HFGRTR2_EL2_RES0);
+FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2_RES0);
+FGT_MASKS(hfgitr2_masks, HFGITR2_EL2_RES0);
+FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2_RES0);
+FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2_RES0);
+
+static __init bool aggregate_fgt(union trap_config tc)
+{
+ struct fgt_masks *rmasks, *wmasks;
+
+ switch (tc.fgt) {
+ case HFGRTR_GROUP:
+ rmasks = &hfgrtr_masks;
+ wmasks = &hfgwtr_masks;
+ break;
+ case HDFGRTR_GROUP:
+ rmasks = &hdfgrtr_masks;
+ wmasks = &hdfgwtr_masks;
+ break;
+ case HAFGRTR_GROUP:
+ rmasks = &hafgrtr_masks;
+ wmasks = NULL;
+ break;
+ case HFGITR_GROUP:
+ rmasks = &hfgitr_masks;
+ wmasks = NULL;
+ break;
+ case HFGRTR2_GROUP:
+ rmasks = &hfgrtr2_masks;
+ wmasks = &hfgwtr2_masks;
+ break;
+ case HDFGRTR2_GROUP:
+ rmasks = &hdfgrtr2_masks;
+ wmasks = &hdfgwtr2_masks;
+ break;
+ case HFGITR2_GROUP:
+ rmasks = &hfgitr2_masks;
+ wmasks = NULL;
+ break;
+ }
+
+ /*
+ * A bit can be reserved in either the R or W register, but
+ * not both.
+ */
+ if ((BIT(tc.bit) & rmasks->res0) &&
+ (!wmasks || (BIT(tc.bit) & wmasks->res0)))
+ return false;
+
+ if (tc.pol)
+ rmasks->mask |= BIT(tc.bit) & ~rmasks->res0;
+ else
+ rmasks->nmask |= BIT(tc.bit) & ~rmasks->res0;
+
+ if (wmasks) {
+ if (tc.pol)
+ wmasks->mask |= BIT(tc.bit) & ~wmasks->res0;
+ else
+ wmasks->nmask |= BIT(tc.bit) & ~wmasks->res0;
+ }
+
+ return true;
+}
+
+static __init int check_fgt_masks(struct fgt_masks *masks)
+{
+ unsigned long duplicate = masks->mask & masks->nmask;
+ u64 res0 = masks->res0;
+ int ret = 0;
+
+ if (duplicate) {
+ int i;
+
+ for_each_set_bit(i, &duplicate, 64) {
+ kvm_err("%s[%d] bit has both polarities\n",
+ masks->str, i);
+ }
+
+ ret = -EINVAL;
+ }
+
+ masks->res0 = ~(masks->mask | masks->nmask);
+ if (masks->res0 != res0)
+ kvm_info("Implicit %s = %016llx, expecting %016llx\n",
+ masks->str, masks->res0, res0);
+
+ return ret;
+}
+
+static __init int check_all_fgt_masks(int ret)
+{
+ static struct fgt_masks * const masks[] __initconst = {
+ &hfgrtr_masks,
+ &hfgwtr_masks,
+ &hfgitr_masks,
+ &hdfgrtr_masks,
+ &hdfgwtr_masks,
+ &hafgrtr_masks,
+ &hfgrtr2_masks,
+ &hfgwtr2_masks,
+ &hfgitr2_masks,
+ &hdfgrtr2_masks,
+ &hdfgwtr2_masks,
+ };
+ int err = 0;
+
+ for (int i = 0; i < ARRAY_SIZE(masks); i++)
+ err |= check_fgt_masks(masks[i]);
+
+ return ret ?: err;
+}
+
+#define for_each_encoding_in(__x, __s, __e) \
+ for (u32 __x = __s; __x <= __e; __x = encoding_next(__x))
+
int __init populate_nv_trap_config(void)
{
int ret = 0;
@@ -2041,6 +2223,7 @@ int __init populate_nv_trap_config(void)
BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+ BUILD_BUG_ON(__HCRX_EL2_MASK & __HCRX_EL2_nMASK);
for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
@@ -2051,7 +2234,7 @@ int __init populate_nv_trap_config(void)
ret = -EINVAL;
}
- for (u32 enc = cgt->encoding; enc <= cgt->end; enc = encoding_next(enc)) {
+ for_each_encoding_in(enc, cgt->encoding, cgt->end) {
prev = xa_store(&sr_forward_xa, enc,
xa_mk_value(cgt->tc.val), GFP_KERNEL);
if (prev && !xa_is_err(prev)) {
@@ -2066,6 +2249,10 @@ int __init populate_nv_trap_config(void)
}
}
+ if (__HCRX_EL2_RES0 != HCRX_EL2_RES0)
+ kvm_info("Sanitised HCR_EL2_RES0 = %016llx, expecting %016llx\n",
+ __HCRX_EL2_RES0, HCRX_EL2_RES0);
+
kvm_info("nv: %ld coarse grained trap handlers\n",
ARRAY_SIZE(encoding_to_cgt));
@@ -2082,23 +2269,39 @@ int __init populate_nv_trap_config(void)
print_nv_trap_error(fgt, "Invalid FGT", ret);
}
- tc = get_trap_config(fgt->encoding);
+ for_each_encoding_in(enc, fgt->encoding, fgt->end) {
+ tc = get_trap_config(enc);
- if (tc.fgt) {
- ret = -EINVAL;
- print_nv_trap_error(fgt, "Duplicate FGT", ret);
- }
+ if (tc.fgt) {
+ ret = -EINVAL;
+ print_nv_trap_error(fgt, "Duplicate FGT", ret);
+ }
+
+ tc.val |= fgt->tc.val;
+ prev = xa_store(&sr_forward_xa, enc,
+ xa_mk_value(tc.val), GFP_KERNEL);
+
+ if (xa_is_err(prev)) {
+ ret = xa_err(prev);
+ print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+ }
- tc.val |= fgt->tc.val;
- prev = xa_store(&sr_forward_xa, fgt->encoding,
- xa_mk_value(tc.val), GFP_KERNEL);
+ if (!aggregate_fgt(tc)) {
+ ret = -EINVAL;
+ print_nv_trap_error(fgt, "FGT bit is reserved", ret);
+ }
+ }
+ }
- if (xa_is_err(prev)) {
- ret = xa_err(prev);
- print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+ for (int i = 0; i < ARRAY_SIZE(non_0x18_fgt); i++) {
+ if (!aggregate_fgt(non_0x18_fgt[i])) {
+ ret = -EINVAL;
+ kvm_err("non_0x18_fgt[%d] is reserved\n", i);
}
}
+ ret = check_all_fgt_masks(ret);
+
kvm_info("nv: %ld fine grained trap handlers\n",
ARRAY_SIZE(encoding_to_fgt));
@@ -2215,11 +2418,11 @@ static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr)
return masks->mask[sr - __VNCR_START__].res0;
}
-static bool check_fgt_bit(struct kvm_vcpu *vcpu, bool is_read,
- u64 val, const union trap_config tc)
+static bool check_fgt_bit(struct kvm_vcpu *vcpu, enum vcpu_sysreg sr,
+ const union trap_config tc)
{
struct kvm *kvm = vcpu->kvm;
- enum vcpu_sysreg sr;
+ u64 val;
/*
* KVM doesn't know about any FGTs that apply to the host, and hopefully
@@ -2228,6 +2431,8 @@ static bool check_fgt_bit(struct kvm_vcpu *vcpu, bool is_read,
if (is_hyp_ctxt(vcpu))
return false;
+ val = __vcpu_sys_reg(vcpu, sr);
+
if (tc.pol)
return (val & BIT(tc.bit));
@@ -2242,38 +2447,17 @@ static bool check_fgt_bit(struct kvm_vcpu *vcpu, bool is_read,
if (val & BIT(tc.bit))
return false;
- switch ((enum fgt_group_id)tc.fgt) {
- case HFGxTR_GROUP:
- sr = is_read ? HFGRTR_EL2 : HFGWTR_EL2;
- break;
-
- case HDFGRTR_GROUP:
- sr = is_read ? HDFGRTR_EL2 : HDFGWTR_EL2;
- break;
-
- case HAFGRTR_GROUP:
- sr = HAFGRTR_EL2;
- break;
-
- case HFGITR_GROUP:
- sr = HFGITR_EL2;
- break;
-
- default:
- WARN_ONCE(1, "Unhandled FGT group");
- return false;
- }
-
return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit));
}
bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
{
+ enum vcpu_sysreg fgtreg;
union trap_config tc;
enum trap_behaviour b;
bool is_read;
u32 sysreg;
- u64 esr, val;
+ u64 esr;
esr = kvm_vcpu_get_esr(vcpu);
sysreg = esr_sys64_to_sysreg(esr);
@@ -2319,26 +2503,20 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
case __NO_FGT_GROUP__:
break;
- case HFGxTR_GROUP:
- if (is_read)
- val = __vcpu_sys_reg(vcpu, HFGRTR_EL2);
- else
- val = __vcpu_sys_reg(vcpu, HFGWTR_EL2);
+ case HFGRTR_GROUP:
+ fgtreg = is_read ? HFGRTR_EL2 : HFGWTR_EL2;
break;
case HDFGRTR_GROUP:
- if (is_read)
- val = __vcpu_sys_reg(vcpu, HDFGRTR_EL2);
- else
- val = __vcpu_sys_reg(vcpu, HDFGWTR_EL2);
+ fgtreg = is_read ? HDFGRTR_EL2 : HDFGWTR_EL2;
break;
case HAFGRTR_GROUP:
- val = __vcpu_sys_reg(vcpu, HAFGRTR_EL2);
+ fgtreg = HAFGRTR_EL2;
break;
case HFGITR_GROUP:
- val = __vcpu_sys_reg(vcpu, HFGITR_EL2);
+ fgtreg = HFGITR_EL2;
switch (tc.fgf) {
u64 tmp;
@@ -2352,13 +2530,26 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
}
break;
- case __NR_FGT_GROUP_IDS__:
+ case HFGRTR2_GROUP:
+ fgtreg = is_read ? HFGRTR2_EL2 : HFGWTR2_EL2;
+ break;
+
+ case HDFGRTR2_GROUP:
+ fgtreg = is_read ? HDFGRTR2_EL2 : HDFGWTR2_EL2;
+ break;
+
+ case HFGITR2_GROUP:
+ fgtreg = HFGITR2_EL2;
+ break;
+
+ default:
/* Something is really wrong, bail out */
- WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+ WARN_ONCE(1, "Bad FGT group (encoding %08x, config %016llx)\n",
+ sysreg, tc.val);
goto local;
}
- if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu, is_read, val, tc))
+ if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu, fgtreg, tc))
goto inject;
b = compute_trap_behaviour(vcpu, tc);
@@ -2471,13 +2662,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
{
u64 spsr, elr, esr;
- /*
- * Forward this trap to the virtual EL2 if the virtual
- * HCR_EL2.NV bit is set and this is coming from !EL2.
- */
- if (forward_hcr_traps(vcpu, HCR_NV))
- return;
-
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
spsr = kvm_check_illegal_exception_return(vcpu, spsr);
@@ -2503,6 +2687,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
}
preempt_disable();
+ vcpu_set_flag(vcpu, IN_NESTED_ERET);
kvm_arch_vcpu_put(vcpu);
if (!esr_iss_is_eretax(esr))
@@ -2514,9 +2699,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
*vcpu_cpsr(vcpu) = spsr;
kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ vcpu_clear_flag(vcpu, IN_NESTED_ERET);
preempt_enable();
- kvm_pmu_nested_transition(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_nested_transition(vcpu);
}
static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
@@ -2599,7 +2786,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
- kvm_pmu_nested_transition(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_nested_transition(vcpu);
return 1;
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 512d152233ff..453266c96481 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -10,6 +10,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/ubsan.h>
#include <asm/esr.h>
#include <asm/exception.h>
@@ -129,8 +130,12 @@ static int kvm_handle_fpasimd(struct kvm_vcpu *vcpu)
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
+ bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
- if (esr & ESR_ELx_WFx_ISS_WFE) {
+ if (guest_hyp_wfx_traps_enabled(vcpu))
+ return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+ if (is_wfe) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
} else {
@@ -294,6 +299,81 @@ static int handle_svc(struct kvm_vcpu *vcpu)
return 1;
}
+static int kvm_handle_gcs(struct kvm_vcpu *vcpu)
+{
+ /* We don't expect GCS, so treat it with contempt */
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, GCS, IMP))
+ WARN_ON_ONCE(1);
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static int handle_other(struct kvm_vcpu *vcpu)
+{
+ bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+ u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 iss = ESR_ELx_ISS(esr);
+ struct kvm *kvm = vcpu->kvm;
+ bool allowed, fwd = false;
+
+ /*
+ * We only trap for two reasons:
+ *
+ * - the feature is disabled, and the only outcome is to
+ * generate an UNDEF.
+ *
+ * - the feature is enabled, but a NV guest wants to trap the
+ * feature used by its L2 guest. We forward the exception in
+ * this case.
+ *
+ * What we don't expect is to end-up here if the guest is
+ * expected be be able to directly use the feature, hence the
+ * WARN_ON below.
+ */
+ switch (iss) {
+ case ESR_ELx_ISS_OTHER_ST64BV:
+ allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V);
+ if (is_l2)
+ fwd = !(hcrx & HCRX_EL2_EnASR);
+ break;
+ case ESR_ELx_ISS_OTHER_ST64BV0:
+ allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA);
+ if (is_l2)
+ fwd = !(hcrx & HCRX_EL2_EnAS0);
+ break;
+ case ESR_ELx_ISS_OTHER_LDST64B:
+ allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64);
+ if (is_l2)
+ fwd = !(hcrx & HCRX_EL2_EnALS);
+ break;
+ case ESR_ELx_ISS_OTHER_TSBCSYNC:
+ allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1);
+ if (is_l2)
+ fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
+ break;
+ case ESR_ELx_ISS_OTHER_PSBCSYNC:
+ allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5);
+ if (is_l2)
+ fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
+ break;
+ default:
+ /* Clearly, we're missing something. */
+ WARN_ON_ONCE(1);
+ allowed = false;
+ }
+
+ WARN_ON_ONCE(allowed && !fwd);
+
+ if (allowed && fwd)
+ kvm_inject_nested_sync(vcpu, esr);
+ else
+ kvm_inject_undefined(vcpu);
+
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
@@ -303,6 +383,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store,
[ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id,
[ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64,
+ [ESR_ELx_EC_OTHER] = handle_other,
[ESR_ELx_EC_HVC32] = handle_hvc,
[ESR_ELx_EC_SMC32] = handle_smc,
[ESR_ELx_EC_HVC64] = handle_hvc,
@@ -313,6 +394,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_ERET] = kvm_handle_eret,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
+ [ESR_ELx_EC_DABT_CUR] = kvm_handle_vncr_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
@@ -320,6 +402,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
[ESR_ELx_EC_FP_ASIMD] = kvm_handle_fpasimd,
[ESR_ELx_EC_PAC] = kvm_handle_ptrauth,
+ [ESR_ELx_EC_GCS] = kvm_handle_gcs,
};
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
@@ -470,6 +553,11 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
print_nvhe_hyp_panic("BUG", panic_addr);
} else if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) {
kvm_nvhe_report_cfi_failure(panic_addr);
+ } else if (IS_ENABLED(CONFIG_UBSAN_KVM_EL2) &&
+ ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
+ esr_is_ubsan_brk(esr)) {
+ print_nvhe_hyp_panic(report_ubsan_failure(esr & UBSAN_BRK_MASK),
+ panic_addr);
} else {
print_nvhe_hyp_panic("panic", panic_addr);
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 17df94570f03..fc573fc767b0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -12,6 +12,16 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static inline bool __fault_safe_to_translate(u64 esr)
+{
+ u64 fsc = esr & ESR_ELx_FSC;
+
+ if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr))
+ return false;
+
+ return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV));
+}
+
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
int ret;
@@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
return true;
}
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+/*
+ * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR.
+ */
+static inline bool __hpfar_valid(u64 esr)
{
- u64 hpfar, far;
-
- far = read_sysreg_el2(SYS_FAR);
-
/*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
+ * CPUs affected by ARM erratum #834220 may incorrectly report a
+ * stage-2 translation fault when a stage-1 permission fault occurs.
*
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
+ * Re-walk the page tables to determine if a stage-1 fault actually
+ * occurred.
*/
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- esr_fsc_is_permission_fault(esr))) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_834220) &&
+ esr_fsc_is_translation_fault(esr))
+ return false;
+
+ if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr))
+ return true;
+
+ if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr))
+ return true;
+
+ return esr_fsc_is_addr_sz_fault(esr);
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+ u64 hpfar;
+
+ fault->far_el2 = read_sysreg_el2(SYS_FAR);
+ fault->hpfar_el2 = 0;
+
+ if (__hpfar_valid(esr))
hpfar = read_sysreg(hpfar_el2);
- }
+ else if (unlikely(!__fault_safe_to_translate(esr)))
+ return true;
+ else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar))
+ return false;
- fault->far_el2 = far;
- fault->hpfar_el2 = hpfar;
+ /*
+ * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid
+ * HPFAR value.
+ */
+ fault->hpfar_el2 = hpfar | HPFAR_EL2_NS;
return true;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 23bbe28eaaf9..bb9f2eecfb67 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -65,12 +65,56 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
}
}
+#define reg_to_fgt_masks(reg) \
+ ({ \
+ struct fgt_masks *m; \
+ switch(reg) { \
+ case HFGRTR_EL2: \
+ m = &hfgrtr_masks; \
+ break; \
+ case HFGWTR_EL2: \
+ m = &hfgwtr_masks; \
+ break; \
+ case HFGITR_EL2: \
+ m = &hfgitr_masks; \
+ break; \
+ case HDFGRTR_EL2: \
+ m = &hdfgrtr_masks; \
+ break; \
+ case HDFGWTR_EL2: \
+ m = &hdfgwtr_masks; \
+ break; \
+ case HAFGRTR_EL2: \
+ m = &hafgrtr_masks; \
+ break; \
+ case HFGRTR2_EL2: \
+ m = &hfgrtr2_masks; \
+ break; \
+ case HFGWTR2_EL2: \
+ m = &hfgwtr2_masks; \
+ break; \
+ case HFGITR2_EL2: \
+ m = &hfgitr2_masks; \
+ break; \
+ case HDFGRTR2_EL2: \
+ m = &hdfgrtr2_masks; \
+ break; \
+ case HDFGWTR2_EL2: \
+ m = &hdfgwtr2_masks; \
+ break; \
+ default: \
+ BUILD_BUG_ON(1); \
+ } \
+ \
+ m; \
+ })
+
#define compute_clr_set(vcpu, reg, clr, set) \
do { \
- u64 hfg; \
- hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \
- set |= hfg & __ ## reg ## _MASK; \
- clr |= ~hfg & __ ## reg ## _nMASK; \
+ u64 hfg = __vcpu_sys_reg(vcpu, reg); \
+ struct fgt_masks *m = reg_to_fgt_masks(reg); \
+ set |= hfg & m->mask; \
+ clr |= ~hfg & m->nmask; \
} while(0)
#define reg_to_fgt_group_id(reg) \
@@ -79,7 +123,7 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
switch(reg) { \
case HFGRTR_EL2: \
case HFGWTR_EL2: \
- id = HFGxTR_GROUP; \
+ id = HFGRTR_GROUP; \
break; \
case HFGITR_EL2: \
id = HFGITR_GROUP; \
@@ -91,6 +135,17 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
case HAFGRTR_EL2: \
id = HAFGRTR_GROUP; \
break; \
+ case HFGRTR2_EL2: \
+ case HFGWTR2_EL2: \
+ id = HFGRTR2_GROUP; \
+ break; \
+ case HFGITR2_EL2: \
+ id = HFGITR2_GROUP; \
+ break; \
+ case HDFGRTR2_EL2: \
+ case HDFGWTR2_EL2: \
+ id = HDFGRTR2_GROUP; \
+ break; \
default: \
BUILD_BUG_ON(1); \
} \
@@ -101,13 +156,16 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \
do { \
u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \
- set |= hfg & __ ## reg ## _MASK; \
- clr |= hfg & __ ## reg ## _nMASK; \
+ struct fgt_masks *m = reg_to_fgt_masks(reg); \
+ set |= hfg & m->mask; \
+ clr |= hfg & m->nmask; \
} while(0)
#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \
do { \
- u64 c = 0, s = 0; \
+ struct fgt_masks *m = reg_to_fgt_masks(reg); \
+ u64 c = clr, s = set; \
+ u64 val; \
\
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \
@@ -115,30 +173,15 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
\
compute_undef_clr_set(vcpu, kvm, reg, c, s); \
\
- s |= set; \
- c |= clr; \
- if (c || s) { \
- u64 val = __ ## reg ## _nMASK; \
- val |= s; \
- val &= ~c; \
- write_sysreg_s(val, SYS_ ## reg); \
- } \
+ val = m->nmask; \
+ val |= s; \
+ val &= ~c; \
+ write_sysreg_s(val, SYS_ ## reg); \
} while(0)
#define update_fgt_traps(hctxt, vcpu, kvm, reg) \
update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0)
-/*
- * Validate the fine grain trap masks.
- * Check that the masks do not overlap and that all bits are accounted for.
- */
-#define CHECK_FGT_MASKS(reg) \
- do { \
- BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \
- BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \
- (__ ## reg ## _nMASK))); \
- } while(0)
-
static inline bool cpu_has_amu(void)
{
u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
@@ -152,56 +195,60 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- CHECK_FGT_MASKS(HFGRTR_EL2);
- CHECK_FGT_MASKS(HFGWTR_EL2);
- CHECK_FGT_MASKS(HFGITR_EL2);
- CHECK_FGT_MASKS(HDFGRTR_EL2);
- CHECK_FGT_MASKS(HDFGWTR_EL2);
- CHECK_FGT_MASKS(HAFGRTR_EL2);
- CHECK_FGT_MASKS(HCRX_EL2);
-
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2);
update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0,
cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ?
- HFGxTR_EL2_TCR_EL1_MASK : 0);
+ HFGWTR_EL2_TCR_EL1_MASK : 0);
update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2);
update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2);
update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2);
if (cpu_has_amu())
update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2);
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT2))
+ return;
+
+ update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2);
}
-#define __deactivate_fgt(htcxt, vcpu, kvm, reg) \
+#define __deactivate_fgt(htcxt, vcpu, reg) \
do { \
- if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) || \
- kvm->arch.fgu[reg_to_fgt_group_id(reg)]) \
- write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
- SYS_ ## reg); \
+ write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
+ SYS_ ## reg); \
} while(0)
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
- __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
- else
- __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2);
- __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2);
- __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2);
- __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HFGWTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HFGITR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HDFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HDFGWTR_EL2);
if (cpu_has_amu())
- __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, HAFGRTR_EL2);
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT2))
+ return;
+
+ __deactivate_fgt(hctxt, vcpu, HFGRTR2_EL2);
+ __deactivate_fgt(hctxt, vcpu, HFGWTR2_EL2);
+ __deactivate_fgt(hctxt, vcpu, HFGITR2_EL2);
+ __deactivate_fgt(hctxt, vcpu, HDFGRTR2_EL2);
+ __deactivate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
}
static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu)
@@ -235,6 +282,8 @@ static inline void __deactivate_traps_mpam(void)
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
@@ -244,12 +293,9 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
* EL1 instead of being trapped to EL2.
*/
- if (kvm_arm_support_pmu_v3()) {
- struct kvm_cpu_context *hctxt;
-
+ if (system_supports_pmuv3()) {
write_sysreg(0, pmselr_el0);
- hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
@@ -261,14 +307,12 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = vcpu->arch.hcrx_el2;
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
- u64 clr = 0, set = 0;
-
- compute_clr_set(vcpu, HCRX_EL2, clr, set);
-
- hcrx |= set;
- hcrx &= ~clr;
+ u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2);
+ hcrx |= val & __HCRX_EL2_MASK;
+ hcrx &= ~(~val & __HCRX_EL2_nMASK);
}
+ ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2);
write_sysreg_s(hcrx, SYS_HCRX_EL2);
}
@@ -278,19 +322,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
- if (kvm_arm_support_pmu_v3()) {
- struct kvm_cpu_context *hctxt;
-
- hctxt = host_data_ptr(host_ctxt);
+ if (system_supports_pmuv3()) {
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
if (cpus_have_final_cap(ARM64_HAS_HCX))
- write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2);
__deactivate_traps_hfgxtr(vcpu);
__deactivate_traps_mpam();
@@ -301,7 +344,7 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
- write_sysreg(hcr, hcr_el2);
+ write_sysreg_hcr(hcr);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 76ff095c6b6e..b9cff893bbe0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -43,6 +43,17 @@ static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt)
return &ctxt_sys_reg(ctxt, MDSCR_EL1);
}
+static inline u64 ctxt_midr_el1(struct kvm_cpu_context *ctxt)
+{
+ struct kvm *kvm = kern_hyp_va(ctxt_to_vcpu(ctxt)->kvm);
+
+ if (!(ctxt_is_guest(ctxt) &&
+ test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags)))
+ return read_cpuid_id();
+
+ return kvm_read_vm_id_reg(kvm, SYS_MIDR_EL1);
+}
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
*ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1);
@@ -168,8 +179,9 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
}
static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
- u64 mpidr)
+ u64 midr, u64 mpidr)
{
+ write_sysreg(midr, vpidr_el2);
write_sysreg(mpidr, vmpidr_el2);
if (has_vhe() ||
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 978f38c386ee..5f9d56754e39 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -39,12 +39,12 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot);
-int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
+int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm);
int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
bool addr_is_memory(phys_addr_t phys);
@@ -56,7 +56,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
int hyp_pin_shared_mem(void *from, void *to);
void hyp_unpin_shared_mem(void *from, void *to);
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
+void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc);
@@ -67,4 +67,10 @@ static __always_inline void __load_host_stage2(void)
else
write_sysreg(0, vttbr_el2);
}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+void pkvm_ownership_selftest(void *base);
+#else
+static inline void pkvm_ownership_selftest(void *base) { }
+#endif
#endif /* __KVM_NVHE_MEM_PROTECT__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 34233d586060..dee1a406b0c2 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -8,23 +8,30 @@
#include <linux/types.h>
/*
- * Bits 0-1 are reserved to track the memory ownership state of each page:
- * 00: The page is owned exclusively by the page-table owner.
- * 01: The page is owned by the page-table owner, but is shared
- * with another entity.
- * 10: The page is shared with, but not owned by the page-table owner.
- * 11: Reserved for future use (lending).
+ * Bits 0-1 are used to encode the memory ownership state of each page from the
+ * point of view of a pKVM "component" (host, hyp, guest, ... see enum
+ * pkvm_component_id):
+ * 00: The page is owned and exclusively accessible by the component;
+ * 01: The page is owned and accessible by the component, but is also
+ * accessible by another component;
+ * 10: The page is accessible but not owned by the component;
+ * The storage of this state depends on the component: either in the
+ * hyp_vmemmap for the host and hyp states or in PTE software bits for guests.
*/
enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = BIT(0),
PKVM_PAGE_SHARED_BORROWED = BIT(1),
- __PKVM_PAGE_RESERVED = BIT(0) | BIT(1),
- /* Meta-states which aren't encoded directly in the PTE's SW bits */
- PKVM_NOPAGE = BIT(2),
+ /*
+ * 'Meta-states' are not stored directly in PTE SW bits for guest
+ * states, but inferred from the context (e.g. invalid PTE entries).
+ * For the host and hyp, meta-states are stored directly in the
+ * struct hyp_page.
+ */
+ PKVM_NOPAGE = BIT(0) | BIT(1),
};
-#define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED)
+#define PKVM_PAGE_STATE_MASK (BIT(0) | BIT(1))
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
@@ -44,8 +51,15 @@ struct hyp_page {
u16 refcount;
u8 order;
- /* Host (non-meta) state. Guarded by the host stage-2 lock. */
- enum pkvm_page_state host_state : 8;
+ /* Host state. Guarded by the host stage-2 lock. */
+ unsigned __host_state : 4;
+
+ /*
+ * Complement of the hyp state. Guarded by the hyp stage-1 lock. We use
+ * the complement so that the initial 0 in __hyp_state_comp (due to the
+ * entire vmemmap starting off zeroed) encodes PKVM_NOPAGE.
+ */
+ unsigned __hyp_state_comp : 4;
u32 host_share_guest_count;
};
@@ -82,6 +96,26 @@ static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
+static inline enum pkvm_page_state get_host_state(struct hyp_page *p)
+{
+ return p->__host_state;
+}
+
+static inline void set_host_state(struct hyp_page *p, enum pkvm_page_state state)
+{
+ p->__host_state = state;
+}
+
+static inline enum pkvm_page_state get_hyp_state(struct hyp_page *p)
+{
+ return p->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
+}
+
+static inline void set_hyp_state(struct hyp_page *p, enum pkvm_page_state state)
+{
+ p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
+}
+
/*
* Refcounting for 'struct hyp_page'.
* hyp_pool::lock must be held if atomic access to the refcount is required.
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 230e4f2527de..6e83ce35c2f2 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -13,9 +13,11 @@
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-int hyp_create_pcpu_fixmap(void);
+int hyp_create_fixmap(void);
void *hyp_fixmap_map(phys_addr_t phys);
void hyp_fixmap_unmap(void);
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size);
+void hyp_fixblock_unmap(void);
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index e42bf68c8848..ce31d3b73603 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -43,12 +43,6 @@ struct pkvm_hyp_vm {
struct hyp_pool pool;
hyp_spinlock_t lock;
- /*
- * The number of vcpus initialized and ready to run.
- * Modifying this is protected by 'vm_table_lock'.
- */
- unsigned int nr_vcpus;
-
/* Array of the hyp vCPU structures for this VM. */
struct pkvm_hyp_vcpu *vcpus[];
};
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index b43426a493df..a76522d63c3e 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -99,3 +99,9 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAG
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
+
+ifeq ($(CONFIG_UBSAN_KVM_EL2),y)
+UBSAN_SANITIZE := y
+# Always use brk and not hooks
+ccflags-y += $(CFLAGS_UBSAN_TRAP)
+endif
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index e433dfab882a..3369dd0c4009 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_ffa_version = ffa_req_version;
}
- if (hyp_ffa_post_init())
+ if (hyp_ffa_post_init()) {
res->a0 = FFA_RET_NOT_SUPPORTED;
- else {
- has_version_negotiated = true;
+ } else {
+ smp_store_release(&has_version_negotiated, true);
res->a0 = hyp_ffa_version;
}
unlock:
@@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
if (!is_ffa_call(func_id))
return false;
- if (!has_version_negotiated && func_id != FFA_VERSION) {
+ if (func_id != FFA_VERSION &&
+ !smp_load_acquire(&has_version_negotiated)) {
ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS);
goto out_handled;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 58f0cb2298cc..eef15b374abb 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -124,7 +124,7 @@ SYM_FUNC_START(__hyp_do_panic)
/* Ensure host stage-2 is disabled */
mrs x0, hcr_el2
bic x0, x0, #HCR_VM
- msr hcr_el2, x0
+ msr_hcr_el2 x0
isb
tlbi vmalls12e1
dsb nsh
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index f8af11189572..aada42522e7b 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -100,7 +100,7 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
msr mair_el2, x1
ldr x1, [x0, #NVHE_INIT_HCR_EL2]
- msr hcr_el2, x1
+ msr_hcr_el2 x1
mov x2, #HCR_E2H
and x2, x1, x2
@@ -262,7 +262,7 @@ reset:
alternative_if ARM64_KVM_PROTECTED_MODE
mov_q x5, HCR_HOST_NVHE_FLAGS
- msr hcr_el2, x5
+ msr_hcr_el2 x5
alternative_else_nop_endif
/* Install stub vectors */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2c37680d954c..8e8848de4d47 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -123,10 +123,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
- hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- /* Limit guest vector length to the maximum supported by the host. */
- hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
-
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
@@ -249,7 +245,8 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
- DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
+ DECLARE_REG(u64, nr_pages, host_ctxt, 3);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
@@ -264,7 +261,7 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
if (ret)
goto out;
- ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot);
+ ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot);
out:
cpu_reg(host_ctxt, 1) = ret;
}
@@ -273,6 +270,7 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(u64, nr_pages, host_ctxt, 3);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
@@ -283,7 +281,7 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
if (!hyp_vm)
goto out;
- ret = __pkvm_host_unshare_guest(gfn, hyp_vm);
+ ret = __pkvm_host_unshare_guest(gfn, nr_pages, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
@@ -312,6 +310,7 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(u64, nr_pages, host_ctxt, 3);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
@@ -322,7 +321,7 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt
if (!hyp_vm)
goto out;
- ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm);
+ ret = __pkvm_host_wrprotect_guest(gfn, nr_pages, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
@@ -332,7 +331,8 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
- DECLARE_REG(bool, mkold, host_ctxt, 3);
+ DECLARE_REG(u64, nr_pages, host_ctxt, 3);
+ DECLARE_REG(bool, mkold, host_ctxt, 4);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
@@ -343,7 +343,7 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho
if (!hyp_vm)
goto out;
- ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm);
+ ret = __pkvm_host_test_clear_young_guest(gfn, nr_pages, mkold, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index f4562f417d3f..d724f6d69302 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -25,5 +25,7 @@ SECTIONS {
BEGIN_HYP_SECTION(.data..percpu)
PERCPU_INPUT(L1_CACHE_BYTES)
END_HYP_SECTION
+
HYP_SECTION(.bss)
+ HYP_SECTION(.data)
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 19c3c631708c..95d7534c9679 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -60,6 +60,11 @@ static void hyp_unlock_component(void)
hyp_spin_unlock(&pkvm_pgd_lock);
}
+#define for_each_hyp_page(__p, __st, __sz) \
+ for (struct hyp_page *__p = hyp_phys_to_page(__st), \
+ *__e = __p + ((__sz) >> PAGE_SHIFT); \
+ __p < __e; __p++)
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -161,12 +166,6 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
return 0;
}
-static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
- enum kvm_pgtable_prot prot)
-{
- return true;
-}
-
static void *guest_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
@@ -217,16 +216,42 @@ static void guest_s2_put_page(void *addr)
hyp_put_page(&current_vm->pool, addr);
}
+static void __apply_guest_page(void *va, size_t size,
+ void (*func)(void *addr, size_t size))
+{
+ size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
+ va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
+ size = PAGE_ALIGN(size);
+
+ while (size) {
+ size_t map_size = PAGE_SIZE;
+ void *map;
+
+ if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
+ map = hyp_fixblock_map(__hyp_pa(va), &map_size);
+ else
+ map = hyp_fixmap_map(__hyp_pa(va));
+
+ func(map, map_size);
+
+ if (map_size == PMD_SIZE)
+ hyp_fixblock_unmap();
+ else
+ hyp_fixmap_unmap();
+
+ size -= map_size;
+ va += map_size;
+ }
+}
+
static void clean_dcache_guest_page(void *va, size_t size)
{
- __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
- hyp_fixmap_unmap();
+ __apply_guest_page(va, size, __clean_dcache_guest_page);
}
static void invalidate_icache_guest_page(void *va, size_t size)
{
- __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
- hyp_fixmap_unmap();
+ __apply_guest_page(va, size, __invalidate_icache_guest_page);
}
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
@@ -255,8 +280,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
};
guest_lock_component(vm);
- ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
- guest_stage2_force_pte_cb);
+ ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
guest_unlock_component(vm);
if (ret)
return ret;
@@ -266,7 +290,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
return 0;
}
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
+void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
struct hyp_page *page;
void *addr;
@@ -309,7 +333,7 @@ int __pkvm_prot_finalize(void)
*/
kvm_flush_dcache_to_poc(params, sizeof(*params));
- write_sysreg(params->hcr_el2, hcr_el2);
+ write_sysreg_hcr(params->hcr_el2);
__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
/*
@@ -467,7 +491,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
return -EAGAIN;
if (pte) {
- WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
+ WARN_ON(addr_is_memory(addr) &&
+ get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
return -EPERM;
}
@@ -493,17 +518,15 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
- phys_addr_t end = addr + size;
-
- for (; addr < end; addr += PAGE_SIZE)
- hyp_phys_to_page(addr)->host_state = state;
+ for_each_hyp_page(page, addr, size)
+ set_host_state(page, state);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
int ret;
- if (!addr_is_memory(addr))
+ if (!range_is_memory(addr, addr + size))
return -EPERM;
ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
@@ -578,7 +601,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
return;
}
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+
+ /*
+ * Yikes, we couldn't resolve the fault IPA. This should reinject an
+ * abort into the host when we figure out how to do that.
+ */
+ BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
+ addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
+
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
@@ -611,16 +641,16 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- u64 end = addr + size;
int ret;
- ret = check_range_allowed_memory(addr, end);
+ ret = check_range_allowed_memory(addr, addr + size);
if (ret)
return ret;
hyp_assert_lock_held(&host_mmu.lock);
- for (; addr < end; addr += PAGE_SIZE) {
- if (hyp_phys_to_page(addr)->host_state != state)
+
+ for_each_hyp_page(page, addr, size) {
+ if (get_host_state(page) != state)
return -EPERM;
}
@@ -630,7 +660,7 @@ static int __host_check_page_state_range(u64 addr, u64 size,
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
+ if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
if (ret)
@@ -642,24 +672,20 @@ static int __host_set_page_state_range(u64 addr, u64 size,
return 0;
}
-static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
+static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
- if (!kvm_pte_valid(pte))
- return PKVM_NOPAGE;
-
- return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+ for_each_hyp_page(page, phys, size)
+ set_hyp_state(page, state);
}
-static int __hyp_check_page_state_range(u64 addr, u64 size,
- enum pkvm_page_state state)
+static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
- struct check_walk_data d = {
- .desired = state,
- .get_page_state = hyp_get_page_state,
- };
+ for_each_hyp_page(page, phys, size) {
+ if (get_hyp_state(page) != state)
+ return -EPERM;
+ }
- hyp_assert_lock_held(&pkvm_pgd_lock);
- return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+ return 0;
}
static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
@@ -670,10 +696,9 @@ static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
-static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
+static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
u64 size, enum pkvm_page_state state)
{
- struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
struct check_walk_data d = {
.desired = state,
.get_page_state = guest_get_page_state,
@@ -686,8 +711,6 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
int __pkvm_host_share_hyp(u64 pfn)
{
u64 phys = hyp_pfn_to_phys(pfn);
- void *virt = __hyp_va(phys);
- enum kvm_pgtable_prot prot;
u64 size = PAGE_SIZE;
int ret;
@@ -697,14 +720,11 @@ int __pkvm_host_share_hyp(u64 pfn)
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
if (ret)
goto unlock;
- if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
- ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
- if (ret)
- goto unlock;
- }
+ ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ __hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
unlock:
@@ -727,7 +747,7 @@ int __pkvm_host_unshare_hyp(u64 pfn)
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
if (ret)
goto unlock;
- ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
+ ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
if (ret)
goto unlock;
if (hyp_page_count((void *)virt)) {
@@ -735,7 +755,7 @@ int __pkvm_host_unshare_hyp(u64 pfn)
goto unlock;
}
- WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+ __hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
unlock:
@@ -750,7 +770,6 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
u64 phys = hyp_pfn_to_phys(pfn);
u64 size = PAGE_SIZE * nr_pages;
void *virt = __hyp_va(phys);
- enum kvm_pgtable_prot prot;
int ret;
host_lock_component();
@@ -759,14 +778,12 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
if (ret)
goto unlock;
- if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
- ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
- if (ret)
- goto unlock;
- }
+ ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
- WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ __hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
unlock:
@@ -786,15 +803,14 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
host_lock_component();
hyp_lock_component();
- ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
+ ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (ret)
+ goto unlock;
+ ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
if (ret)
goto unlock;
- if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
- ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
- if (ret)
- goto unlock;
- }
+ __hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
@@ -809,24 +825,30 @@ int hyp_pin_shared_mem(void *from, void *to)
{
u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
u64 end = PAGE_ALIGN((u64)to);
+ u64 phys = __hyp_pa(start);
u64 size = end - start;
+ struct hyp_page *p;
int ret;
host_lock_component();
hyp_lock_component();
- ret = __host_check_page_state_range(__hyp_pa(start), size,
- PKVM_PAGE_SHARED_OWNED);
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
if (ret)
goto unlock;
- ret = __hyp_check_page_state_range(start, size,
- PKVM_PAGE_SHARED_BORROWED);
+ ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
if (ret)
goto unlock;
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_inc(hyp_virt_to_page(cur));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ p = hyp_virt_to_page(cur);
+ hyp_page_ref_inc(p);
+ if (p->refcount == 1)
+ WARN_ON(pkvm_create_mappings_locked((void *)cur,
+ (void *)cur + PAGE_SIZE,
+ PAGE_HYP));
+ }
unlock:
hyp_unlock_component();
@@ -839,12 +861,17 @@ void hyp_unpin_shared_mem(void *from, void *to)
{
u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
u64 end = PAGE_ALIGN((u64)to);
+ struct hyp_page *p;
host_lock_component();
hyp_lock_component();
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_dec(hyp_virt_to_page(cur));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ p = hyp_virt_to_page(cur);
+ if (p->refcount == 1)
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
+ hyp_page_ref_dec(p);
+ }
hyp_unlock_component();
host_unlock_component();
@@ -880,49 +907,84 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
return ret;
}
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
+{
+ size_t block_size;
+
+ if (nr_pages == 1) {
+ *size = PAGE_SIZE;
+ return 0;
+ }
+
+ /* We solely support second to last level huge mapping */
+ block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);
+
+ if (nr_pages != block_size >> PAGE_SHIFT)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(phys | ipa, block_size))
+ return -EINVAL;
+
+ *size = block_size;
+ return 0;
+}
+
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 phys = hyp_pfn_to_phys(pfn);
u64 ipa = hyp_pfn_to_phys(gfn);
- struct hyp_page *page;
+ u64 size;
int ret;
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
- ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
+ if (ret)
+ return ret;
+
+ ret = check_range_allowed_memory(phys, phys + size);
if (ret)
return ret;
host_lock_component();
guest_lock_component(vm);
- ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
+ ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
if (ret)
goto unlock;
- page = hyp_phys_to_page(phys);
- switch (page->host_state) {
- case PKVM_PAGE_OWNED:
- WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
- break;
- case PKVM_PAGE_SHARED_OWNED:
- if (page->host_share_guest_count)
- break;
- /* Only host to np-guest multi-sharing is tolerated */
- WARN_ON(1);
- fallthrough;
- default:
- ret = -EPERM;
- goto unlock;
+ for_each_hyp_page(page, phys, size) {
+ switch (get_host_state(page)) {
+ case PKVM_PAGE_OWNED:
+ continue;
+ case PKVM_PAGE_SHARED_OWNED:
+ if (page->host_share_guest_count == U32_MAX) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ /* Only host to np-guest multi-sharing is tolerated */
+ if (page->host_share_guest_count)
+ continue;
+
+ fallthrough;
+ default:
+ ret = -EPERM;
+ goto unlock;
+ }
+ }
+
+ for_each_hyp_page(page, phys, size) {
+ set_host_state(page, PKVM_PAGE_SHARED_OWNED);
+ page->host_share_guest_count++;
}
- WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
+ WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
&vcpu->vcpu.arch.pkvm_memcache, 0));
- page->host_share_guest_count++;
unlock:
guest_unlock_component(vm);
@@ -931,10 +993,9 @@ unlock:
return ret;
}
-static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
+static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
enum pkvm_page_state state;
- struct hyp_page *page;
kvm_pte_t pte;
u64 phys;
s8 level;
@@ -945,7 +1006,7 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
return ret;
if (!kvm_pte_valid(pte))
return -ENOENT;
- if (level != KVM_PGTABLE_LAST_LEVEL)
+ if (kvm_granule_size(level) != size)
return -E2BIG;
state = guest_get_page_state(pte, ipa);
@@ -953,43 +1014,49 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
return -EPERM;
phys = kvm_pte_to_phys(pte);
- ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ ret = check_range_allowed_memory(phys, phys + size);
if (WARN_ON(ret))
return ret;
- page = hyp_phys_to_page(phys);
- if (page->host_state != PKVM_PAGE_SHARED_OWNED)
- return -EPERM;
- if (WARN_ON(!page->host_share_guest_count))
- return -EINVAL;
+ for_each_hyp_page(page, phys, size) {
+ if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
+ return -EPERM;
+ if (WARN_ON(!page->host_share_guest_count))
+ return -EINVAL;
+ }
*__phys = phys;
return 0;
}
-int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
u64 ipa = hyp_pfn_to_phys(gfn);
- struct hyp_page *page;
- u64 phys;
+ u64 size, phys;
int ret;
+ ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+ if (ret)
+ return ret;
+
host_lock_component();
guest_lock_component(vm);
- ret = __check_host_shared_guest(vm, &phys, ipa);
+ ret = __check_host_shared_guest(vm, &phys, ipa, size);
if (ret)
goto unlock;
- ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
+ ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
if (ret)
goto unlock;
- page = hyp_phys_to_page(phys);
- page->host_share_guest_count--;
- if (!page->host_share_guest_count)
- WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));
+ for_each_hyp_page(page, phys, size) {
+ /* __check_host_shared_guest() protects against underflow */
+ page->host_share_guest_count--;
+ if (!page->host_share_guest_count)
+ set_host_state(page, PKVM_PAGE_OWNED);
+ }
unlock:
guest_unlock_component(vm);
@@ -998,7 +1065,7 @@ unlock:
return ret;
}
-static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
+static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
u64 phys;
int ret;
@@ -1009,7 +1076,7 @@ static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
host_lock_component();
guest_lock_component(vm);
- ret = __check_host_shared_guest(vm, &phys, ipa);
+ ret = __check_host_shared_guest(vm, &phys, ipa, size);
guest_unlock_component(vm);
host_unlock_component();
@@ -1029,7 +1096,7 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
- assert_host_shared_guest(vm, ipa);
+ assert_host_shared_guest(vm, ipa, PAGE_SIZE);
guest_lock_component(vm);
ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
@@ -1037,33 +1104,41 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_
return ret;
}
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
- u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 size, ipa = hyp_pfn_to_phys(gfn);
int ret;
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;
- assert_host_shared_guest(vm, ipa);
+ ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+ if (ret)
+ return ret;
+
+ assert_host_shared_guest(vm, ipa, size);
guest_lock_component(vm);
- ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
guest_unlock_component(vm);
return ret;
}
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
- u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 size, ipa = hyp_pfn_to_phys(gfn);
int ret;
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;
- assert_host_shared_guest(vm, ipa);
+ ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+ if (ret)
+ return ret;
+
+ assert_host_shared_guest(vm, ipa, size);
guest_lock_component(vm);
- ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
+ ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
guest_unlock_component(vm);
return ret;
@@ -1077,10 +1152,210 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;
- assert_host_shared_guest(vm, ipa);
+ assert_host_shared_guest(vm, ipa, PAGE_SIZE);
guest_lock_component(vm);
kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
guest_unlock_component(vm);
return 0;
}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+struct pkvm_expected_state {
+ enum pkvm_page_state host;
+ enum pkvm_page_state hyp;
+ enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
+};
+
+static struct pkvm_expected_state selftest_state;
+static struct hyp_page *selftest_page;
+
+static struct pkvm_hyp_vm selftest_vm = {
+ .kvm = {
+ .arch = {
+ .mmu = {
+ .arch = &selftest_vm.kvm.arch,
+ .pgt = &selftest_vm.pgt,
+ },
+ },
+ },
+};
+
+static struct pkvm_hyp_vcpu selftest_vcpu = {
+ .vcpu = {
+ .arch = {
+ .hw_mmu = &selftest_vm.kvm.arch.mmu,
+ },
+ .kvm = &selftest_vm.kvm,
+ },
+};
+
+static void init_selftest_vm(void *virt)
+{
+ struct hyp_page *p = hyp_virt_to_page(virt);
+ int i;
+
+ selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
+ WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
+
+ for (i = 0; i < pkvm_selftest_pages(); i++) {
+ if (p[i].refcount)
+ continue;
+ p[i].refcount = 1;
+ hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ }
+}
+
+static u64 selftest_ipa(void)
+{
+ return BIT(selftest_vm.pgt.ia_bits - 1);
+}
+
+static void assert_page_state(void)
+{
+ void *virt = hyp_page_to_virt(selftest_page);
+ u64 size = PAGE_SIZE << selftest_page->order;
+ struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
+ u64 phys = hyp_virt_to_phys(virt);
+ u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
+ struct pkvm_hyp_vm *vm;
+
+ vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+ host_lock_component();
+ WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
+ host_unlock_component();
+
+ hyp_lock_component();
+ WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
+ hyp_unlock_component();
+
+ guest_lock_component(&selftest_vm);
+ WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
+ WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
+ guest_unlock_component(&selftest_vm);
+}
+
+#define assert_transition_res(res, fn, ...) \
+ do { \
+ WARN_ON(fn(__VA_ARGS__) != res); \
+ assert_page_state(); \
+ } while (0)
+
+void pkvm_ownership_selftest(void *base)
+{
+ enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
+ void *virt = hyp_alloc_pages(&host_s2_pool, 0);
+ struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
+ struct pkvm_hyp_vm *vm = &selftest_vm;
+ u64 phys, size, pfn, gfn;
+
+ WARN_ON(!virt);
+ selftest_page = hyp_virt_to_page(virt);
+ selftest_page->refcount = 0;
+ init_selftest_vm(base);
+
+ size = PAGE_SIZE << selftest_page->order;
+ phys = hyp_virt_to_phys(virt);
+ pfn = hyp_phys_to_pfn(phys);
+ gfn = hyp_phys_to_pfn(selftest_ipa());
+
+ selftest_state.host = PKVM_NOPAGE;
+ selftest_state.hyp = PKVM_PAGE_OWNED;
+ selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
+ assert_page_state();
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+ assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+ selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
+ assert_transition_res(0, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
+
+ assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
+ assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
+ hyp_unpin_shared_mem(virt, virt + size);
+ WARN_ON(hyp_page_count(virt) != 1);
+ assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
+
+ hyp_unpin_shared_mem(virt, virt + size);
+ assert_page_state();
+ WARN_ON(hyp_page_count(virt));
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);
+
+ selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+
+ selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+ selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
+ assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
+ assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
+ WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);
+
+ selftest_state.guest[0] = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm);
+
+ selftest_state.guest[1] = PKVM_NOPAGE;
+ selftest_state.host = PKVM_PAGE_OWNED;
+ assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);
+
+ selftest_state.host = PKVM_NOPAGE;
+ selftest_state.hyp = PKVM_PAGE_OWNED;
+ assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);
+
+ selftest_page->refcount = 1;
+ hyp_put_page(&host_s2_pool, virt);
+}
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index f41c7440b34b..ae8391baebc3 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -229,9 +229,8 @@ int hyp_map_vectors(void)
return 0;
}
-void *hyp_fixmap_map(phys_addr_t phys)
+static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
{
- struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
kvm_pte_t pte, *ptep = slot->ptep;
pte = *ptep;
@@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys)
return (void *)slot->addr;
}
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+ return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
+}
+
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
{
kvm_pte_t *ptep = slot->ptep;
u64 addr = slot->addr;
+ u32 level;
+
+ if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
+ level = KVM_PGTABLE_LAST_LEVEL;
+ else
+ level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */
WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
@@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
- __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
dsb(ish);
isb();
}
@@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void)
static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
+ struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;
- if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
+ if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
return -EINVAL;
slot->addr = ctx->addr;
@@ -296,13 +306,84 @@ static int create_fixmap_slot(u64 addr, u64 cpu)
struct kvm_pgtable_walker walker = {
.cb = __create_fixmap_slot_cb,
.flags = KVM_PGTABLE_WALK_LEAF,
- .arg = (void *)cpu,
+ .arg = per_cpu_ptr(&fixmap_slots, cpu),
};
return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}
-int hyp_create_pcpu_fixmap(void)
+#if PAGE_SHIFT < 16
+#define HAS_FIXBLOCK
+static struct hyp_fixmap_slot hyp_fixblock_slot;
+static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
+#endif
+
+static int create_fixblock(void)
+{
+#ifdef HAS_FIXBLOCK
+ struct kvm_pgtable_walker walker = {
+ .cb = __create_fixmap_slot_cb,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = &hyp_fixblock_slot,
+ };
+ unsigned long addr;
+ phys_addr_t phys;
+ int ret, i;
+
+ /* Find a RAM phys address, PMD aligned */
+ for (i = 0; i < hyp_memblock_nr; i++) {
+ phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
+ if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size))
+ break;
+ }
+
+ if (i >= hyp_memblock_nr)
+ return -EINVAL;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+ addr = ALIGN(__io_map_base, PMD_SIZE);
+ ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
+ if (ret)
+ goto unlock;
+
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
+ if (ret)
+ goto unlock;
+
+ ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);
+
+unlock:
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
+{
+#ifdef HAS_FIXBLOCK
+ *size = PMD_SIZE;
+ hyp_spin_lock(&hyp_fixblock_lock);
+ return fixmap_map_slot(&hyp_fixblock_slot, phys);
+#else
+ *size = PAGE_SIZE;
+ return hyp_fixmap_map(phys);
+#endif
+}
+
+void hyp_fixblock_unmap(void)
+{
+#ifdef HAS_FIXBLOCK
+ fixmap_clear_slot(&hyp_fixblock_slot);
+ hyp_spin_unlock(&hyp_fixblock_lock);
+#else
+ hyp_fixmap_unmap();
+#endif
+}
+
+int hyp_create_fixmap(void)
{
unsigned long addr, i;
int ret;
@@ -322,7 +403,7 @@ int hyp_create_pcpu_fixmap(void)
return ret;
}
- return 0;
+ return create_fixblock();
}
int hyp_create_idmap(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 3927fe52a3dd..338505cb0171 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -46,7 +46,8 @@ static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_FWB;
if (cpus_have_final_cap(ARM64_HAS_EVT) &&
- !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE) &&
+ kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0) == read_cpuid(CTR_EL0))
vcpu->arch.hcr_el2 |= HCR_TID4;
else
vcpu->arch.hcr_el2 |= HCR_TID2;
@@ -166,8 +167,13 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
pkvm_vcpu_reset_hcr(vcpu);
- if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu)))
+ if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu))) {
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ /* Trust the host for non-protected vcpu features. */
+ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2;
return 0;
+ }
ret = pkvm_check_pvm_cpu_features(vcpu);
if (ret)
@@ -175,6 +181,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
pvm_init_traps_hcr(vcpu);
pvm_init_traps_mdcr(vcpu);
+ vcpu_set_hcrx(vcpu);
return 0;
}
@@ -239,10 +246,12 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
hyp_spin_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
- if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
+ if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx)
goto unlock;
hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
+ if (!hyp_vcpu)
+ goto unlock;
/* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) {
@@ -315,6 +324,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
+ /* CTR_EL0 is always under host control, even for protected VMs. */
+ hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0;
+
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
@@ -325,6 +337,10 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
KVM_VCPU_MAX_FEATURES);
+
+ if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &host_arch_flags))
+ hyp_vm->kvm.arch.midr_el1 = host_kvm->arch.midr_el1;
+
return;
}
@@ -356,13 +372,32 @@ static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}
+static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ void *sve_state;
+
+ if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
+ return;
+
+ sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
+ hyp_unpin_shared_mem(sve_state,
+ sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
+}
+
static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
unsigned int nr_vcpus)
{
int i;
- for (i = 0; i < nr_vcpus; i++)
- unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
+ for (i = 0; i < nr_vcpus; i++) {
+ struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vcpus[i];
+
+ if (!hyp_vcpu)
+ continue;
+
+ unpin_host_vcpu(hyp_vcpu->host_vcpu);
+ unpin_host_sve_state(hyp_vcpu);
+ }
}
static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
@@ -376,34 +411,56 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
pkvm_init_features_from_host(hyp_vm, host_kvm);
}
-static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
+static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
{
struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+ unsigned int sve_max_vl;
+ size_t sve_state_size;
+ void *sve_state;
+ int ret = 0;
- if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
+ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
+ return 0;
+ }
+
+ /* Limit guest vector length to the maximum supported by the host. */
+ sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
+ sve_state_size = sve_state_size_from_vl(sve_max_vl);
+ sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));
+
+ if (!sve_state || !sve_state_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
+ if (ret)
+ goto err;
+
+ vcpu->arch.sve_state = sve_state;
+ vcpu->arch.sve_max_vl = sve_max_vl;
+
+ return 0;
+err:
+ clear_bit(KVM_ARM_VCPU_SVE, vcpu->kvm->arch.vcpu_features);
+ return ret;
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
struct pkvm_hyp_vm *hyp_vm,
- struct kvm_vcpu *host_vcpu,
- unsigned int vcpu_idx)
+ struct kvm_vcpu *host_vcpu)
{
int ret = 0;
if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
return -EBUSY;
- if (host_vcpu->vcpu_idx != vcpu_idx) {
- ret = -EINVAL;
- goto done;
- }
-
hyp_vcpu->host_vcpu = host_vcpu;
hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
- hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;
+ hyp_vcpu->vcpu.vcpu_idx = READ_ONCE(host_vcpu->vcpu_idx);
hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
@@ -416,7 +473,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
if (ret)
goto done;
- pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
+ ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
done:
if (ret)
unpin_host_vcpu(host_vcpu);
@@ -641,27 +698,28 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
goto unlock;
}
- idx = hyp_vm->nr_vcpus;
+ ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu);
+ if (ret)
+ goto unlock;
+
+ idx = hyp_vcpu->vcpu.vcpu_idx;
if (idx >= hyp_vm->kvm.created_vcpus) {
ret = -EINVAL;
goto unlock;
}
- ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
- if (ret)
+ if (hyp_vm->vcpus[idx]) {
+ ret = -EINVAL;
goto unlock;
+ }
hyp_vm->vcpus[idx] = hyp_vcpu;
- hyp_vm->nr_vcpus++;
unlock:
hyp_spin_unlock(&vm_table_lock);
- if (ret) {
+ if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
- return ret;
- }
-
- return 0;
+ return ret;
}
static void
@@ -678,7 +736,7 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
int __pkvm_teardown_vm(pkvm_handle_t handle)
{
- struct kvm_hyp_memcache *mc;
+ struct kvm_hyp_memcache *mc, *stage2_mc;
struct pkvm_hyp_vm *hyp_vm;
struct kvm *host_kvm;
unsigned int idx;
@@ -706,18 +764,24 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
/* Reclaim guest pages (including page-table pages) */
mc = &host_kvm->arch.pkvm.teardown_mc;
- reclaim_guest_pages(hyp_vm, mc);
- unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
+ stage2_mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
+ reclaim_pgtable_pages(hyp_vm, stage2_mc);
+ unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);
/* Push the metadata pages to the teardown memcache */
- for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
+ for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) {
struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
- struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache;
+ struct kvm_hyp_memcache *vcpu_mc;
+
+ if (!hyp_vcpu)
+ continue;
+
+ vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache;
while (vcpu_mc->nr_pages) {
void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);
- push_hyp_memcache(mc, addr, hyp_virt_to_phys);
+ push_hyp_memcache(stage2_mc, addr, hyp_virt_to_phys);
unmap_donated_memory_noclear(addr, PAGE_SIZE);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index d62bcb5634a2..a48d3f5a5afb 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -28,6 +28,7 @@ static void *vmemmap_base;
static void *vm_table_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
+static void *selftest_base;
static void *ffa_proxy_pages;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static struct hyp_pool hpool;
@@ -38,6 +39,11 @@ static int divide_memory_pool(void *virt, unsigned long size)
hyp_early_alloc_init(virt, size);
+ nr_pages = pkvm_selftest_pages();
+ selftest_base = hyp_early_alloc_contig(nr_pages);
+ if (nr_pages && !selftest_base)
+ return -ENOMEM;
+
nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page));
vmemmap_base = hyp_early_alloc_contig(nr_pages);
if (!vmemmap_base)
@@ -119,6 +125,10 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
+ ret = pkvm_create_mappings(__hyp_data_start, __hyp_data_end, PAGE_HYP);
+ if (ret)
+ return ret;
+
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
if (ret)
return ret;
@@ -180,6 +190,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
enum pkvm_page_state state;
+ struct hyp_page *page;
phys_addr_t phys;
if (!kvm_pte_valid(ctx->old))
@@ -192,19 +203,25 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (!addr_is_memory(phys))
return -EINVAL;
+ page = hyp_phys_to_page(phys);
+
/*
* Adjust the host stage-2 mappings to match the ownership attributes
- * configured in the hypervisor stage-1.
+ * configured in the hypervisor stage-1, and make sure to propagate them
+ * to the hyp_vmemmap state.
*/
state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
switch (state) {
case PKVM_PAGE_OWNED:
+ set_hyp_state(page, PKVM_PAGE_OWNED);
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
case PKVM_PAGE_SHARED_OWNED:
- hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
+ set_hyp_state(page, PKVM_PAGE_SHARED_OWNED);
+ set_host_state(page, PKVM_PAGE_SHARED_BORROWED);
break;
case PKVM_PAGE_SHARED_BORROWED:
- hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
+ set_hyp_state(page, PKVM_PAGE_SHARED_BORROWED);
+ set_host_state(page, PKVM_PAGE_SHARED_OWNED);
break;
default:
return -EINVAL;
@@ -295,7 +312,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = hyp_create_pcpu_fixmap();
+ ret = hyp_create_fixmap();
if (ret)
goto out;
@@ -304,6 +321,8 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
+
+ pkvm_ownership_selftest(selftest_base);
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 7d2ba6ef0261..73affe1333a4 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -33,6 +33,18 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+struct fgt_masks hfgrtr_masks;
+struct fgt_masks hfgwtr_masks;
+struct fgt_masks hfgitr_masks;
+struct fgt_masks hdfgrtr_masks;
+struct fgt_masks hdfgwtr_masks;
+struct fgt_masks hafgrtr_masks;
+struct fgt_masks hfgrtr2_masks;
+struct fgt_masks hfgwtr2_masks;
+struct fgt_masks hfgitr2_masks;
+struct fgt_masks hdfgrtr2_masks;
+struct fgt_masks hdfgwtr2_masks;
+
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
@@ -142,7 +154,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
- write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
+ write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2);
__deactivate_cptr_traps(vcpu);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index dba101565de3..3cc613cce5f5 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -28,7 +28,9 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
+ u64 midr = ctxt_midr_el1(ctxt);
+
+ __sysreg_restore_el1_state(ctxt, midr, ctxt_sys_reg(ctxt, MPIDR_EL1));
__sysreg_restore_common_state(ctxt);
__sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index df5cc74a7dd0..c351b4abd5db 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -11,12 +11,6 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-
-#define KVM_PTE_TYPE BIT(1)
-#define KVM_PTE_TYPE_BLOCK 0
-#define KVM_PTE_TYPE_PAGE 1
-#define KVM_PTE_TYPE_TABLE 1
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 3f9741e51d41..f162b0df5cae 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -18,7 +18,7 @@
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
-static u64 __gic_v3_get_lr(unsigned int lr)
+u64 __gic_v3_get_lr(unsigned int lr)
{
switch (lr & 0xf) {
case 0:
@@ -218,7 +218,7 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
elrsr = read_gicreg(ICH_ELRSR_EL2);
- write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
+ write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++) {
if (elrsr & (1 << i))
@@ -274,7 +274,7 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
* system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
* so that the trap bits can take effect. Yes, we *loves* the GIC.
*/
- if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
+ if (!(cpu_if->vgic_hcr & ICH_HCR_EL2_En)) {
write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
isb();
} else if (!cpu_if->vgic_sre) {
@@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void)
/*
* To check whether we have a MMIO-based (GICv2 compatible)
* CPU interface, we need to disable the system register
- * view. To do that safely, we have to prevent any interrupt
- * from firing (which would be deadly).
+ * view.
*
- * Note that this only makes sense on VHE, as interrupts are
- * already masked for nVHE as part of the exception entry to
- * EL2.
- */
- if (has_vhe())
- flags = local_daif_save();
-
- /*
* Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
* that to be able to set ICC_SRE_EL1.SRE to 0, all the
* interrupt overrides must be set. You've got to love this.
+ *
+ * As we always run VHE with HCR_xMO set, no extra xMO
+ * manipulation is required in that case.
+ *
+ * To safely disable SRE, we have to prevent any interrupt
+ * from firing (which would be deadly). This only makes sense
+ * on VHE, as interrupts are already masked for nVHE as part
+ * of the exception entry to EL2.
*/
- sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
- isb();
+ if (has_vhe()) {
+ flags = local_daif_save();
+ } else {
+ sysreg_clear_set_hcr(0, HCR_AMO | HCR_FMO | HCR_IMO);
+ isb();
+ }
+
write_gicreg(0, ICC_SRE_EL1);
isb();
@@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void)
write_gicreg(sre, ICC_SRE_EL1);
isb();
- sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
- isb();
- if (has_vhe())
+ if (has_vhe()) {
local_daif_restore(flags);
+ } else {
+ sysreg_clear_set_hcr(HCR_AMO | HCR_FMO | HCR_IMO, 0);
+ isb();
+ }
val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
val |= read_gicreg(ICH_VTR_EL2);
@@ -752,7 +758,7 @@ static void __vgic_v3_bump_eoicount(void)
u32 hcr;
hcr = read_gicreg(ICH_HCR_EL2);
- hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+ hcr += 1 << ICH_HCR_EL2_EOIcount_SHIFT;
write_gicreg(hcr, ICH_HCR_EL2);
}
@@ -1052,11 +1058,11 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
switch (sysreg) {
case SYS_ICC_IGRPEN0_EL1:
if (is_read &&
- (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGRTR_EL2_ICC_IGRPENn_EL1))
return true;
if (!is_read &&
- (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGWTR_EL2_ICC_IGRPENn_EL1))
return true;
fallthrough;
@@ -1069,15 +1075,15 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
case SYS_ICC_EOIR0_EL1:
case SYS_ICC_HPPIR0_EL1:
case SYS_ICC_IAR0_EL1:
- return ich_hcr & ICH_HCR_TALL0;
+ return ich_hcr & ICH_HCR_EL2_TALL0;
case SYS_ICC_IGRPEN1_EL1:
if (is_read &&
- (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGRTR_EL2_ICC_IGRPENn_EL1))
return true;
if (!is_read &&
- (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGWTR_EL2_ICC_IGRPENn_EL1))
return true;
fallthrough;
@@ -1090,10 +1096,10 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
case SYS_ICC_EOIR1_EL1:
case SYS_ICC_HPPIR1_EL1:
case SYS_ICC_IAR1_EL1:
- return ich_hcr & ICH_HCR_TALL1;
+ return ich_hcr & ICH_HCR_EL2_TALL1;
case SYS_ICC_DIR_EL1:
- if (ich_hcr & ICH_HCR_TDIR)
+ if (ich_hcr & ICH_HCR_EL2_TDIR)
return true;
fallthrough;
@@ -1101,7 +1107,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
case SYS_ICC_RPR_EL1:
case SYS_ICC_CTLR_EL1:
case SYS_ICC_PMR_EL1:
- return ich_hcr & ICH_HCR_TC;
+ return ich_hcr & ICH_HCR_EL2_TC;
default:
return false;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 647737d6e8d0..c9b330dc2066 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -48,21 +48,46 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static u64 __compute_hcr(struct kvm_vcpu *vcpu)
{
+ u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
u64 hcr = vcpu->arch.hcr_el2;
if (!vcpu_has_nv(vcpu))
return hcr;
+ /*
+ * We rely on the invariant that a vcpu entered from HYP
+ * context must also exit in the same context, as only an ERET
+ * instruction can kick us out of it, and we obviously trap
+ * that sucker. PSTATE.M will get fixed-up on exit.
+ */
if (is_hyp_ctxt(vcpu)) {
+ host_data_set_flag(VCPU_IN_HYP_CONTEXT);
+
hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
if (!vcpu_el2_e2h_is_set(vcpu))
hcr |= HCR_NV1;
write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ } else {
+ host_data_clear_flag(VCPU_IN_HYP_CONTEXT);
+
+ if (guest_hcr & HCR_NV) {
+ u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id()));
+
+ /* Inherit the low bits from the actual register */
+ va |= __vcpu_sys_reg(vcpu, VNCR_EL2) & GENMASK(PAGE_SHIFT - 1, 0);
+ write_sysreg_s(va, SYS_VNCR_EL2);
+
+ /* Force NV2 in case the guest is forgetful... */
+ guest_hcr |= HCR_NV2;
+ }
}
- return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+ BUG_ON(host_data_test_flag(VCPU_IN_HYP_CONTEXT) &&
+ host_data_test_flag(L1_VNCR_MAPPED));
+
+ return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE);
}
static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
@@ -184,7 +209,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
___deactivate_traps(vcpu);
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
if (has_cntpoff()) {
struct timer_map map;
@@ -459,6 +484,14 @@ static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
if (ret)
return false;
+ /*
+ * If we have to check for any VNCR mapping being invalidated,
+ * go back to the slow path for further processing.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu) &&
+ atomic_read(&vcpu->kvm->arch.vncr_map_count))
+ return false;
+
__kvm_skip_instr(vcpu);
return true;
@@ -527,6 +560,25 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
return kvm_hyp_handle_sysreg(vcpu, exit_code);
}
+static bool kvm_hyp_handle_impdef(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 iss;
+
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return false;
+
+ /*
+ * Compute a synthetic ESR for a sysreg trap. Conveniently, AFSR1_EL2
+ * is populated with a correct ISS for a sysreg trap. These fruity
+ * parts are 64bit only, so unconditionally set IL.
+ */
+ iss = ESR_ELx_ISS(read_sysreg_s(SYS_AFSR1_EL2));
+ vcpu->arch.fault.esr_el2 = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SYS64) |
+ FIELD_PREP(ESR_ELx_ISS_MASK, iss) |
+ ESR_ELx_IL;
+ return false;
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -538,6 +590,9 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
+
+ /* Apple shenanigans */
+ [0x3F] = kvm_hyp_handle_impdef,
};
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
@@ -546,9 +601,12 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
/*
* If we were in HYP context on entry, adjust the PSTATE view
- * so that the usual helpers work correctly.
+ * so that the usual helpers work correctly. This enforces our
+ * invariant that the guest's HYP context status is preserved
+ * across a run.
*/
- if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
+ if (vcpu_has_nv(vcpu) &&
+ unlikely(host_data_test_flag(VCPU_IN_HYP_CONTEXT))) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -564,6 +622,10 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*vcpu_cpsr(vcpu) |= mode;
}
+ /* Apply extreme paranoia! */
+ BUG_ON(vcpu_has_nv(vcpu) &&
+ !!host_data_test_flag(VCPU_IN_HYP_CONTEXT) != is_hyp_ctxt(vcpu));
+
return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 90b018e06f2c..3814b0b2c937 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -87,11 +87,12 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1);
write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1);
- write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2);
- write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR);
- write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR);
- write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR);
- write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR);
+ write_sysreg(ctxt_midr_el1(&vcpu->arch.ctxt), vpidr_el2);
+ write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR);
if (vcpu_el2_e2h_is_set(vcpu)) {
/*
@@ -191,7 +192,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- u64 mpidr;
+ u64 midr, mpidr;
host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
@@ -221,22 +222,17 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
} else {
if (vcpu_has_nv(vcpu)) {
/*
- * Use the guest hypervisor's VPIDR_EL2 when in a
- * nested state. The hardware value of MIDR_EL1 gets
- * restored on put.
- */
- write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
-
- /*
* As we're restoring a nested guest, set the value
* provided by the guest hypervisor.
*/
+ midr = ctxt_sys_reg(guest_ctxt, VPIDR_EL2);
mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
} else {
+ midr = ctxt_midr_el1(guest_ctxt);
mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
}
- __sysreg_restore_el1_state(guest_ctxt, mpidr);
+ __sysreg_restore_el1_state(guest_ctxt, midr, mpidr);
}
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
@@ -271,9 +267,5 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
- /* If leaving a nesting guest, restore MIDR_EL1 default view */
- if (vcpu_has_nv(vcpu))
- write_sysreg(read_cpuid_id(), vpidr_el2);
-
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 3d50a1bd2bdb..ec2569818629 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -63,7 +63,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
__load_stage2(mmu, mmu->arch);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
- write_sysreg(val, hcr_el2);
+ write_sysreg_hcr(val);
isb();
}
@@ -73,7 +73,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
* We're done with the TLB operation, let's restore the host's
* view of HCR_EL2.
*/
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
isb();
/* ... and the stage-2 MMU context that we switched away from */
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 27ce4cb44904..569941eeb3fe 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -15,6 +15,8 @@
GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0)
#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES \
GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0)
+#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES_2 \
+ GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT - 1, 0)
static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
{
@@ -360,6 +362,8 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
break;
case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
val[0] = smccc_feat->vendor_hyp_bmap;
+ /* Function numbers 2-63 are reserved for pKVM for now */
+ val[2] = smccc_feat->vendor_hyp_bmap_2;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
kvm_ptp_get_time(vcpu, val);
@@ -387,6 +391,7 @@ static const u64 kvm_arm_fw_reg_ids[] = {
KVM_REG_ARM_STD_BMAP,
KVM_REG_ARM_STD_HYP_BMAP,
KVM_REG_ARM_VENDOR_HYP_BMAP,
+ KVM_REG_ARM_VENDOR_HYP_BMAP_2,
};
void kvm_arm_init_hypercalls(struct kvm *kvm)
@@ -497,6 +502,9 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM_VENDOR_HYP_BMAP:
val = READ_ONCE(smccc_feat->vendor_hyp_bmap);
break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP_2:
+ val = READ_ONCE(smccc_feat->vendor_hyp_bmap_2);
+ break;
default:
return -ENOENT;
}
@@ -527,6 +535,10 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
fw_reg_bmap = &smccc_feat->vendor_hyp_bmap;
fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP_2:
+ fw_reg_bmap = &smccc_feat->vendor_hyp_bmap_2;
+ fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES_2;
+ break;
default:
return -ENOENT;
}
@@ -633,6 +645,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM_STD_BMAP:
case KVM_REG_ARM_STD_HYP_BMAP:
case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ case KVM_REG_ARM_VENDOR_HYP_BMAP_2:
return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val);
default:
return -ENOENT;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1f55b0c7b11d..2942ec92c5a4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1086,14 +1086,26 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
}
}
-static void hyp_mc_free_fn(void *addr, void *unused)
+static void hyp_mc_free_fn(void *addr, void *mc)
{
+ struct kvm_hyp_memcache *memcache = mc;
+
+ if (memcache->flags & HYP_MEMCACHE_ACCOUNT_STAGE2)
+ kvm_account_pgtable_pages(addr, -1);
+
free_page((unsigned long)addr);
}
-static void *hyp_mc_alloc_fn(void *unused)
+static void *hyp_mc_alloc_fn(void *mc)
{
- return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+ struct kvm_hyp_memcache *memcache = mc;
+ void *addr;
+
+ addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+ if (addr && memcache->flags & HYP_MEMCACHE_ACCOUNT_STAGE2)
+ kvm_account_pgtable_pages(addr, 1);
+
+ return addr;
}
void free_hyp_memcache(struct kvm_hyp_memcache *mc)
@@ -1102,7 +1114,7 @@ void free_hyp_memcache(struct kvm_hyp_memcache *mc)
return;
kfree(mc->mapping);
- __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL);
+ __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, mc);
}
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
@@ -1117,7 +1129,7 @@ int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
}
return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
- kvm_host_pa, NULL);
+ kvm_host_pa, mc);
}
/**
@@ -1292,6 +1304,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
if (map_size == PAGE_SIZE)
return true;
+ /* pKVM only supports PMD_SIZE huge-mappings */
+ if (is_protected_kvm_enabled() && map_size != PMD_SIZE)
+ return false;
+
size = memslot->npages * PAGE_SIZE;
gpa_start = memslot->base_gfn << PAGE_SHIFT;
@@ -1489,6 +1505,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
+ if (!is_protected_kvm_enabled())
+ memcache = &vcpu->arch.mmu_page_cache;
+ else
+ memcache = &vcpu->arch.pkvm_memcache;
+
/*
* Permission faults just need to update the existing leaf entry,
* and so normally don't require allocations from the memcache. The
@@ -1498,13 +1519,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (!fault_is_perm || (logging_active && write_fault)) {
int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
- if (!is_protected_kvm_enabled()) {
- memcache = &vcpu->arch.mmu_page_cache;
+ if (!is_protected_kvm_enabled())
ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
- } else {
- memcache = &vcpu->arch.pkvm_memcache;
+ else
ret = topup_hyp_memcache(memcache, min_pages);
- }
+
if (ret)
return ret;
}
@@ -1525,7 +1544,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* logging_active is guaranteed to never be true for VM_PFNMAP
* memslots.
*/
- if (logging_active || is_protected_kvm_enabled()) {
+ if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
} else {
@@ -1782,9 +1801,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;
+ /* Synchronous External Abort? */
+ if (kvm_vcpu_abt_issea(vcpu)) {
+ /*
+ * For RAS the host kernel may handle this abort.
+ * There is no need to pass the error into the guest.
+ */
+ if (kvm_handle_guest_sea())
+ kvm_inject_vabt(vcpu);
+
+ return 1;
+ }
+
esr = kvm_vcpu_get_esr(vcpu);
+ /*
+ * The fault IPA should be reliable at this point as we're not dealing
+ * with an SEA.
+ */
ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm))
+ return -EFAULT;
+
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (esr_fsc_is_translation_fault(esr)) {
@@ -1806,18 +1844,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
}
- /* Synchronous External Abort? */
- if (kvm_vcpu_abt_issea(vcpu)) {
- /*
- * For RAS the host kernel may handle this abort.
- * There is no need to pass the error into the guest.
- */
- if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
- kvm_inject_vabt(vcpu);
-
- return 1;
- }
-
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 0c9387d2f507..291dbe38eb5c 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -8,6 +8,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <asm/fixmap.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
@@ -16,8 +17,23 @@
#include "sys_regs.h"
-/* Protection against the sysreg repainting madness... */
-#define NV_FTR(r, f) ID_AA64##r##_EL1_##f
+struct vncr_tlb {
+ /* The guest's VNCR_EL2 */
+ u64 gva;
+ struct s1_walk_info wi;
+ struct s1_walk_result wr;
+
+ u64 hpa;
+
+ /* -1 when not mapped on a CPU */
+ int cpu;
+
+ /*
+ * true if the TLB is valid. Can only be changed with the
+ * mmu_lock held.
+ */
+ bool valid;
+};
/*
* Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
@@ -31,6 +47,7 @@ void kvm_init_nested(struct kvm *kvm)
{
kvm->arch.nested_mmus = NULL;
kvm->arch.nested_mmus_size = 0;
+ atomic_set(&kvm->arch.vncr_map_count, 0);
}
static int init_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
@@ -54,6 +71,17 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
struct kvm_s2_mmu *tmp;
int num_mmus, ret = 0;
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features) &&
+ !cpus_have_final_cap(ARM64_HAS_HCR_NV1))
+ return -EINVAL;
+
+ if (!vcpu->arch.ctxt.vncr_array)
+ vcpu->arch.ctxt.vncr_array = (u64 *)__get_free_page(GFP_KERNEL_ACCOUNT |
+ __GFP_ZERO);
+
+ if (!vcpu->arch.ctxt.vncr_array)
+ return -ENOMEM;
+
/*
* Let's treat memory allocation failures as benign: If we fail to
* allocate anything, return an error and keep the allocated array
@@ -84,6 +112,9 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);
+ free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
+ vcpu->arch.ctxt.vncr_array = NULL;
+
return ret;
}
@@ -404,6 +435,30 @@ static unsigned int ttl_to_size(u8 ttl)
return max_size;
}
+static u8 pgshift_level_to_ttl(u16 shift, u8 level)
+{
+ u8 ttl;
+
+ switch(shift) {
+ case 12:
+ ttl = TLBI_TTL_TG_4K;
+ break;
+ case 14:
+ ttl = TLBI_TTL_TG_16K;
+ break;
+ case 16:
+ ttl = TLBI_TTL_TG_64K;
+ break;
+ default:
+ BUG();
+ }
+
+ ttl <<= 2;
+ ttl |= level & 3;
+
+ return ttl;
+}
+
/*
* Compute the equivalent of the TTL field by parsing the shadow PT. The
* granule size is extracted from the cached VTCR_EL2.TG0 while the level is
@@ -675,23 +730,36 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
{
/*
- * The vCPU kept its reference on the MMU after the last put, keep
- * rolling with it.
+ * If the vCPU kept its reference on the MMU after the last put,
+ * keep rolling with it.
*/
- if (vcpu->arch.hw_mmu)
- return;
-
if (is_hyp_ctxt(vcpu)) {
- vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+ if (!vcpu->arch.hw_mmu)
+ vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
} else {
- write_lock(&vcpu->kvm->mmu_lock);
- vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
- write_unlock(&vcpu->kvm->mmu_lock);
+ if (!vcpu->arch.hw_mmu) {
+ scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
+ vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
+ }
+
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
+ kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
}
}
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
+ /* Unconditionally drop the VNCR mapping if we have one */
+ if (host_data_test_flag(L1_VNCR_MAPPED)) {
+ BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
+ BUG_ON(is_hyp_ctxt(vcpu));
+
+ clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
+ vcpu->arch.vncr_tlb->cpu = -1;
+ host_data_clear_flag(L1_VNCR_MAPPED);
+ atomic_dec(&vcpu->kvm->arch.vncr_map_count);
+ }
+
/*
* Keep a reference on the associated stage-2 MMU if the vCPU is
* scheduling out and not in WFI emulation, suggesting it is likely to
@@ -742,6 +810,245 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
return kvm_inject_nested_sync(vcpu, esr_el2);
}
+static void invalidate_vncr(struct vncr_tlb *vt)
+{
+ vt->valid = false;
+ if (vt->cpu != -1)
+ clear_fixmap(vncr_fixmap(vt->cpu));
+}
+
+static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+ return;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ u64 ipa_start, ipa_end, ipa_size;
+
+ /*
+ * Careful here: We end-up here from an MMU notifier,
+ * and this can race against a vcpu not being onlined
+ * yet, without the pseudo-TLB being allocated.
+ *
+ * Skip those, as they obviously don't participate in
+ * the invalidation at this stage.
+ */
+ if (!vt)
+ continue;
+
+ if (!vt->valid)
+ continue;
+
+ ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
+ vt->wr.level));
+ ipa_start = vt->wr.pa & (ipa_size - 1);
+ ipa_end = ipa_start + ipa_size;
+
+ if (ipa_end <= start || ipa_start >= end)
+ continue;
+
+ invalidate_vncr(vt);
+ }
+}
+
+struct s1e2_tlbi_scope {
+ enum {
+ TLBI_ALL,
+ TLBI_VA,
+ TLBI_VAA,
+ TLBI_ASID,
+ } type;
+
+ u16 asid;
+ u64 va;
+ u64 size;
+};
+
+static void invalidate_vncr_va(struct kvm *kvm,
+ struct s1e2_tlbi_scope *scope)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ u64 va_start, va_end, va_size;
+
+ if (!vt->valid)
+ continue;
+
+ va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
+ vt->wr.level));
+ va_start = vt->gva & (va_size - 1);
+ va_end = va_start + va_size;
+
+ switch (scope->type) {
+ case TLBI_ALL:
+ break;
+
+ case TLBI_VA:
+ if (va_end <= scope->va ||
+ va_start >= (scope->va + scope->size))
+ continue;
+ if (vt->wr.nG && vt->wr.asid != scope->asid)
+ continue;
+ break;
+
+ case TLBI_VAA:
+ if (va_end <= scope->va ||
+ va_start >= (scope->va + scope->size))
+ continue;
+ break;
+
+ case TLBI_ASID:
+ if (!vt->wr.nG || vt->wr.asid != scope->asid)
+ continue;
+ break;
+ }
+
+ invalidate_vncr(vt);
+ }
+}
+
+static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val,
+ struct s1e2_tlbi_scope *scope)
+{
+ switch (inst) {
+ case OP_TLBI_ALLE2:
+ case OP_TLBI_ALLE2IS:
+ case OP_TLBI_ALLE2OS:
+ case OP_TLBI_VMALLE1:
+ case OP_TLBI_VMALLE1IS:
+ case OP_TLBI_VMALLE1OS:
+ case OP_TLBI_ALLE2NXS:
+ case OP_TLBI_ALLE2ISNXS:
+ case OP_TLBI_ALLE2OSNXS:
+ case OP_TLBI_VMALLE1NXS:
+ case OP_TLBI_VMALLE1ISNXS:
+ case OP_TLBI_VMALLE1OSNXS:
+ scope->type = TLBI_ALL;
+ break;
+ case OP_TLBI_VAE2:
+ case OP_TLBI_VAE2IS:
+ case OP_TLBI_VAE2OS:
+ case OP_TLBI_VAE1:
+ case OP_TLBI_VAE1IS:
+ case OP_TLBI_VAE1OS:
+ case OP_TLBI_VAE2NXS:
+ case OP_TLBI_VAE2ISNXS:
+ case OP_TLBI_VAE2OSNXS:
+ case OP_TLBI_VAE1NXS:
+ case OP_TLBI_VAE1ISNXS:
+ case OP_TLBI_VAE1OSNXS:
+ case OP_TLBI_VALE2:
+ case OP_TLBI_VALE2IS:
+ case OP_TLBI_VALE2OS:
+ case OP_TLBI_VALE1:
+ case OP_TLBI_VALE1IS:
+ case OP_TLBI_VALE1OS:
+ case OP_TLBI_VALE2NXS:
+ case OP_TLBI_VALE2ISNXS:
+ case OP_TLBI_VALE2OSNXS:
+ case OP_TLBI_VALE1NXS:
+ case OP_TLBI_VALE1ISNXS:
+ case OP_TLBI_VALE1OSNXS:
+ scope->type = TLBI_VA;
+ scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val));
+ if (!scope->size)
+ scope->size = SZ_1G;
+ scope->va = (val << 12) & ~(scope->size - 1);
+ scope->asid = FIELD_GET(TLBIR_ASID_MASK, val);
+ break;
+ case OP_TLBI_ASIDE1:
+ case OP_TLBI_ASIDE1IS:
+ case OP_TLBI_ASIDE1OS:
+ case OP_TLBI_ASIDE1NXS:
+ case OP_TLBI_ASIDE1ISNXS:
+ case OP_TLBI_ASIDE1OSNXS:
+ scope->type = TLBI_ASID;
+ scope->asid = FIELD_GET(TLBIR_ASID_MASK, val);
+ break;
+ case OP_TLBI_VAAE1:
+ case OP_TLBI_VAAE1IS:
+ case OP_TLBI_VAAE1OS:
+ case OP_TLBI_VAAE1NXS:
+ case OP_TLBI_VAAE1ISNXS:
+ case OP_TLBI_VAAE1OSNXS:
+ case OP_TLBI_VAALE1:
+ case OP_TLBI_VAALE1IS:
+ case OP_TLBI_VAALE1OS:
+ case OP_TLBI_VAALE1NXS:
+ case OP_TLBI_VAALE1ISNXS:
+ case OP_TLBI_VAALE1OSNXS:
+ scope->type = TLBI_VAA;
+ scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val));
+ if (!scope->size)
+ scope->size = SZ_1G;
+ scope->va = (val << 12) & ~(scope->size - 1);
+ break;
+ case OP_TLBI_RVAE2:
+ case OP_TLBI_RVAE2IS:
+ case OP_TLBI_RVAE2OS:
+ case OP_TLBI_RVAE1:
+ case OP_TLBI_RVAE1IS:
+ case OP_TLBI_RVAE1OS:
+ case OP_TLBI_RVAE2NXS:
+ case OP_TLBI_RVAE2ISNXS:
+ case OP_TLBI_RVAE2OSNXS:
+ case OP_TLBI_RVAE1NXS:
+ case OP_TLBI_RVAE1ISNXS:
+ case OP_TLBI_RVAE1OSNXS:
+ case OP_TLBI_RVALE2:
+ case OP_TLBI_RVALE2IS:
+ case OP_TLBI_RVALE2OS:
+ case OP_TLBI_RVALE1:
+ case OP_TLBI_RVALE1IS:
+ case OP_TLBI_RVALE1OS:
+ case OP_TLBI_RVALE2NXS:
+ case OP_TLBI_RVALE2ISNXS:
+ case OP_TLBI_RVALE2OSNXS:
+ case OP_TLBI_RVALE1NXS:
+ case OP_TLBI_RVALE1ISNXS:
+ case OP_TLBI_RVALE1OSNXS:
+ scope->type = TLBI_VA;
+ scope->va = decode_range_tlbi(val, &scope->size, &scope->asid);
+ break;
+ case OP_TLBI_RVAAE1:
+ case OP_TLBI_RVAAE1IS:
+ case OP_TLBI_RVAAE1OS:
+ case OP_TLBI_RVAAE1NXS:
+ case OP_TLBI_RVAAE1ISNXS:
+ case OP_TLBI_RVAAE1OSNXS:
+ case OP_TLBI_RVAALE1:
+ case OP_TLBI_RVAALE1IS:
+ case OP_TLBI_RVAALE1OS:
+ case OP_TLBI_RVAALE1NXS:
+ case OP_TLBI_RVAALE1ISNXS:
+ case OP_TLBI_RVAALE1OSNXS:
+ scope->type = TLBI_VAA;
+ scope->va = decode_range_tlbi(val, &scope->size, NULL);
+ break;
+ }
+}
+
+void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val)
+{
+ struct s1e2_tlbi_scope scope = {};
+
+ compute_s1_tlbi_range(vcpu, inst, val, &scope);
+
+ guard(write_lock)(&vcpu->kvm->mmu_lock);
+ invalidate_vncr_va(vcpu->kvm, &scope);
+}
+
void kvm_nested_s2_wp(struct kvm *kvm)
{
int i;
@@ -754,6 +1061,8 @@ void kvm_nested_s2_wp(struct kvm *kvm)
if (kvm_s2_mmu_valid(mmu))
kvm_stage2_wp_range(mmu, 0, kvm_phys_size(mmu));
}
+
+ kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
}
void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
@@ -768,6 +1077,8 @@ void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
if (kvm_s2_mmu_valid(mmu))
kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
}
+
+ kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
}
void kvm_nested_s2_flush(struct kvm *kvm)
@@ -801,140 +1112,446 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
}
/*
+ * Dealing with VNCR_EL2 exposed by the *guest* is a complicated matter:
+ *
+ * - We introduce an internal representation of a vcpu-private TLB,
+ * representing the mapping between the guest VA contained in VNCR_EL2,
+ * the IPA the guest's EL2 PTs point to, and the actual PA this lives at.
+ *
+ * - On translation fault from a nested VNCR access, we create such a TLB.
+ * If there is no mapping to describe, the guest inherits the fault.
+ * Crucially, no actual mapping is done at this stage.
+ *
+ * - On vcpu_load() in a non-HYP context with HCR_EL2.NV==1, if the above
+ * TLB exists, we map it in the fixmap for this CPU, and run with it. We
+ * have to respect the permissions dictated by the guest, but not the
+ * memory type (FWB is a must).
+ *
+ * - Note that we usually don't do a vcpu_load() on the back of a fault
+ * (unless we are preempted), so the resolution of a translation fault
+ * must go via a request that will map the VNCR page in the fixmap.
+ * vcpu_load() might as well use the same mechanism.
+ *
+ * - On vcpu_put() in a non-HYP context with HCR_EL2.NV==1, if the TLB was
+ * mapped, we unmap it. Yes it is that simple. The TLB still exists
+ * though, and may be reused at a later load.
+ *
+ * - On permission fault, we simply forward the fault to the guest's EL2.
+ * Get out of my way.
+ *
+ * - On any TLBI for the EL2&0 translation regime, we must find any TLB that
+ * intersects with the TLBI request, invalidate it, and unmap the page
+ * from the fixmap. Because we need to look at all the vcpu-private TLBs,
+ * this requires some wide-ranging locking to ensure that nothing races
+ * against it. This may require some refcounting to avoid the search when
+ * no such TLB is present.
+ *
+ * - On MMU notifiers, we must invalidate our TLB in a similar way, but
+ * looking at the IPA instead. The funny part is that there may not be a
+ * stage-2 mapping for this page if L1 hasn't accessed it using LD/ST
+ * instructions.
+ */
+
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+ return 0;
+
+ vcpu->arch.vncr_tlb = kzalloc(sizeof(*vcpu->arch.vncr_tlb),
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.vncr_tlb)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static u64 read_vncr_el2(struct kvm_vcpu *vcpu)
+{
+ return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48);
+}
+
+static int kvm_translate_vncr(struct kvm_vcpu *vcpu)
+{
+ bool write_fault, writable;
+ unsigned long mmu_seq;
+ struct vncr_tlb *vt;
+ struct page *page;
+ u64 va, pfn, gfn;
+ int ret;
+
+ vt = vcpu->arch.vncr_tlb;
+
+ /*
+ * If we're about to walk the EL2 S1 PTs, we must invalidate the
+ * current TLB, as it could be sampled from another vcpu doing a
+ * TLBI *IS. A real CPU wouldn't do that, but we only keep a single
+ * translation, so not much of a choice.
+ *
+ * We also prepare the next walk wilst we're at it.
+ */
+ scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
+ invalidate_vncr(vt);
+
+ vt->wi = (struct s1_walk_info) {
+ .regime = TR_EL20,
+ .as_el0 = false,
+ .pan = false,
+ };
+ vt->wr = (struct s1_walk_result){};
+ }
+
+ guard(srcu)(&vcpu->kvm->srcu);
+
+ va = read_vncr_el2(vcpu);
+
+ ret = __kvm_translate_va(vcpu, &vt->wi, &vt->wr, va);
+ if (ret)
+ return ret;
+
+ write_fault = kvm_is_write_fault(vcpu);
+
+ mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+ smp_rmb();
+
+ gfn = vt->wr.pa >> PAGE_SHIFT;
+ pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page);
+ if (is_error_noslot_pfn(pfn) || (write_fault && !writable))
+ return -EFAULT;
+
+ scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
+ if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
+ return -EAGAIN;
+
+ vt->gva = va;
+ vt->hpa = pfn << PAGE_SHIFT;
+ vt->valid = true;
+ vt->cpu = -1;
+
+ kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
+ kvm_release_faultin_page(vcpu->kvm, page, false, vt->wr.pw);
+ }
+
+ if (vt->wr.pw)
+ mark_page_dirty(vcpu->kvm, gfn);
+
+ return 0;
+}
+
+static void inject_vncr_perm(struct kvm_vcpu *vcpu)
+{
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+
+ /* Adjust the fault level to reflect that of the guest's */
+ esr &= ~ESR_ELx_FSC;
+ esr |= FIELD_PREP(ESR_ELx_FSC,
+ ESR_ELx_FSC_PERM_L(vt->wr.level));
+
+ kvm_inject_nested_sync(vcpu, esr);
+}
+
+static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
+{
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+
+ lockdep_assert_held_read(&vcpu->kvm->mmu_lock);
+
+ if (!vt->valid)
+ return false;
+
+ if (read_vncr_el2(vcpu) != vt->gva)
+ return false;
+
+ if (vt->wr.nG) {
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ u64 ttbr = ((tcr & TCR_A1) ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+ u16 asid;
+
+ asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+ !(tcr & TCR_ASID16))
+ asid &= GENMASK(7, 0);
+
+ return asid != vt->wr.asid;
+ }
+
+ return true;
+}
+
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
+{
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT));
+
+ if (esr_fsc_is_permission_fault(esr)) {
+ inject_vncr_perm(vcpu);
+ } else if (esr_fsc_is_translation_fault(esr)) {
+ bool valid;
+ int ret;
+
+ scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
+ valid = kvm_vncr_tlb_lookup(vcpu);
+
+ if (!valid)
+ ret = kvm_translate_vncr(vcpu);
+ else
+ ret = -EPERM;
+
+ switch (ret) {
+ case -EAGAIN:
+ case -ENOMEM:
+ /* Let's try again... */
+ break;
+ case -EFAULT:
+ case -EINVAL:
+ case -ENOENT:
+ case -EACCES:
+ /*
+ * Translation failed, inject the corresponding
+ * exception back to EL2.
+ */
+ BUG_ON(!vt->wr.failed);
+
+ esr &= ~ESR_ELx_FSC;
+ esr |= FIELD_PREP(ESR_ELx_FSC, vt->wr.fst);
+
+ kvm_inject_nested_sync(vcpu, esr);
+ break;
+ case -EPERM:
+ /* Hack to deal with POE until we get kernel support */
+ inject_vncr_perm(vcpu);
+ break;
+ case 0:
+ break;
+ }
+ } else {
+ WARN_ONCE(1, "Unhandled VNCR abort, ESR=%llx\n", esr);
+ }
+
+ return 1;
+}
+
+static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
+{
+ struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ pgprot_t prot;
+
+ guard(preempt)();
+ guard(read_lock)(&vcpu->kvm->mmu_lock);
+
+ /*
+ * The request to map VNCR may have raced against some other
+ * event, such as an interrupt, and may not be valid anymore.
+ */
+ if (is_hyp_ctxt(vcpu))
+ return;
+
+ /*
+ * Check that the pseudo-TLB is valid and that VNCR_EL2 still
+ * contains the expected value. If it doesn't, we simply bail out
+ * without a mapping -- a transformed MSR/MRS will generate the
+ * fault and allows us to populate the pseudo-TLB.
+ */
+ if (!vt->valid)
+ return;
+
+ if (read_vncr_el2(vcpu) != vt->gva)
+ return;
+
+ if (vt->wr.nG) {
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ u64 ttbr = ((tcr & TCR_A1) ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+ u16 asid;
+
+ asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+ !(tcr & TCR_ASID16))
+ asid &= GENMASK(7, 0);
+
+ if (asid != vt->wr.asid)
+ return;
+ }
+
+ vt->cpu = smp_processor_id();
+
+ if (vt->wr.pw && vt->wr.pr)
+ prot = PAGE_KERNEL;
+ else if (vt->wr.pr)
+ prot = PAGE_KERNEL_RO;
+ else
+ prot = PAGE_NONE;
+
+ /*
+ * We can't map write-only (or no permission at all) in the kernel,
+ * but the guest can do it if using POE, so we'll have to turn a
+ * translation fault into a permission fault at runtime.
+ * FIXME: WO doesn't work at all, need POE support in the kernel.
+ */
+ if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) {
+ __set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot);
+ host_data_set_flag(L1_VNCR_MAPPED);
+ atomic_inc(&vcpu->kvm->arch.vncr_map_count);
+ }
+}
+
+/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
* of feature bits we don't intend to support for the time being.
* This list should get updated as new features get added to the NV
* support, and new extension to the architecture.
*/
-static void limit_nv_id_regs(struct kvm *kvm)
+u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
- u64 val, tmp;
-
- /* Support everything but TME */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1);
- val &= ~NV_FTR(ISAR0, TME);
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val);
-
- /* Support everything but Spec Invalidation and LS64 */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1);
- val &= ~(NV_FTR(ISAR1, LS64) |
- NV_FTR(ISAR1, SPECRES));
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val);
-
- /* No AMU, MPAM, S-EL2, or RAS */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1);
- val &= ~(GENMASK_ULL(55, 52) |
- NV_FTR(PFR0, AMU) |
- NV_FTR(PFR0, MPAM) |
- NV_FTR(PFR0, SEL2) |
- NV_FTR(PFR0, RAS) |
- NV_FTR(PFR0, EL3) |
- NV_FTR(PFR0, EL2) |
- NV_FTR(PFR0, EL1) |
- NV_FTR(PFR0, EL0));
- /* 64bit only at any EL */
- val |= FIELD_PREP(NV_FTR(PFR0, EL0), 0b0001);
- val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
- val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
- val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
-
- /* Only support BTI, SSBS, CSV2_frac */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1);
- val &= (NV_FTR(PFR1, BT) |
- NV_FTR(PFR1, SSBS) |
- NV_FTR(PFR1, CSV2_frac));
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val);
-
- /* Hide ECV, ExS, Secure Memory */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
- val &= ~(NV_FTR(MMFR0, ECV) |
- NV_FTR(MMFR0, EXS) |
- NV_FTR(MMFR0, TGRAN4_2) |
- NV_FTR(MMFR0, TGRAN16_2) |
- NV_FTR(MMFR0, TGRAN64_2) |
- NV_FTR(MMFR0, SNSMEM));
-
- /* Disallow unsupported S2 page sizes */
- switch (PAGE_SIZE) {
- case SZ_64K:
- val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
- fallthrough;
- case SZ_16K:
- val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
- fallthrough;
- case SZ_4K:
- /* Support everything */
+ switch (reg) {
+ case SYS_ID_AA64ISAR0_EL1:
+ /* Support everything but TME */
+ val &= ~ID_AA64ISAR0_EL1_TME;
break;
- }
- /*
- * Since we can't support a guest S2 page size smaller than
- * the host's own page size (due to KVM only populating its
- * own S2 using the kernel's page size), advertise the
- * limitation using FEAT_GTG.
- */
- switch (PAGE_SIZE) {
- case SZ_4K:
- val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
- fallthrough;
- case SZ_16K:
- val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
- fallthrough;
- case SZ_64K:
- val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
+
+ case SYS_ID_AA64ISAR1_EL1:
+ /* Support everything but LS64 and Spec Invalidation */
+ val &= ~(ID_AA64ISAR1_EL1_LS64 |
+ ID_AA64ISAR1_EL1_SPECRES);
+ break;
+
+ case SYS_ID_AA64PFR0_EL1:
+ /* No RME, AMU, MPAM, S-EL2, or RAS */
+ val &= ~(ID_AA64PFR0_EL1_RME |
+ ID_AA64PFR0_EL1_AMU |
+ ID_AA64PFR0_EL1_MPAM |
+ ID_AA64PFR0_EL1_SEL2 |
+ ID_AA64PFR0_EL1_RAS |
+ ID_AA64PFR0_EL1_EL3 |
+ ID_AA64PFR0_EL1_EL2 |
+ ID_AA64PFR0_EL1_EL1 |
+ ID_AA64PFR0_EL1_EL0);
+ /* 64bit only at any EL */
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP);
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP);
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP);
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP);
+ break;
+
+ case SYS_ID_AA64PFR1_EL1:
+ /* Only support BTI, SSBS, CSV2_frac */
+ val &= (ID_AA64PFR1_EL1_BT |
+ ID_AA64PFR1_EL1_SSBS |
+ ID_AA64PFR1_EL1_CSV2_frac);
+ break;
+
+ case SYS_ID_AA64MMFR0_EL1:
+ /* Hide ExS, Secure Memory */
+ val &= ~(ID_AA64MMFR0_EL1_EXS |
+ ID_AA64MMFR0_EL1_TGRAN4_2 |
+ ID_AA64MMFR0_EL1_TGRAN16_2 |
+ ID_AA64MMFR0_EL1_TGRAN64_2 |
+ ID_AA64MMFR0_EL1_SNSMEM);
+
+ /* Hide CNTPOFF if present */
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, ECV, IMP);
+
+ /* Disallow unsupported S2 page sizes */
+ switch (PAGE_SIZE) {
+ case SZ_64K:
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, NI);
+ fallthrough;
+ case SZ_16K:
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, NI);
+ fallthrough;
+ case SZ_4K:
+ /* Support everything */
+ break;
+ }
+
+ /*
+ * Since we can't support a guest S2 page size smaller
+ * than the host's own page size (due to KVM only
+ * populating its own S2 using the kernel's page
+ * size), advertise the limitation using FEAT_GTG.
+ */
+ switch (PAGE_SIZE) {
+ case SZ_4K:
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
+ fallthrough;
+ case SZ_16K:
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
+ fallthrough;
+ case SZ_64K:
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
+ break;
+ }
+
+ /* Cap PARange to 48bits */
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, PARANGE, 48);
+ break;
+
+ case SYS_ID_AA64MMFR1_EL1:
+ val &= (ID_AA64MMFR1_EL1_HCX |
+ ID_AA64MMFR1_EL1_PAN |
+ ID_AA64MMFR1_EL1_LO |
+ ID_AA64MMFR1_EL1_HPDS |
+ ID_AA64MMFR1_EL1_VH |
+ ID_AA64MMFR1_EL1_VMIDBits);
+ /* FEAT_E2H0 implies no VHE */
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
+ val &= ~ID_AA64MMFR1_EL1_VH;
+ break;
+
+ case SYS_ID_AA64MMFR2_EL1:
+ val &= ~(ID_AA64MMFR2_EL1_BBM |
+ ID_AA64MMFR2_EL1_TTL |
+ GENMASK_ULL(47, 44) |
+ ID_AA64MMFR2_EL1_ST |
+ ID_AA64MMFR2_EL1_CCIDX |
+ ID_AA64MMFR2_EL1_VARange);
+
+ /* Force TTL support */
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR2_EL1, TTL, IMP);
+ break;
+
+ case SYS_ID_AA64MMFR4_EL1:
+ /*
+ * You get EITHER
+ *
+ * - FEAT_VHE without FEAT_E2H0
+ * - FEAT_NV limited to FEAT_NV2
+ * - HCR_EL2.NV1 being RES0
+ *
+ * OR
+ *
+ * - FEAT_E2H0 without FEAT_VHE nor FEAT_NV
+ *
+ * Life is too short for anything else.
+ */
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) {
+ val = 0;
+ } else {
+ val = SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY);
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI_NV1);
+ }
+ break;
+
+ case SYS_ID_AA64DFR0_EL1:
+ /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
+ val &= (ID_AA64DFR0_EL1_PMUVer |
+ ID_AA64DFR0_EL1_WRPs |
+ ID_AA64DFR0_EL1_BRPs |
+ ID_AA64DFR0_EL1_DebugVer|
+ ID_AA64DFR0_EL1_HPMN0);
+
+ /* Cap Debug to ARMv8.1 */
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE);
break;
}
- /* Cap PARange to 48bits */
- tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
- if (tmp > 0b0101) {
- val &= ~NV_FTR(MMFR0, PARANGE);
- val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
- }
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val);
-
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1);
- val &= (NV_FTR(MMFR1, HCX) |
- NV_FTR(MMFR1, PAN) |
- NV_FTR(MMFR1, LO) |
- NV_FTR(MMFR1, HPDS) |
- NV_FTR(MMFR1, VH) |
- NV_FTR(MMFR1, VMIDBits));
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val);
-
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1);
- val &= ~(NV_FTR(MMFR2, BBM) |
- NV_FTR(MMFR2, TTL) |
- GENMASK_ULL(47, 44) |
- NV_FTR(MMFR2, ST) |
- NV_FTR(MMFR2, CCIDX) |
- NV_FTR(MMFR2, VARange));
-
- /* Force TTL support */
- val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val);
-
- val = 0;
- if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
- val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
- ID_AA64MMFR4_EL1_E2H0_NI_NV1);
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);
-
- /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
- val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
- val &= (NV_FTR(DFR0, PMUVer) |
- NV_FTR(DFR0, WRPs) |
- NV_FTR(DFR0, BRPs) |
- NV_FTR(DFR0, DebugVer) |
- NV_FTR(DFR0, HPMN0));
-
- /* Cap Debug to ARMv8.1 */
- tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
- if (tmp > 0b0111) {
- val &= ~NV_FTR(DFR0, DebugVer);
- val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
- }
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);
+
+ return val;
}
u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu,
@@ -981,8 +1598,6 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
if (!kvm->arch.sysreg_masks)
return -ENOMEM;
- limit_nv_id_regs(kvm);
-
/* VTTBR_EL2 */
res0 = res1 = 0;
if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16))
@@ -1002,213 +1617,49 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1);
/* HCR_EL2 */
- res0 = BIT(48);
- res1 = HCR_RW;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, TWED, IMP))
- res0 |= GENMASK(63, 59);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, MTE2))
- res0 |= (HCR_TID5 | HCR_DCT | HCR_ATA);
- if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, TTLBxS))
- res0 |= (HCR_TTLBIS | HCR_TTLBOS);
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
- !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
- res0 |= HCR_ENSCXT;
- if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, IMP))
- res0 |= (HCR_TOCU | HCR_TICAB | HCR_TID4);
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
- res0 |= HCR_AMVOFFEN;
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1))
- res0 |= HCR_FIEN;
- if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP))
- res0 |= HCR_FWB;
- if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2))
- res0 |= HCR_NV2;
- if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
- res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
- if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
- res0 |= (HCR_API | HCR_APK);
- if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP))
- res0 |= BIT(39);
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
- res0 |= (HCR_TEA | HCR_TERR);
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
- res0 |= HCR_TLOR;
- if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP))
- res1 |= HCR_E2H;
+ get_reg_fixed_bits(kvm, HCR_EL2, &res0, &res1);
set_sysreg_masks(kvm, HCR_EL2, res0, res1);
/* HCRX_EL2 */
- res0 = HCRX_EL2_RES0;
- res1 = HCRX_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64ISAR3_EL1, PACM, TRIVIAL_IMP))
- res0 |= HCRX_EL2_PACMEn;
- if (!kvm_has_feat(kvm, ID_AA64PFR2_EL1, FPMR, IMP))
- res0 |= HCRX_EL2_EnFPM;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
- res0 |= HCRX_EL2_GCSEn;
- if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, SYSREG_128, IMP))
- res0 |= HCRX_EL2_EnIDCP128;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ADERR, DEV_ASYNC))
- res0 |= (HCRX_EL2_EnSDERR | HCRX_EL2_EnSNERR);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, DF2, IMP))
- res0 |= HCRX_EL2_TMEA;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
- res0 |= HCRX_EL2_D128En;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
- res0 |= HCRX_EL2_PTTWI;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP))
- res0 |= HCRX_EL2_SCTLR2En;
- if (!kvm_has_tcr2(kvm))
- res0 |= HCRX_EL2_TCR2En;
- if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
- res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, CMOW, IMP))
- res0 |= HCRX_EL2_CMOW;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, NMI, IMP))
- res0 |= (HCRX_EL2_VFNMI | HCRX_EL2_VINMI | HCRX_EL2_TALLINT);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP) ||
- !(read_sysreg_s(SYS_SMIDR_EL1) & SMIDR_EL1_SMPS))
- res0 |= HCRX_EL2_SMPME;
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))
- res0 |= (HCRX_EL2_FGTnXS | HCRX_EL2_FnXS);
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V))
- res0 |= HCRX_EL2_EnASR;
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64))
- res0 |= HCRX_EL2_EnALS;
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
- res0 |= HCRX_EL2_EnAS0;
+ get_reg_fixed_bits(kvm, HCRX_EL2, &res0, &res1);
set_sysreg_masks(kvm, HCRX_EL2, res0, res1);
/* HFG[RW]TR_EL2 */
- res0 = res1 = 0;
- if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
- res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey |
- HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey |
- HFGxTR_EL2_APIBKey);
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
- res0 |= (HFGxTR_EL2_LORC_EL1 | HFGxTR_EL2_LOREA_EL1 |
- HFGxTR_EL2_LORID_EL1 | HFGxTR_EL2_LORN_EL1 |
- HFGxTR_EL2_LORSA_EL1);
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
- !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
- res0 |= (HFGxTR_EL2_SCXTNUM_EL1 | HFGxTR_EL2_SCXTNUM_EL0);
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP))
- res0 |= HFGxTR_EL2_ICC_IGRPENn_EL1;
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
- res0 |= (HFGxTR_EL2_ERRIDR_EL1 | HFGxTR_EL2_ERRSELR_EL1 |
- HFGxTR_EL2_ERXFR_EL1 | HFGxTR_EL2_ERXCTLR_EL1 |
- HFGxTR_EL2_ERXSTATUS_EL1 | HFGxTR_EL2_ERXMISCn_EL1 |
- HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 |
- HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
- res0 |= HFGxTR_EL2_nACCDATA_EL1;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
- res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
- res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
- res0 |= HFGxTR_EL2_nRCWMASK_EL1;
- if (!kvm_has_s1pie(kvm))
- res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1);
- if (!kvm_has_s1poe(kvm))
- res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
- res0 |= HFGxTR_EL2_nS2POR_EL1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
- res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1);
- set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1);
- set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1);
+ get_reg_fixed_bits(kvm, HFGRTR_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HFGRTR_EL2, res0, res1);
+ get_reg_fixed_bits(kvm, HFGWTR_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HFGWTR_EL2, res0, res1);
/* HDFG[RW]TR_EL2 */
- res0 = res1 = 0;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
- res0 |= HDFGRTR_EL2_OSDLR_EL1;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
- res0 |= (HDFGRTR_EL2_PMEVCNTRn_EL0 | HDFGRTR_EL2_PMEVTYPERn_EL0 |
- HDFGRTR_EL2_PMCCFILTR_EL0 | HDFGRTR_EL2_PMCCNTR_EL0 |
- HDFGRTR_EL2_PMCNTEN | HDFGRTR_EL2_PMINTEN |
- HDFGRTR_EL2_PMOVS | HDFGRTR_EL2_PMSELR_EL0 |
- HDFGRTR_EL2_PMMIR_EL1 | HDFGRTR_EL2_PMUSERENR_EL0 |
- HDFGRTR_EL2_PMCEIDn_EL0);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
- res0 |= (HDFGRTR_EL2_PMBLIMITR_EL1 | HDFGRTR_EL2_PMBPTR_EL1 |
- HDFGRTR_EL2_PMBSR_EL1 | HDFGRTR_EL2_PMSCR_EL1 |
- HDFGRTR_EL2_PMSEVFR_EL1 | HDFGRTR_EL2_PMSFCR_EL1 |
- HDFGRTR_EL2_PMSICR_EL1 | HDFGRTR_EL2_PMSIDR_EL1 |
- HDFGRTR_EL2_PMSIRR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 |
- HDFGRTR_EL2_PMBIDR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
- res0 |= (HDFGRTR_EL2_TRC | HDFGRTR_EL2_TRCAUTHSTATUS |
- HDFGRTR_EL2_TRCAUXCTLR | HDFGRTR_EL2_TRCCLAIM |
- HDFGRTR_EL2_TRCCNTVRn | HDFGRTR_EL2_TRCID |
- HDFGRTR_EL2_TRCIMSPECn | HDFGRTR_EL2_TRCOSLSR |
- HDFGRTR_EL2_TRCPRGCTLR | HDFGRTR_EL2_TRCSEQSTR |
- HDFGRTR_EL2_TRCSSCSRn | HDFGRTR_EL2_TRCSTATR |
- HDFGRTR_EL2_TRCVICTLR);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
- res0 |= (HDFGRTR_EL2_TRBBASER_EL1 | HDFGRTR_EL2_TRBIDR_EL1 |
- HDFGRTR_EL2_TRBLIMITR_EL1 | HDFGRTR_EL2_TRBMAR_EL1 |
- HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 |
- HDFGRTR_EL2_TRBTRG_EL1);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
- res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL |
- HDFGRTR_EL2_nBRBDATA);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
- res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1;
- set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1);
-
- /* Reuse the bits from the read-side and add the write-specific stuff */
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
- res0 |= (HDFGWTR_EL2_PMCR_EL0 | HDFGWTR_EL2_PMSWINC_EL0);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
- res0 |= HDFGWTR_EL2_TRCOSLAR;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
- res0 |= HDFGWTR_EL2_TRFCR_EL1;
- set_sysreg_masks(kvm, HFGWTR_EL2, res0 | HDFGWTR_EL2_RES0, res1);
+ get_reg_fixed_bits(kvm, HDFGRTR_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HDFGRTR_EL2, res0, res1);
+ get_reg_fixed_bits(kvm, HDFGWTR_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HDFGWTR_EL2, res0, res1);
/* HFGITR_EL2 */
- res0 = HFGITR_EL2_RES0;
- res1 = HFGITR_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, DPB, DPB2))
- res0 |= HFGITR_EL2_DCCVADP;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
- res0 |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP);
- if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
- res0 |= (HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
- HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS |
- HFGITR_EL2_TLBIVAALE1OS | HFGITR_EL2_TLBIVALE1OS |
- HFGITR_EL2_TLBIVAAE1OS | HFGITR_EL2_TLBIASIDE1OS |
- HFGITR_EL2_TLBIVAE1OS | HFGITR_EL2_TLBIVMALLE1OS);
- if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
- res0 |= (HFGITR_EL2_TLBIRVAALE1 | HFGITR_EL2_TLBIRVALE1 |
- HFGITR_EL2_TLBIRVAAE1 | HFGITR_EL2_TLBIRVAE1 |
- HFGITR_EL2_TLBIRVAALE1IS | HFGITR_EL2_TLBIRVALE1IS |
- HFGITR_EL2_TLBIRVAAE1IS | HFGITR_EL2_TLBIRVAE1IS |
- HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
- HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS);
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, IMP))
- res0 |= (HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX |
- HFGITR_EL2_CPPRCTX);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
- res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL);
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
- res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 |
- HFGITR_EL2_nGCSEPP);
- if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX))
- res0 |= HFGITR_EL2_COSPRCTX;
- if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
- res0 |= HFGITR_EL2_ATS1E1A;
+ get_reg_fixed_bits(kvm, HFGITR_EL2, &res0, &res1);
set_sysreg_masks(kvm, HFGITR_EL2, res0, res1);
/* HAFGRTR_EL2 - not a lot to see here */
- res0 = HAFGRTR_EL2_RES0;
- res1 = HAFGRTR_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
- res0 |= ~(res0 | res1);
+ get_reg_fixed_bits(kvm, HAFGRTR_EL2, &res0, &res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
+ /* HFG[RW]TR2_EL2 */
+ get_reg_fixed_bits(kvm, HFGRTR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HFGRTR2_EL2, res0, res1);
+ get_reg_fixed_bits(kvm, HFGWTR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HFGWTR2_EL2, res0, res1);
+
+ /* HDFG[RW]TR2_EL2 */
+ get_reg_fixed_bits(kvm, HDFGRTR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HDFGRTR2_EL2, res0, res1);
+ get_reg_fixed_bits(kvm, HDFGWTR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HDFGWTR2_EL2, res0, res1);
+
+ /* HFGITR2_EL2 */
+ get_reg_fixed_bits(kvm, HFGITR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1);
+
/* TCR2_EL2 */
res0 = TCR2_EL2_RES0;
res1 = TCR2_EL2_RES1;
@@ -1290,6 +1741,18 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
res0 |= GENMASK(11, 8);
set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1);
+ /* ICH_HCR_EL2 */
+ res0 = ICH_HCR_EL2_RES0;
+ res1 = ICH_HCR_EL2_RES1;
+ if (!(kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_TDS))
+ res0 |= ICH_HCR_EL2_TDIR;
+ /* No GICv4 is presented to the guest */
+ res0 |= ICH_HCR_EL2_DVIM | ICH_HCR_EL2_vSGIEOICount;
+ set_sysreg_masks(kvm, ICH_HCR_EL2, res0, res1);
+
+ /* VNCR_EL2 */
+ set_sysreg_masks(kvm, VNCR_EL2, VNCR_EL2_RES0, VNCR_EL2_RES1);
+
out:
for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++)
(void)__vcpu_sys_reg(vcpu, sr);
@@ -1309,4 +1772,11 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
}
write_unlock(&vcpu->kvm->mmu_lock);
}
+
+ if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu))
+ kvm_map_l1_vncr(vcpu);
+
+ /* Must be last, as may switch context! */
+ if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
+ kvm_inject_nested_irq(vcpu);
}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 930b677eb9b0..fcd70bfe44fb 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -5,12 +5,12 @@
*/
#include <linux/init.h>
+#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
-#include <linux/sort.h>
#include <asm/kvm_pkvm.h>
@@ -24,23 +24,6 @@ static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
-static int cmp_hyp_memblock(const void *p1, const void *p2)
-{
- const struct memblock_region *r1 = p1;
- const struct memblock_region *r2 = p2;
-
- return r1->base < r2->base ? -1 : (r1->base > r2->base);
-}
-
-static void __init sort_memblock_regions(void)
-{
- sort(hyp_memory,
- *hyp_memblock_nr_ptr,
- sizeof(struct memblock_region),
- cmp_hyp_memblock,
- NULL);
-}
-
static int __init register_memblock_regions(void)
{
struct memblock_region *reg;
@@ -52,7 +35,6 @@ static int __init register_memblock_regions(void)
hyp_memory[*hyp_memblock_nr_ptr] = *reg;
(*hyp_memblock_nr_ptr)++;
}
- sort_memblock_regions();
return 0;
}
@@ -79,6 +61,7 @@ void __init kvm_hyp_reserve(void)
hyp_mem_pages += host_s2_pgtable_pages();
hyp_mem_pages += hyp_vm_table_pages();
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
+ hyp_mem_pages += pkvm_selftest_pages();
hyp_mem_pages += hyp_ffa_proxy_pages();
/*
@@ -111,6 +94,29 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
host_kvm->arch.pkvm.handle = 0;
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+ free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
+}
+
+static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+ size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
+ pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
+ void *hyp_vcpu;
+ int ret;
+
+ vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+
+ hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
+ if (!hyp_vcpu)
+ return -ENOMEM;
+
+ ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
+ if (!ret)
+ vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
+ else
+ free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
+
+ return ret;
}
/*
@@ -125,11 +131,8 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
*/
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
- size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
- struct kvm_vcpu *host_vcpu;
- pkvm_handle_t handle;
+ size_t pgd_sz, hyp_vm_sz;
void *pgd, *hyp_vm;
- unsigned long idx;
int ret;
if (host_kvm->created_vcpus < 1)
@@ -161,40 +164,11 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
if (ret < 0)
goto free_vm;
- handle = ret;
-
- host_kvm->arch.pkvm.handle = handle;
-
- /* Donate memory for the vcpus at hyp and initialize it. */
- hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
- kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
- void *hyp_vcpu;
-
- /* Indexing of the vcpus to be sequential starting at 0. */
- if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
- ret = -EINVAL;
- goto destroy_vm;
- }
-
- hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
- if (!hyp_vcpu) {
- ret = -ENOMEM;
- goto destroy_vm;
- }
-
- ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
- hyp_vcpu);
- if (ret) {
- free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
- goto destroy_vm;
- }
- }
+ host_kvm->arch.pkvm.handle = ret;
+ host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+ kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
-
-destroy_vm:
- __pkvm_destroy_hyp_vm(host_kvm);
- return ret;
free_vm:
free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
@@ -214,6 +188,18 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
return ret;
}
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
+ ret = __pkvm_create_hyp_vcpu(vcpu);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return ret;
+}
+
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
mutex_lock(&host_kvm->arch.config_lock);
@@ -259,6 +245,7 @@ static int __init finalize_pkvm(void)
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
@@ -270,80 +257,68 @@ static int __init finalize_pkvm(void)
}
device_initcall_sync(finalize_pkvm);
-static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
- struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
- struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
-
- if (a->gfn < b->gfn)
- return -1;
- if (a->gfn > b->gfn)
- return 1;
- return 0;
+ return m->gfn * PAGE_SIZE;
}
-static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
- struct rb_node *node = root->rb_node, *prev = NULL;
- struct pkvm_mapping *mapping;
-
- while (node) {
- mapping = rb_entry(node, struct pkvm_mapping, node);
- if (mapping->gfn == gfn)
- return node;
- prev = node;
- node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
- }
-
- return prev;
+ return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}
+INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
+ __pkvm_mapping_start, __pkvm_mapping_end, static,
+ pkvm_mapping);
+
/*
- * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
- * of __map inline.
+ * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
+ * freeing of __map inline.
*/
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
- for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \
- ((__start) >> PAGE_SHIFT)); \
+ for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings, \
+ __start, __end - 1); \
__tmp && ({ \
- __map = rb_entry(__tmp, struct pkvm_mapping, node); \
- __tmp = rb_next(__tmp); \
+ __map = __tmp; \
+ __tmp = pkvm_mapping_iter_next(__map, __start, __end - 1); \
true; \
}); \
- ) \
- if (__map->gfn < ((__start) >> PAGE_SHIFT)) \
- continue; \
- else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \
- break; \
- else
+ )
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops)
{
- pgt->pkvm_mappings = RB_ROOT;
+ pgt->pkvm_mappings = RB_ROOT_CACHED;
pgt->mmu = mmu;
return 0;
}
-void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
pkvm_handle_t handle = kvm->arch.pkvm.handle;
struct pkvm_mapping *mapping;
- struct rb_node *node;
+ int ret;
if (!handle)
- return;
+ return 0;
- node = rb_first(&pgt->pkvm_mappings);
- while (node) {
- mapping = rb_entry(node, struct pkvm_mapping, node);
- kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
- node = rb_next(node);
- rb_erase(&mapping->node, &pgt->pkvm_mappings);
+ for_each_mapping_in_range_safe(pgt, start, end, mapping) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
+ mapping->nr_pages);
+ if (WARN_ON(ret))
+ return ret;
+ pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
kfree(mapping);
}
+
+ return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+ __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
}
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -357,42 +332,46 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 pfn = phys >> PAGE_SHIFT;
int ret;
- if (size != PAGE_SIZE)
+ if (size != PAGE_SIZE && size != PMD_SIZE)
return -EINVAL;
lockdep_assert_held_write(&kvm->mmu_lock);
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
- if (ret) {
- /* Is the gfn already mapped due to a racing vCPU? */
- if (ret == -EPERM)
+
+ /*
+ * Calling stage2_map() on top of existing mappings is either happening because of a race
+ * with another vCPU, or because we're changing between page and block mappings. As per
+ * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
+ */
+ mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
+ if (mapping) {
+ if (size == (mapping->nr_pages * PAGE_SIZE))
return -EAGAIN;
+
+ /* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
+ ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+ if (ret)
+ return ret;
+ mapping = NULL;
}
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
+ if (WARN_ON(ret))
+ return ret;
+
swap(mapping, cache->mapping);
mapping->gfn = gfn;
mapping->pfn = pfn;
- WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+ mapping->nr_pages = size / PAGE_SIZE;
+ pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
return ret;
}
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
- struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
- pkvm_handle_t handle = kvm->arch.pkvm.handle;
- struct pkvm_mapping *mapping;
- int ret = 0;
+ lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
- lockdep_assert_held_write(&kvm->mmu_lock);
- for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
- if (WARN_ON(ret))
- break;
- rb_erase(&mapping->node, &pgt->pkvm_mappings);
- kfree(mapping);
- }
-
- return ret;
+ return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
@@ -404,7 +383,8 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
lockdep_assert_held(&kvm->mmu_lock);
for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
+ mapping->nr_pages);
if (WARN_ON(ret))
break;
}
@@ -419,7 +399,8 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
lockdep_assert_held(&kvm->mmu_lock);
for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
- __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+ __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
+ PAGE_SIZE * mapping->nr_pages);
return 0;
}
@@ -434,7 +415,7 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
lockdep_assert_held(&kvm->mmu_lock);
for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
- mkold);
+ mapping->nr_pages, mkold);
return young;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6c5950b9ceac..25c29107f13f 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -17,8 +17,6 @@
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
-DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);
@@ -26,6 +24,12 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc);
+bool kvm_supports_guest_pmuv3(void)
+{
+ guard(mutex)(&arm_pmus_lock);
+ return !list_empty(&arm_pmus);
+}
+
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
@@ -150,9 +154,6 @@ static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
*/
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
- if (!kvm_vcpu_has_pmu(vcpu))
- return 0;
-
return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}
@@ -191,13 +192,23 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
*/
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
- if (!kvm_vcpu_has_pmu(vcpu))
- return;
-
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}
/**
+ * kvm_pmu_set_counter_value_user - set PMU counter value from user
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ * @val: The counter value
+ */
+void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
+{
+ kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
+ __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val;
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+}
+
+/**
* kvm_pmu_release_perf_event - remove the perf event
* @pmc: The PMU counter pointer
*/
@@ -248,20 +259,6 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
}
/**
- * kvm_pmu_vcpu_reset - reset pmu state for cpu
- * @vcpu: The vcpu pointer
- *
- */
-void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
-{
- unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu);
- int i;
-
- for_each_set_bit(i, &mask, 32)
- kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
-}
-
-/**
* kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
* @vcpu: The vcpu pointer
*
@@ -283,7 +280,7 @@ static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu)
return 0;
hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
- n = vcpu->kvm->arch.pmcr_n;
+ n = vcpu->kvm->arch.nr_pmu_counters;
/*
* Programming HPMN to a value greater than PMCR_EL0.N is
@@ -350,7 +347,7 @@ void kvm_pmu_reprogram_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
int i;
- if (!kvm_vcpu_has_pmu(vcpu) || !val)
+ if (!val)
return;
for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -401,9 +398,6 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
bool overflow;
- if (!kvm_vcpu_has_pmu(vcpu))
- return;
-
overflow = kvm_pmu_overflow_status(vcpu);
if (pmu->irq_level == overflow)
return;
@@ -599,9 +593,6 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
int i;
- if (!kvm_vcpu_has_pmu(vcpu))
- return;
-
/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
val &= ~ARMV8_PMU_PMCR_LP;
@@ -617,14 +608,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
- /*
- * Unlike other PMU sysregs, the controls in PMCR_EL0 always apply
- * to the 'guest' range of counters and never the 'hyp' range.
- */
unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) &
- ~kvm_pmu_hyp_counter_mask(vcpu) &
~BIT(ARMV8_PMU_CYCLE_IDX);
+ if (!vcpu_is_el2(vcpu))
+ mask &= ~kvm_pmu_hyp_counter_mask(vcpu);
+
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
}
@@ -673,6 +662,20 @@ static bool kvm_pmc_counts_at_el2(struct kvm_pmc *pmc)
return kvm_pmc_read_evtreg(pmc) & ARMV8_PMU_INCLUDE_EL2;
}
+static int kvm_map_pmu_event(struct kvm *kvm, unsigned int eventsel)
+{
+ struct arm_pmu *pmu = kvm->arch.arm_pmu;
+
+ /*
+ * The CPU PMU likely isn't PMUv3; let the driver provide a mapping
+ * for the guest's PMUv3 event ID.
+ */
+ if (unlikely(pmu->map_pmuv3_event))
+ return pmu->map_pmuv3_event(eventsel);
+
+ return eventsel;
+}
+
/**
* kvm_pmu_create_perf_event - create a perf event for a counter
* @pmc: Counter context
@@ -683,7 +686,8 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct perf_event *event;
struct perf_event_attr attr;
- u64 eventsel, evtreg;
+ int eventsel;
+ u64 evtreg;
evtreg = kvm_pmc_read_evtreg(pmc);
@@ -709,6 +713,14 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
+ /*
+ * Don't create an event if we're running on hardware that requires
+ * PMUv3 event translation and we couldn't find a valid mapping.
+ */
+ eventsel = kvm_map_pmu_event(vcpu->kvm, eventsel);
+ if (eventsel < 0)
+ return;
+
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
@@ -766,9 +778,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
u64 reg;
- if (!kvm_vcpu_has_pmu(vcpu))
- return;
-
reg = counter_index_to_evtreg(pmc->idx);
__vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);
@@ -786,29 +795,23 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
return;
- mutex_lock(&arm_pmus_lock);
+ guard(mutex)(&arm_pmus_lock);
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
- goto out_unlock;
+ return;
entry->arm_pmu = pmu;
list_add_tail(&entry->entry, &arm_pmus);
-
- if (list_is_singular(&arm_pmus))
- static_branch_enable(&kvm_arm_pmu_available);
-
-out_unlock:
- mutex_unlock(&arm_pmus_lock);
}
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
- struct arm_pmu *tmp, *pmu = NULL;
struct arm_pmu_entry *entry;
+ struct arm_pmu *pmu;
int cpu;
- mutex_lock(&arm_pmus_lock);
+ guard(mutex)(&arm_pmus_lock);
/*
* It is safe to use a stale cpu to iterate the list of PMUs so long as
@@ -829,42 +832,62 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
*/
cpu = raw_smp_processor_id();
list_for_each_entry(entry, &arm_pmus, entry) {
- tmp = entry->arm_pmu;
+ pmu = entry->arm_pmu;
- if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
- pmu = tmp;
- break;
- }
+ if (cpumask_test_cpu(cpu, &pmu->supported_cpus))
+ return pmu;
}
- mutex_unlock(&arm_pmus_lock);
+ return NULL;
+}
+
+static u64 __compute_pmceid(struct arm_pmu *pmu, bool pmceid1)
+{
+ u32 hi[2], lo[2];
+
+ bitmap_to_arr32(lo, pmu->pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+ bitmap_to_arr32(hi, pmu->pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+ return ((u64)hi[pmceid1] << 32) | lo[pmceid1];
+}
+
+static u64 compute_pmceid0(struct arm_pmu *pmu)
+{
+ u64 val = __compute_pmceid(pmu, 0);
- return pmu;
+ /* always support SW_INCR */
+ val |= BIT(ARMV8_PMUV3_PERFCTR_SW_INCR);
+ /* always support CHAIN */
+ val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
+ return val;
+}
+
+static u64 compute_pmceid1(struct arm_pmu *pmu)
+{
+ u64 val = __compute_pmceid(pmu, 1);
+
+ /*
+ * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
+ * as RAZ
+ */
+ val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
+ return val;
}
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
+ struct arm_pmu *cpu_pmu = vcpu->kvm->arch.arm_pmu;
unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
u64 val, mask = 0;
int base, i, nr_events;
- if (!kvm_vcpu_has_pmu(vcpu))
- return 0;
-
if (!pmceid1) {
- val = read_sysreg(pmceid0_el0);
- /* always support CHAIN */
- val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
+ val = compute_pmceid0(cpu_pmu);
base = 0;
} else {
- val = read_sysreg(pmceid1_el0);
- /*
- * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
- * as RAZ
- */
- val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
- BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
- BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
+ val = compute_pmceid1(cpu_pmu);
base = 32;
}
@@ -900,9 +923,6 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
- if (!kvm_vcpu_has_pmu(vcpu))
- return 0;
-
if (!vcpu->arch.pmu.created)
return -EINVAL;
@@ -925,9 +945,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- /* One-off reload of the PMU on first run */
- kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
-
return 0;
}
@@ -995,18 +1012,43 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
/*
+ * PMUv3 requires that all event counters are capable of counting any
+ * event, though the same may not be true of non-PMUv3 hardware.
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return 1;
+
+ /*
* The arm_pmu->cntr_mask considers the fixed counter(s) as well.
* Ignore those and return only the general-purpose counters.
*/
return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
+static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr)
+{
+ kvm->arch.nr_pmu_counters = nr;
+
+ /* Reset MDCR_EL2.HPMN behind the vcpus' back... */
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2, kvm->arch.vcpu_features)) {
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2);
+ val &= ~MDCR_EL2_HPMN;
+ val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters);
+ __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+ }
+ }
+}
+
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
{
lockdep_assert_held(&kvm->arch.config_lock);
kvm->arch.arm_pmu = arm_pmu;
- kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
+ kvm_arm_set_nr_counters(kvm, kvm_arm_pmu_get_max_counters(kvm));
}
/**
@@ -1062,6 +1104,20 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
return ret;
}
+static int kvm_arm_pmu_v3_set_nr_counters(struct kvm_vcpu *vcpu, unsigned int n)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (!kvm->arch.arm_pmu)
+ return -EINVAL;
+
+ if (n > kvm_arm_pmu_get_max_counters(kvm))
+ return -EINVAL;
+
+ kvm_arm_set_nr_counters(kvm, n);
+ return 0;
+}
+
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
@@ -1158,6 +1214,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
}
+ case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS: {
+ unsigned int __user *uaddr = (unsigned int __user *)(long)attr->addr;
+ unsigned int n;
+
+ if (get_user(n, uaddr))
+ return -EFAULT;
+
+ return kvm_arm_pmu_v3_set_nr_counters(vcpu, n);
+ }
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
@@ -1196,6 +1261,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
case KVM_ARM_VCPU_PMU_V3_SET_PMU:
+ case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS:
if (kvm_vcpu_has_pmu(vcpu))
return 0;
}
@@ -1205,13 +1271,26 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
u8 kvm_arm_pmu_get_pmuver_limit(void)
{
- u64 tmp;
+ unsigned int pmuver;
+
+ pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer,
+ read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1));
- tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
- tmp = cpuid_feature_cap_perfmon_field(tmp,
- ID_AA64DFR0_EL1_PMUVer_SHIFT,
- ID_AA64DFR0_EL1_PMUVer_V3P5);
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
+ /*
+ * Spoof a barebones PMUv3 implementation if the system supports IMPDEF
+ * traps of the PMUv3 sysregs
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS))
+ return ID_AA64DFR0_EL1_PMUVer_IMP;
+
+ /*
+ * Otherwise, treat IMPLEMENTATION DEFINED functionality as
+ * unimplemented
+ */
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return 0;
+
+ return min(pmuver, ID_AA64DFR0_EL1_PMUVer_V3P5);
}
/**
@@ -1221,8 +1300,12 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ u64 n = vcpu->kvm->arch.nr_pmu_counters;
+
+ if (vcpu_has_nv(vcpu) && !vcpu_is_el2(vcpu))
+ n = FIELD_GET(MDCR_EL2_HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
- return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
+ return u64_replace_bits(pmcr, n, ARMV8_PMU_PMCR_N);
}
void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)
@@ -1231,9 +1314,6 @@ void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)
unsigned long mask;
int i;
- if (!kvm_vcpu_has_pmu(vcpu))
- return;
-
mask = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
for_each_set_bit(i, &mask, 32) {
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 0b3adf3e17b4..6b48a3d16d0d 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -41,7 +41,7 @@ void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
+ if (!system_supports_pmuv3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@@ -57,7 +57,7 @@ void kvm_clr_pmu_events(u64 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3())
+ if (!system_supports_pmuv3())
return;
pmu->events_host &= ~clr;
@@ -133,7 +133,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
struct kvm_pmu_events *pmu;
u64 events_guest, events_host;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return;
preempt_disable();
@@ -154,7 +154,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
struct kvm_pmu_events *pmu;
u64 events_guest, events_host;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return;
pmu = kvm_get_pmu_events();
@@ -180,7 +180,7 @@ bool kvm_set_pmuserenr(u64 val)
struct kvm_cpu_context *hctxt;
struct kvm_vcpu *vcpu;
- if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ if (!system_supports_pmuv3() || !has_vhe())
return false;
vcpu = kvm_get_running_vcpu();
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index e4a342e903e2..098416d7e5c2 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -52,8 +52,8 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.set = "AF",
.clear = " ",
}, {
- .mask = PTE_TABLE_BIT | PTE_VALID,
- .val = PTE_VALID,
+ .mask = PMD_TYPE_MASK,
+ .val = PMD_TYPE_SECT,
.set = "BLK",
.clear = " ",
},
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 803e11b0dc8f..959532422d3a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -158,6 +158,8 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
+ free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
+ kfree(vcpu->arch.vncr_tlb);
kfree(vcpu->arch.ccsidr);
}
@@ -196,9 +198,6 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.reset_state.reset = false;
spin_unlock(&vcpu->arch.mp_state_lock);
- /* Reset PMU outside of the non-preemptible section */
- kvm_pmu_vcpu_reset(vcpu);
-
preempt_disable();
loaded = (vcpu->cpu != -1);
if (loaded)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 82430c1e1dd0..a6cf2888d150 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -17,6 +17,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/uaccess.h>
+#include <linux/irqchip/arm-gic-v3.h>
#include <asm/arm_pmuv3.h>
#include <asm/cacheflush.h>
@@ -531,7 +532,13 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+ if (p->Op1 == 4) { /* ICC_SRE_EL2 */
+ p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
+ ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
+ } else { /* ICC_SRE_EL1 */
+ p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+ }
+
return true;
}
@@ -778,7 +785,7 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mask = BIT(ARMV8_PMU_CYCLE_IDX);
- u8 n = vcpu->kvm->arch.pmcr_n;
+ u8 n = vcpu->kvm->arch.nr_pmu_counters;
if (n)
mask |= GENMASK(n - 1, 0);
@@ -960,6 +967,22 @@ static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
return 0;
}
+static int set_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ u64 idx;
+
+ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0)
+ /* PMCCNTR_EL0 */
+ idx = ARMV8_PMU_CYCLE_IDX;
+ else
+ /* PMEVCNTRn_EL0 */
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+
+ kvm_pmu_set_counter_value_user(vcpu, idx, val);
+ return 0;
+}
+
static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -1051,25 +1074,10 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 val)
{
- bool set;
-
- val &= kvm_pmu_accessible_counter_mask(vcpu);
-
- switch (r->reg) {
- case PMOVSSET_EL0:
- /* CRm[1] being set indicates a SET register, and CLR otherwise */
- set = r->CRm & 2;
- break;
- default:
- /* Op2[0] being set indicates a SET register, and CLR otherwise */
- set = r->Op2 & 1;
- break;
- }
+ u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
- if (set)
- __vcpu_sys_reg(vcpu, r->reg) |= val;
- else
- __vcpu_sys_reg(vcpu, r->reg) &= ~val;
+ __vcpu_sys_reg(vcpu, r->reg) = val & mask;
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return 0;
}
@@ -1208,8 +1216,9 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
* with the existing KVM behavior.
*/
if (!kvm_vm_has_ran_once(kvm) &&
+ !vcpu_has_nv(vcpu) &&
new_n <= kvm_arm_pmu_get_max_counters(kvm))
- kvm->arch.pmcr_n = new_n;
+ kvm->arch.nr_pmu_counters = new_n;
mutex_unlock(&kvm->arch.config_lock);
@@ -1229,6 +1238,8 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
return 0;
}
@@ -1255,6 +1266,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
#define PMU_PMEVCNTR_EL0(n) \
{ PMU_SYS_REG(PMEVCNTRn_EL0(n)), \
.reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \
+ .set_user = set_pmu_evcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
@@ -1589,13 +1601,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val = sanitise_id_aa64pfr0_el1(vcpu, val);
break;
case SYS_ID_AA64PFR1_EL1:
- if (!kvm_has_mte(vcpu->kvm))
+ if (!kvm_has_mte(vcpu->kvm)) {
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
+ }
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
@@ -1627,6 +1640,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
+ val &= ~ID_AA64MMFR2_EL1_NV;
break;
case SYS_ID_AA64MMFR3_EL1:
val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
@@ -1637,6 +1651,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
break;
}
+ if (vcpu_has_nv(vcpu))
+ val = limit_nv_id_reg(vcpu->kvm, id, val);
+
return val;
}
@@ -1663,15 +1680,24 @@ static bool is_feature_id_reg(u32 encoding)
* Return true if the register's (Op0, Op1, CRn, CRm, Op2) is
* (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID
* registers KVM maintains on a per-VM basis.
+ *
+ * Additionally, the implementation ID registers and CTR_EL0 are handled as
+ * per-VM registers.
*/
static inline bool is_vm_ftr_id_reg(u32 id)
{
- if (id == SYS_CTR_EL0)
+ switch (id) {
+ case SYS_CTR_EL0:
+ case SYS_MIDR_EL1:
+ case SYS_REVIDR_EL1:
+ case SYS_AIDR_EL1:
return true;
+ default:
+ return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
+ sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
+ sys_reg_CRm(id) < 8);
- return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
- sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
- sys_reg_CRm(id) < 8);
+ }
}
static inline bool is_vcpu_ftr_id_reg(u32 id)
@@ -1802,16 +1828,6 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
return val;
}
-#define ID_REG_LIMIT_FIELD_ENUM(val, reg, field, limit) \
-({ \
- u64 __f_val = FIELD_GET(reg##_##field##_MASK, val); \
- (val) &= ~reg##_##field##_MASK; \
- (val) |= FIELD_PREP(reg##_##field##_MASK, \
- min(__f_val, \
- (u64)SYS_FIELD_VALUE(reg, field, limit))); \
- (val); \
-})
-
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
@@ -1870,12 +1886,14 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- u8 perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
+ u8 perfmon;
u64 val = read_sanitised_ftr_reg(SYS_ID_DFR0_EL1);
val &= ~ID_DFR0_EL1_PerfMon_MASK;
- if (kvm_vcpu_has_pmu(vcpu))
+ if (kvm_vcpu_has_pmu(vcpu)) {
+ perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
val |= SYS_FIELD_PREP(ID_DFR0_EL1, PerfMon, perfmon);
+ }
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_DFR0_EL1, CopDbg, Debugv8p8);
@@ -1929,6 +1947,12 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
if ((hw_val & mpam_mask) == (user_val & mpam_mask))
user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */
+ if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) ||
+ !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) ||
+ (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, user_val);
}
@@ -1937,11 +1961,65 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
{
u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
u64 mpam_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ u8 mte = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE, hw_val);
+ u8 user_mte_frac = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE_frac, user_val);
+ u8 hw_mte_frac = SYS_FIELD_GET(ID_AA64PFR1_EL1, MTE_frac, hw_val);
/* See set_id_aa64pfr0_el1 for comment about MPAM */
if ((hw_val & mpam_mask) == (user_val & mpam_mask))
user_val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ /*
+ * Previously MTE_frac was hidden from guest. However, if the
+ * hardware supports MTE2 but not MTE_ASYM_FAULT then a value
+ * of 0 for this field indicates that the hardware supports
+ * MTE_ASYNC. Whereas, 0xf indicates MTE_ASYNC is not supported.
+ *
+ * As KVM must accept values from KVM provided by user-space,
+ * when ID_AA64PFR1_EL1.MTE is 2 allow user-space to set
+ * ID_AA64PFR1_EL1.MTE_frac to 0. However, ignore it to avoid
+ * incorrectly claiming hardware support for MTE_ASYNC in the
+ * guest.
+ */
+
+ if (mte == ID_AA64PFR1_EL1_MTE_MTE2 &&
+ hw_mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI &&
+ user_mte_frac == ID_AA64PFR1_EL1_MTE_frac_ASYNC) {
+ user_val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ user_val |= hw_val & ID_AA64PFR1_EL1_MTE_frac_MASK;
+ }
+
+ return set_id_reg(vcpu, rd, user_val);
+}
+
+static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd);
+ u64 tgran2_mask = ID_AA64MMFR0_EL1_TGRAN4_2_MASK |
+ ID_AA64MMFR0_EL1_TGRAN16_2_MASK |
+ ID_AA64MMFR0_EL1_TGRAN64_2_MASK;
+
+ if (vcpu_has_nv(vcpu) &&
+ ((sanitized_val & tgran2_mask) != (user_val & tgran2_mask)))
+ return -EINVAL;
+
+ return set_id_reg(vcpu, rd, user_val);
+}
+
+static int set_id_aa64mmfr2_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
+ u64 nv_mask = ID_AA64MMFR2_EL1_NV_MASK;
+
+ /*
+ * We made the mistake to expose the now deprecated NV field,
+ * so allow userspace to write it, but silently ignore it.
+ */
+ if ((hw_val & nv_mask) == (user_val & nv_mask))
+ user_val &= ~nv_mask;
+
return set_id_reg(vcpu, rd, user_val);
}
@@ -2234,15 +2312,6 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
"trap of EL2 register redirected to EL1");
}
-#define EL2_REG(name, acc, rst, v) { \
- SYS_DESC(SYS_##name), \
- .access = acc, \
- .reset = rst, \
- .reg = name, \
- .visibility = el2_visibility, \
- .val = v, \
-}
-
#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \
SYS_DESC(SYS_##name), \
.access = acc, \
@@ -2252,6 +2321,9 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = v, \
}
+#define EL2_REG(name, acc, rst, v) \
+ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility)
+
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
@@ -2266,35 +2338,33 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
* from userspace.
*/
+#define ID_DESC_DEFAULT_CALLBACKS \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = id_visibility, \
+ .reset = kvm_read_sanitised_id_reg
+
#define ID_DESC(name) \
SYS_DESC(SYS_##name), \
- .access = access_id_reg, \
- .get_user = get_id_reg \
+ ID_DESC_DEFAULT_CALLBACKS
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
ID_DESC(name), \
- .set_user = set_id_reg, \
- .visibility = id_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = 0, \
}
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define AA32_ID_SANITISED(name) { \
ID_DESC(name), \
- .set_user = set_id_reg, \
.visibility = aa32_id_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = 0, \
}
/* sys_reg_desc initialiser for writable ID registers */
#define ID_WRITABLE(name, mask) { \
ID_DESC(name), \
- .set_user = set_id_reg, \
- .visibility = id_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = mask, \
}
@@ -2302,8 +2372,6 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
#define ID_FILTERED(sysreg, name, mask) { \
ID_DESC(sysreg), \
.set_user = set_##name, \
- .visibility = id_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = (mask), \
}
@@ -2313,12 +2381,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
* (1 <= crm < 8, 0 <= Op2 < 8).
*/
#define ID_UNALLOCATED(crm, op2) { \
+ .name = "S3_0_0_" #crm "_" #op2, \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
- .access = access_id_reg, \
- .get_user = get_id_reg, \
- .set_user = set_id_reg, \
+ ID_DESC_DEFAULT_CALLBACKS, \
.visibility = raz_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = 0, \
}
@@ -2329,9 +2395,7 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
*/
#define ID_HIDDEN(name) { \
ID_DESC(name), \
- .set_user = set_id_reg, \
.visibility = raz_visibility, \
- .reset = kvm_read_sanitised_id_reg, \
.val = 0, \
}
@@ -2407,6 +2471,16 @@ static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu,
return __el2_visibility(vcpu, rd, sve_visibility);
}
+static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (el2_visibility(vcpu, rd) == 0 &&
+ kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -2426,6 +2500,59 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1;
vq = min(vq, vcpu_sve_max_vq(vcpu));
vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2);
+
+ return true;
+}
+
+static bool access_gic_vtr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = kvm_vgic_global_state.ich_vtr_el2;
+ p->regval &= ~(ICH_VTR_EL2_DVIM |
+ ICH_VTR_EL2_A3V |
+ ICH_VTR_EL2_IDbits);
+ p->regval |= ICH_VTR_EL2_nV4;
+
+ return true;
+}
+
+static bool access_gic_misr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = vgic_v3_get_misr(vcpu);
+
+ return true;
+}
+
+static bool access_gic_eisr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = vgic_v3_get_eisr(vcpu);
+
+ return true;
+}
+
+static bool access_gic_elrsr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = vgic_v3_get_elrsr(vcpu);
+
return true;
}
@@ -2478,21 +2605,158 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2);
+ u64 hpmn, val, old = __vcpu_sys_reg(vcpu, MDCR_EL2);
- if (!access_rw(vcpu, p, r))
- return false;
+ if (!p->is_write) {
+ p->regval = old;
+ return true;
+ }
+
+ val = p->regval;
+ hpmn = FIELD_GET(MDCR_EL2_HPMN, val);
/*
- * Request a reload of the PMU to enable/disable the counters affected
- * by HPME.
+ * If HPMN is out of bounds, limit it to what we actually
+ * support. This matches the UNKNOWN definition of the field
+ * in that case, and keeps the emulation simple. Sort of.
*/
- if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME)
+ if (hpmn > vcpu->kvm->arch.nr_pmu_counters) {
+ hpmn = vcpu->kvm->arch.nr_pmu_counters;
+ u64_replace_bits(val, hpmn, MDCR_EL2_HPMN);
+ }
+
+ __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+
+ /*
+ * Request a reload of the PMU to enable/disable the counters
+ * affected by HPME.
+ */
+ if ((old ^ val) & MDCR_EL2_HPME)
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return true;
}
+/*
+ * For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and
+ * AIDR_EL1 as "invariant" registers, meaning userspace cannot change them.
+ * The values made visible to userspace were the register values of the boot
+ * CPU.
+ *
+ * At the same time, reads from these registers at EL1 previously were not
+ * trapped, allowing the guest to read the actual hardware value. On big-little
+ * machines, this means the VM can see different values depending on where a
+ * given vCPU got scheduled.
+ *
+ * These registers are now trapped as collateral damage from SME, and what
+ * follows attempts to give a user / guest view consistent with the existing
+ * ABI.
+ */
+static bool access_imp_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ /*
+ * Return the VM-scoped implementation ID register values if userspace
+ * has made them writable.
+ */
+ if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &vcpu->kvm->arch.flags))
+ return access_id_reg(vcpu, p, r);
+
+ /*
+ * Otherwise, fall back to the old behavior of returning the value of
+ * the current CPU.
+ */
+ switch (reg_to_encoding(r)) {
+ case SYS_REVIDR_EL1:
+ p->regval = read_sysreg(revidr_el1);
+ break;
+ case SYS_AIDR_EL1:
+ p->regval = read_sysreg(aidr_el1);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ return true;
+}
+
+static u64 __ro_after_init boot_cpu_midr_val;
+static u64 __ro_after_init boot_cpu_revidr_val;
+static u64 __ro_after_init boot_cpu_aidr_val;
+
+static void init_imp_id_regs(void)
+{
+ boot_cpu_midr_val = read_sysreg(midr_el1);
+ boot_cpu_revidr_val = read_sysreg(revidr_el1);
+ boot_cpu_aidr_val = read_sysreg(aidr_el1);
+}
+
+static u64 reset_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ switch (reg_to_encoding(r)) {
+ case SYS_MIDR_EL1:
+ return boot_cpu_midr_val;
+ case SYS_REVIDR_EL1:
+ return boot_cpu_revidr_val;
+ case SYS_AIDR_EL1:
+ return boot_cpu_aidr_val;
+ default:
+ KVM_BUG_ON(1, vcpu->kvm);
+ return 0;
+ }
+}
+
+static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 expected;
+
+ guard(mutex)(&kvm->arch.config_lock);
+
+ expected = read_id_reg(vcpu, r);
+ if (expected == val)
+ return 0;
+
+ if (!test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags))
+ return -EINVAL;
+
+ /*
+ * Once the VM has started the ID registers are immutable. Reject the
+ * write if userspace tries to change it.
+ */
+ if (kvm_vm_has_ran_once(kvm))
+ return -EBUSY;
+
+ /*
+ * Any value is allowed for the implementation ID registers so long as
+ * it is within the writable mask.
+ */
+ if ((val & r->val) != val)
+ return -EINVAL;
+
+ kvm_set_vm_id_reg(kvm, reg_to_encoding(r), val);
+ return 0;
+}
+
+#define IMPLEMENTATION_ID(reg, mask) { \
+ SYS_DESC(SYS_##reg), \
+ .access = access_imp_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_imp_id_reg, \
+ .reset = reset_imp_id_reg, \
+ .val = mask, \
+ }
+
+static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters;
+ return vcpu->kvm->arch.nr_pmu_counters;
+}
/*
* Architected system registers.
@@ -2542,7 +2806,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
+ IMPLEMENTATION_ID(MIDR_EL1, GENMASK_ULL(31, 0)),
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
+ IMPLEMENTATION_ID(REVIDR_EL1, GENMASK_ULL(63, 0)),
/*
* ID regs: all ID_SANITISED() entries here must have corresponding
@@ -2660,10 +2926,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(6,7),
/* CRm=7 */
- ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 |
- ID_AA64MMFR0_EL1_TGRAN4_2 |
- ID_AA64MMFR0_EL1_TGRAN64_2 |
- ID_AA64MMFR0_EL1_TGRAN16_2 |
+ ID_FILTERED(ID_AA64MMFR0_EL1, id_aa64mmfr0_el1,
+ ~(ID_AA64MMFR0_EL1_RES0 |
ID_AA64MMFR0_EL1_ASIDBITS)),
ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
ID_AA64MMFR1_EL1_HCX |
@@ -2671,7 +2935,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_VH |
ID_AA64MMFR1_EL1_VMIDBits)),
- ID_WRITABLE(ID_AA64MMFR2_EL1, ~(ID_AA64MMFR2_EL1_RES0 |
+ ID_FILTERED(ID_AA64MMFR2_EL1,
+ id_aa64mmfr2_el1, ~(ID_AA64MMFR2_EL1_RES0 |
ID_AA64MMFR2_EL1_EVT |
ID_AA64MMFR2_EL1_FWB |
ID_AA64MMFR2_EL1_IDS |
@@ -2680,7 +2945,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
- ID_SANITISED(ID_AA64MMFR4_EL1),
+ ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
ID_UNALLOCATED(7,7),
@@ -2814,6 +3079,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },
{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
+ IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)),
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
ID_FILTERED(CTR_EL0, ctr_el0,
CTR_EL0_DIC_MASK |
@@ -2850,7 +3116,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(PMCCNTR_EL0),
.access = access_pmu_evcntr, .reset = reset_unknown,
- .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
+ .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr,
+ .set_user = set_pmu_evcntr },
{ PMU_SYS_REG(PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(PMXEVCNTR_EL0),
@@ -3034,7 +3301,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
- EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0),
+ EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
@@ -3054,6 +3321,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
tcr2_el2_visibility),
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
+ EL2_REG_FILTERED(VNCR_EL2, bad_vncr_trap, reset_val, 0,
+ vncr_el2_visibility),
{ SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
@@ -3102,7 +3371,40 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_RMR_EL2), undef_access },
+ EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP0R2_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP0R3_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP1R0_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP1R1_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP1R2_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_AP1R3_EL2, reset_val, 0),
+
+ { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
+
EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
+ { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
+ { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
+ { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
+ { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
+ EL2_REG_VNCR(ICH_VMCR_EL2, reset_val, 0),
+
+ EL2_REG_VNCR(ICH_LR0_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR1_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR2_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR3_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR4_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR5_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR6_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR7_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR8_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR9_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR10_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR11_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR12_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR13_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR14_EL2, reset_val, 0),
+ EL2_REG_VNCR(ICH_LR15_EL2, reset_val, 0),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
@@ -3304,8 +3606,7 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
- u64 base, range, tg, num, scale;
- int shift;
+ u64 base, range;
if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
@@ -3315,26 +3616,7 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* of the guest's S2 (different base granule size, for example), we
* decide to ignore TTL and only use the described range.
*/
- tg = FIELD_GET(GENMASK(47, 46), p->regval);
- scale = FIELD_GET(GENMASK(45, 44), p->regval);
- num = FIELD_GET(GENMASK(43, 39), p->regval);
- base = p->regval & GENMASK(36, 0);
-
- switch(tg) {
- case 1:
- shift = 12;
- break;
- case 2:
- shift = 14;
- break;
- case 3:
- default: /* IMPDEF: handle tg==0 as 64k */
- shift = 16;
- break;
- }
-
- base <<= shift;
- range = __TLBI_RANGE_PAGES(num, scale) << shift;
+ base = decode_range_tlbi(p->regval, &range, NULL);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
@@ -3400,11 +3682,22 @@ static void s2_mmu_tlbi_s1e1(struct kvm_s2_mmu *mmu,
WARN_ON(__kvm_tlbi_s1e2(mmu, info->va.addr, info->va.encoding));
}
+static bool handle_tlbi_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ if (!kvm_supported_tlbi_s1e2_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
+
+ kvm_handle_s1e2_tlbi(vcpu, sys_encoding, p->regval);
+ return true;
+}
+
static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
/*
* If we're here, this is because we've trapped on a EL1 TLBI
@@ -3415,6 +3708,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* - HCR_EL2.E2H == 0 : a non-VHE guest
* - HCR_EL2.{E2H,TGE} == { 1, 0 } : a VHE guest in guest mode
*
+ * Another possibility is that we are invalidating the EL2 context
+ * using EL1 instructions, but that we landed here because we need
+ * additional invalidation for structures that are not held in the
+ * CPU TLBs (such as the VNCR pseudo-TLB and its EL2 mapping). In
+ * that case, we are guaranteed that HCR_EL2.{E2H,TGE} == { 1, 1 }
+ * as we don't allow an NV-capable L1 in a nVHE configuration.
+ *
* We don't expect these helpers to ever be called when running
* in a vEL1 context.
*/
@@ -3424,7 +3724,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
- kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
+ if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) {
+ kvm_handle_s1e2_tlbi(vcpu, sys_encoding, p->regval);
+ return true;
+ }
+
+ kvm_s2_mmu_iterate_by_vmid(vcpu->kvm,
+ get_vmid(__vcpu_sys_reg(vcpu, VTTBR_EL2)),
&(union tlbi_info) {
.va = {
.addr = p->regval,
@@ -3546,16 +3852,21 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OS, undef_access),
- SYS_INSN(TLBI_VAE2OS, undef_access),
+ SYS_INSN(TLBI_ALLE2OS, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2OS, handle_tlbi_el2),
SYS_INSN(TLBI_ALLE1OS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OS, undef_access),
+ SYS_INSN(TLBI_VALE2OS, handle_tlbi_el2),
SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2IS, undef_access),
- SYS_INSN(TLBI_RVALE2IS, undef_access),
+ SYS_INSN(TLBI_RVAE2IS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2IS, handle_tlbi_el2),
+ SYS_INSN(TLBI_ALLE2IS, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2IS, handle_tlbi_el2),
SYS_INSN(TLBI_ALLE1IS, handle_alle1is),
+
+ SYS_INSN(TLBI_VALE2IS, handle_tlbi_el2),
+
SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1OS, handle_ipas2e1is),
SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is),
@@ -3565,11 +3876,17 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OS, undef_access),
- SYS_INSN(TLBI_RVALE2OS, undef_access),
- SYS_INSN(TLBI_RVAE2, undef_access),
- SYS_INSN(TLBI_RVALE2, undef_access),
+ SYS_INSN(TLBI_RVAE2OS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2OS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVAE2, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2, handle_tlbi_el2),
+ SYS_INSN(TLBI_ALLE2, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2, handle_tlbi_el2),
+
SYS_INSN(TLBI_ALLE1, handle_alle1is),
+
+ SYS_INSN(TLBI_VALE2, handle_tlbi_el2),
+
SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1ISNXS, handle_ipas2e1is),
@@ -3577,19 +3894,19 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OSNXS, undef_access),
- SYS_INSN(TLBI_VAE2OSNXS, undef_access),
+ SYS_INSN(TLBI_ALLE2OSNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2OSNXS, handle_tlbi_el2),
SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OSNXS, undef_access),
+ SYS_INSN(TLBI_VALE2OSNXS, handle_tlbi_el2),
SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2ISNXS, undef_access),
- SYS_INSN(TLBI_RVALE2ISNXS, undef_access),
- SYS_INSN(TLBI_ALLE2ISNXS, undef_access),
- SYS_INSN(TLBI_VAE2ISNXS, undef_access),
+ SYS_INSN(TLBI_RVAE2ISNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2ISNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_ALLE2ISNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2ISNXS, handle_tlbi_el2),
SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2ISNXS, undef_access),
+ SYS_INSN(TLBI_VALE2ISNXS, handle_tlbi_el2),
SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is),
SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is),
@@ -3599,14 +3916,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1NXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OSNXS, undef_access),
- SYS_INSN(TLBI_RVALE2OSNXS, undef_access),
- SYS_INSN(TLBI_RVAE2NXS, undef_access),
- SYS_INSN(TLBI_RVALE2NXS, undef_access),
- SYS_INSN(TLBI_ALLE2NXS, undef_access),
- SYS_INSN(TLBI_VAE2NXS, undef_access),
+ SYS_INSN(TLBI_RVAE2OSNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2OSNXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVAE2NXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_RVALE2NXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_ALLE2NXS, handle_tlbi_el2),
+ SYS_INSN(TLBI_VAE2NXS, handle_tlbi_el2),
SYS_INSN(TLBI_ALLE1NXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2NXS, undef_access),
+ SYS_INSN(TLBI_VALE2NXS, handle_tlbi_el2),
SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is),
};
@@ -4272,9 +4589,13 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
* Certain AArch32 ID registers are handled by rerouting to the AArch64
* system register table. Registers in the ID range where CRm=0 are
* excluded from this scheme as they do not trivially map into AArch64
- * system register encodings.
+ * system register encodings, except for AIDR/REVIDR.
*/
- if (params.Op1 == 0 && params.CRn == 0 && params.CRm)
+ if (params.Op1 == 0 && params.CRn == 0 &&
+ (params.CRm || params.Op2 == 6 /* REVIDR */))
+ return kvm_emulate_cp15_id_reg(vcpu, &params);
+ if (params.Op1 == 1 && params.CRn == 0 &&
+ params.CRm == 0 && params.Op2 == 7 /* AIDR */)
return kvm_emulate_cp15_id_reg(vcpu, &params);
return kvm_handle_cp_32(vcpu, &params, cp15_regs, ARRAY_SIZE(cp15_regs));
@@ -4473,6 +4794,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
}
set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
}
/**
@@ -4578,65 +4902,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-/*
- * These are the invariant sys_reg registers: we let the guest see the
- * host versions of these, so they're part of the guest state.
- *
- * A future CPU may provide a mechanism to present different values to
- * the guest, or a future kvm may trap them.
- */
-
-#define FUNCTION_INVARIANT(reg) \
- static u64 reset_##reg(struct kvm_vcpu *v, \
- const struct sys_reg_desc *r) \
- { \
- ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
- return ((struct sys_reg_desc *)r)->val; \
- }
-
-FUNCTION_INVARIANT(midr_el1)
-FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(aidr_el1)
-
-/* ->val is filled in by kvm_sys_reg_table_init() */
-static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
- { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 },
- { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 },
- { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 },
-};
-
-static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
-{
- const struct sys_reg_desc *r;
-
- r = get_reg_by_id(id, invariant_sys_regs,
- ARRAY_SIZE(invariant_sys_regs));
- if (!r)
- return -ENOENT;
-
- return put_user(r->val, uaddr);
-}
-
-static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
-{
- const struct sys_reg_desc *r;
- u64 val;
-
- r = get_reg_by_id(id, invariant_sys_regs,
- ARRAY_SIZE(invariant_sys_regs));
- if (!r)
- return -ENOENT;
-
- if (get_user(val, uaddr))
- return -EFAULT;
-
- /* This is what we mean by invariant: you can't change it. */
- if (r->val != val)
- return -EINVAL;
-
- return 0;
-}
-
static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val;
@@ -4718,15 +4983,10 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
- int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_get(vcpu, reg->id, uaddr);
- err = get_invariant_sys_reg(reg->id, uaddr);
- if (err != -ENOENT)
- return err;
-
return kvm_sys_reg_get_user(vcpu, reg,
sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
}
@@ -4762,15 +5022,10 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
- int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_set(vcpu, reg->id, uaddr);
- err = set_invariant_sys_reg(reg->id, uaddr);
- if (err != -ENOENT)
- return err;
-
return kvm_sys_reg_set_user(vcpu, reg,
sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
}
@@ -4859,23 +5114,14 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
{
- return ARRAY_SIZE(invariant_sys_regs)
- + num_demux_regs()
+ return num_demux_regs()
+ walk_sys_regs(vcpu, (u64 __user *)NULL);
}
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- unsigned int i;
int err;
- /* Then give them all the invariant registers' indices. */
- for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
- if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
- return -EFAULT;
- uindices++;
- }
-
err = walk_sys_regs(vcpu, uindices);
if (err < 0)
return err;
@@ -4971,88 +5217,18 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
mutex_lock(&kvm->arch.config_lock);
vcpu_set_hcr(vcpu);
vcpu_set_ich_hcr(vcpu);
-
- if (cpus_have_final_cap(ARM64_HAS_HCX)) {
- /*
- * In general, all HCRX_EL2 bits are gated by a feature.
- * The only reason we can set SMPME without checking any
- * feature is that its effects are not directly observable
- * from the guest.
- */
- vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
-
- if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
- vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
-
- if (kvm_has_tcr2(kvm))
- vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
-
- if (kvm_has_fpmr(kvm))
- vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
- }
+ vcpu_set_hcrx(vcpu);
if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
goto out;
- kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 |
- HFGxTR_EL2_nMAIR2_EL1 |
- HFGxTR_EL2_nS2POR_EL1 |
- HFGxTR_EL2_nACCDATA_EL1 |
- HFGxTR_EL2_nSMPRI_EL1_MASK |
- HFGxTR_EL2_nTPIDR2_EL0_MASK);
-
- if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
- kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1OS|
- HFGITR_EL2_TLBIRVALE1OS |
- HFGITR_EL2_TLBIRVAAE1OS |
- HFGITR_EL2_TLBIRVAE1OS |
- HFGITR_EL2_TLBIVAALE1OS |
- HFGITR_EL2_TLBIVALE1OS |
- HFGITR_EL2_TLBIVAAE1OS |
- HFGITR_EL2_TLBIASIDE1OS |
- HFGITR_EL2_TLBIVAE1OS |
- HFGITR_EL2_TLBIVMALLE1OS);
-
- if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
- kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1 |
- HFGITR_EL2_TLBIRVALE1 |
- HFGITR_EL2_TLBIRVAAE1 |
- HFGITR_EL2_TLBIRVAE1 |
- HFGITR_EL2_TLBIRVAALE1IS|
- HFGITR_EL2_TLBIRVALE1IS |
- HFGITR_EL2_TLBIRVAAE1IS |
- HFGITR_EL2_TLBIRVAE1IS |
- HFGITR_EL2_TLBIRVAALE1OS|
- HFGITR_EL2_TLBIRVALE1OS |
- HFGITR_EL2_TLBIRVAAE1OS |
- HFGITR_EL2_TLBIRVAE1OS);
-
- if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
- kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A;
-
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
- kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
- HFGITR_EL2_ATS1E1WP);
-
- if (!kvm_has_s1pie(kvm))
- kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
- HFGxTR_EL2_nPIR_EL1);
-
- if (!kvm_has_s1poe(kvm))
- kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
- HFGxTR_EL2_nPOR_EL0);
-
- if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
- kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
- HAFGRTR_EL2_RES1);
-
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) {
- kvm->arch.fgu[HDFGRTR_GROUP] |= (HDFGRTR_EL2_nBRBDATA |
- HDFGRTR_EL2_nBRBCTL |
- HDFGRTR_EL2_nBRBIDR);
- kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_nBRBINJ |
- HFGITR_EL2_nBRBIALL);
- }
+ compute_fgu(kvm, HFGRTR_GROUP);
+ compute_fgu(kvm, HFGITR_GROUP);
+ compute_fgu(kvm, HDFGRTR_GROUP);
+ compute_fgu(kvm, HAFGRTR_GROUP);
+ compute_fgu(kvm, HFGRTR2_GROUP);
+ compute_fgu(kvm, HFGITR2_GROUP);
+ compute_fgu(kvm, HDFGRTR2_GROUP);
set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
out:
@@ -5101,18 +5277,17 @@ int __init kvm_sys_reg_table_init(void)
valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
- valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
if (!valid)
return -EINVAL;
- /* We abuse the reset function to overwrite the table itself. */
- for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
- invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+ init_imp_id_regs();
ret = populate_nv_trap_config();
+ check_feature_map();
+
for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++)
ret = populate_sysreg_config(sys_reg_descs + i, i);
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 1d94ed6efad2..cc6338d38766 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -247,4 +247,14 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu);
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
Op2(sys_reg_Op2(reg))
+#define ID_REG_LIMIT_FIELD_ENUM(val, reg, field, limit) \
+({ \
+ u64 __f_val = FIELD_GET(reg##_##field##_MASK, val); \
+ (val) &= ~reg##_##field##_MASK; \
+ (val) |= FIELD_PREP(reg##_##field##_MASK, \
+ min(__f_val, \
+ (u64)SYS_FIELD_VALUE(reg, field, limit))); \
+ (val); \
+})
+
#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index c18c1a95831e..9c60f6465c78 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -176,7 +176,7 @@ TRACE_EVENT(kvm_set_way_flush,
),
TP_printk("S/W flush at 0x%016lx (cache %s)",
- __entry->vcpu_pc, __entry->cache ? "on" : "off")
+ __entry->vcpu_pc, str_on_off(__entry->cache))
);
TRACE_EVENT(kvm_toggle_cache,
@@ -196,8 +196,8 @@ TRACE_EVENT(kvm_toggle_cache,
),
TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
- __entry->vcpu_pc, __entry->was ? "on" : "off",
- __entry->now ? "on" : "off")
+ __entry->vcpu_pc, str_on_off(__entry->was),
+ str_on_off(__entry->now))
);
/*
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 9e7c486b48c2..5eacb4b3250a 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -35,12 +35,12 @@ static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
vgic_v3_cpu->num_id_bits = host_id_bits;
- host_seis = FIELD_GET(ICH_VTR_SEIS_MASK, kvm_vgic_global_state.ich_vtr_el2);
+ host_seis = FIELD_GET(ICH_VTR_EL2_SEIS, kvm_vgic_global_state.ich_vtr_el2);
seis = FIELD_GET(ICC_CTLR_EL1_SEIS_MASK, val);
if (host_seis != seis)
return -EINVAL;
- host_a3v = FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2);
+ host_a3v = FIELD_GET(ICH_VTR_EL2_A3V, kvm_vgic_global_state.ich_vtr_el2);
a3v = FIELD_GET(ICC_CTLR_EL1_A3V_MASK, val);
if (host_a3v != a3v)
return -EINVAL;
@@ -68,10 +68,10 @@ static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
val |= FIELD_PREP(ICC_CTLR_EL1_PRI_BITS_MASK, vgic_v3_cpu->num_pri_bits - 1);
val |= FIELD_PREP(ICC_CTLR_EL1_ID_BITS_MASK, vgic_v3_cpu->num_id_bits);
val |= FIELD_PREP(ICC_CTLR_EL1_SEIS_MASK,
- FIELD_GET(ICH_VTR_SEIS_MASK,
+ FIELD_GET(ICH_VTR_EL2_SEIS,
kvm_vgic_global_state.ich_vtr_el2));
val |= FIELD_PREP(ICC_CTLR_EL1_A3V_MASK,
- FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2));
+ FIELD_GET(ICH_VTR_EL2_A3V, kvm_vgic_global_state.ich_vtr_el2));
/*
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
* Extract it directly using ICC_CTLR_EL1 reg definitions.
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index afb018528bc3..f8425f381de9 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -320,3 +320,227 @@ void vgic_debug_init(struct kvm *kvm)
void vgic_debug_destroy(struct kvm *kvm)
{
}
+
+/**
+ * struct vgic_its_iter - Iterator for traversing VGIC ITS device tables.
+ * @dev: Pointer to the current its_device being processed.
+ * @ite: Pointer to the current its_ite within the device being processed.
+ *
+ * This structure is used to maintain the current position during iteration
+ * over the ITS device tables. It holds pointers to both the current device
+ * and the current ITE within that device.
+ */
+struct vgic_its_iter {
+ struct its_device *dev;
+ struct its_ite *ite;
+};
+
+/**
+ * end_of_iter - Checks if the iterator has reached the end.
+ * @iter: The iterator to check.
+ *
+ * When the iterator completed processing the final ITE in the last device
+ * table, it was marked to indicate the end of iteration by setting its
+ * device and ITE pointers to NULL.
+ * This function checks whether the iterator was marked as end.
+ *
+ * Return: True if the iterator is marked as end, false otherwise.
+ */
+static inline bool end_of_iter(struct vgic_its_iter *iter)
+{
+ return !iter->dev && !iter->ite;
+}
+
+/**
+ * vgic_its_iter_next - Advances the iterator to the next entry in the ITS tables.
+ * @its: The VGIC ITS structure.
+ * @iter: The iterator to advance.
+ *
+ * This function moves the iterator to the next ITE within the current device,
+ * or to the first ITE of the next device if the current ITE is the last in
+ * the device. If the current device is the last device, the iterator is set
+ * to indicate the end of iteration.
+ */
+static void vgic_its_iter_next(struct vgic_its *its, struct vgic_its_iter *iter)
+{
+ struct its_device *dev = iter->dev;
+ struct its_ite *ite = iter->ite;
+
+ if (!ite || list_is_last(&ite->ite_list, &dev->itt_head)) {
+ if (list_is_last(&dev->dev_list, &its->device_list)) {
+ dev = NULL;
+ ite = NULL;
+ } else {
+ dev = list_next_entry(dev, dev_list);
+ ite = list_first_entry_or_null(&dev->itt_head,
+ struct its_ite,
+ ite_list);
+ }
+ } else {
+ ite = list_next_entry(ite, ite_list);
+ }
+
+ iter->dev = dev;
+ iter->ite = ite;
+}
+
+/**
+ * vgic_its_debug_start - Start function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @pos: The starting position (offset).
+ *
+ * This function initializes the iterator to the beginning of the ITS tables
+ * and advances it to the specified position. It acquires the its_lock mutex
+ * to protect shared data.
+ *
+ * Return: An iterator pointer on success, NULL if no devices are found or
+ * the end of the list is reached, or ERR_PTR(-ENOMEM) on memory
+ * allocation failure.
+ */
+static void *vgic_its_debug_start(struct seq_file *s, loff_t *pos)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter;
+ struct its_device *dev;
+ loff_t offset = *pos;
+
+ mutex_lock(&its->its_lock);
+
+ dev = list_first_entry_or_null(&its->device_list,
+ struct its_device, dev_list);
+ if (!dev)
+ return NULL;
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+
+ iter->dev = dev;
+ iter->ite = list_first_entry_or_null(&dev->itt_head,
+ struct its_ite, ite_list);
+
+ while (!end_of_iter(iter) && offset--)
+ vgic_its_iter_next(its, iter);
+
+ if (end_of_iter(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+/**
+ * vgic_its_debug_next - Next function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ * @pos: The current position (offset).
+ *
+ * This function advances the iterator to the next entry and increments the
+ * position.
+ *
+ * Return: An iterator pointer on success, or NULL if the end of the list is
+ * reached.
+ */
+static void *vgic_its_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter = v;
+
+ ++*pos;
+ vgic_its_iter_next(its, iter);
+
+ if (end_of_iter(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ return iter;
+}
+
+/**
+ * vgic_its_debug_stop - Stop function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function frees the iterator and releases the its_lock mutex.
+ */
+static void vgic_its_debug_stop(struct seq_file *s, void *v)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter = v;
+
+ if (!IS_ERR_OR_NULL(iter))
+ kfree(iter);
+ mutex_unlock(&its->its_lock);
+}
+
+/**
+ * vgic_its_debug_show - Show function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function formats and prints the ITS table entry information to the
+ * seq_file output.
+ *
+ * Return: 0 on success.
+ */
+static int vgic_its_debug_show(struct seq_file *s, void *v)
+{
+ struct vgic_its_iter *iter = v;
+ struct its_device *dev = iter->dev;
+ struct its_ite *ite = iter->ite;
+
+ if (list_is_first(&ite->ite_list, &dev->itt_head)) {
+ seq_printf(s, "\n");
+ seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n",
+ dev->device_id, BIT_ULL(dev->num_eventid_bits) - 1);
+ seq_printf(s, "EVENT_ID INTID HWINTID TARGET COL_ID HW\n");
+ seq_printf(s, "-----------------------------------------------\n");
+ }
+
+ if (ite && ite->irq && ite->collection) {
+ seq_printf(s, "%8u %8u %8u %8u %8u %2d\n",
+ ite->event_id, ite->irq->intid, ite->irq->hwintid,
+ ite->collection->target_addr,
+ ite->collection->collection_id, ite->irq->hw);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations vgic_its_debug_sops = {
+ .start = vgic_its_debug_start,
+ .next = vgic_its_debug_next,
+ .stop = vgic_its_debug_stop,
+ .show = vgic_its_debug_show
+};
+
+DEFINE_SEQ_ATTRIBUTE(vgic_its_debug);
+
+/**
+ * vgic_its_debug_init - Initializes the debugfs interface for VGIC ITS.
+ * @dev: The KVM device structure.
+ *
+ * This function creates a debugfs file named "vgic-its-state@%its_base"
+ * to expose the ITS table information.
+ *
+ * Return: 0 on success.
+ */
+int vgic_its_debug_init(struct kvm_device *dev)
+{
+ struct vgic_its *its = dev->private;
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "vgic-its-state@%llx", (u64)its->vgic_its_base);
+ if (!name)
+ return -ENOMEM;
+
+ debugfs_create_file(name, 0444, dev->kvm->debugfs_dentry, its, &vgic_its_debug_fops);
+
+ kfree(name);
+ return 0;
+}
+
+void vgic_its_debug_destroy(struct kvm_device *dev)
+{
+}
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 775461cf2d2d..1f33e71c2a73 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -198,6 +198,27 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
+/* Default GICv3 Maintenance Interrupt INTID, as per SBSA */
+#define DEFAULT_MI_INTID 25
+
+int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guard(mutex)(&vcpu->kvm->arch.config_lock);
+
+ /*
+ * Matching the tradition established with the timers, provide
+ * a default PPI for the maintenance interrupt. It makes
+ * things easier to reason about.
+ */
+ if (vcpu->kvm->arch.vgic.mi_intid == 0)
+ vcpu->kvm->arch.vgic.mi_intid = DEFAULT_MI_INTID;
+ ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.mi_intid, vcpu);
+
+ return ret;
+}
+
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -588,12 +609,20 @@ void kvm_vgic_cpu_down(void)
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
+ struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
+
/*
* We cannot rely on the vgic maintenance interrupt to be
* delivered synchronously. This means we can only use it to
* exit the VM, and we perform the handling of EOIed
* interrupts on the exit path (see vgic_fold_lr_state).
+ *
+ * Of course, NV throws a wrench in this plan, and needs
+ * something special.
*/
+ if (vcpu && vgic_state_is_nested(vcpu))
+ vgic_v3_handle_nested_maint_irq(vcpu);
+
return IRQ_HANDLED;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index fb96802799c6..569f9da9049f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -154,36 +154,6 @@ out_unlock:
return irq;
}
-struct its_device {
- struct list_head dev_list;
-
- /* the head for the list of ITTEs */
- struct list_head itt_head;
- u32 num_eventid_bits;
- gpa_t itt_addr;
- u32 device_id;
-};
-
-#define COLLECTION_NOT_MAPPED ((u32)~0)
-
-struct its_collection {
- struct list_head coll_list;
-
- u32 collection_id;
- u32 target_addr;
-};
-
-#define its_is_collection_mapped(coll) ((coll) && \
- ((coll)->target_addr != COLLECTION_NOT_MAPPED))
-
-struct its_ite {
- struct list_head ite_list;
-
- struct vgic_irq *irq;
- struct its_collection *collection;
- u32 event_id;
-};
-
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -1938,6 +1908,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
mutex_lock(&its->its_lock);
+ vgic_its_debug_destroy(kvm_dev);
+
vgic_its_free_device_list(kvm, its);
vgic_its_free_collection_list(kvm, its);
vgic_its_invalidate_cache(its);
@@ -2771,7 +2743,12 @@ static int vgic_its_set_attr(struct kvm_device *dev,
if (ret)
return ret;
- return vgic_register_its_iodev(dev->kvm, its, addr);
+ ret = vgic_register_its_iodev(dev->kvm, its, addr);
+ if (ret)
+ return ret;
+
+ return vgic_its_debug_init(dev);
+
}
case KVM_DEV_ARM_VGIC_GRP_CTRL:
return vgic_its_ctrl(dev->kvm, its, attr->attr);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 5f4f57aaa23e..359094f68c23 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -303,6 +303,12 @@ static int vgic_get_common_attr(struct kvm_device *dev,
VGIC_NR_PRIVATE_IRQS, uaddr);
break;
}
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+ r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
+ break;
+ }
}
return r;
@@ -517,7 +523,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
struct vgic_reg_attr reg_attr;
gpa_t addr;
struct kvm_vcpu *vcpu;
- bool uaccess;
+ bool uaccess, post_init = true;
u32 val;
int ret;
@@ -533,6 +539,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
/* Sysregs uaccess is performed by the sysreg handling code */
uaccess = false;
break;
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
+ post_init = false;
+ fallthrough;
default:
uaccess = true;
}
@@ -552,7 +561,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->arch.config_lock);
- if (unlikely(!vgic_initialized(dev->kvm))) {
+ if (post_init != vgic_initialized(dev->kvm)) {
ret = -EBUSY;
goto out;
}
@@ -582,6 +591,19 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
}
break;
}
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
+ if (!is_write) {
+ val = dev->kvm->arch.vgic.mi_intid;
+ ret = 0;
+ break;
+ }
+
+ ret = -EINVAL;
+ if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) {
+ dev->kvm->arch.vgic.mi_intid = val;
+ ret = 0;
+ }
+ break;
default:
ret = -EINVAL;
break;
@@ -608,6 +630,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, true);
default:
return vgic_set_common_attr(dev, attr);
@@ -622,6 +645,7 @@ static int vgic_v3_get_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, false);
default:
return vgic_get_common_attr(dev, attr);
@@ -645,6 +669,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
return vgic_v3_has_attr_regs(dev, attr);
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return 0;
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
new file mode 100644
index 000000000000..4f6954c30674
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+
+#include "vgic.h"
+
+#define ICH_LRN(n) (ICH_LR0_EL2 + (n))
+#define ICH_AP0RN(n) (ICH_AP0R0_EL2 + (n))
+#define ICH_AP1RN(n) (ICH_AP1R0_EL2 + (n))
+
+struct mi_state {
+ u16 eisr;
+ u16 elrsr;
+ bool pend;
+};
+
+/*
+ * The shadow registers loaded to the hardware when running a L2 guest
+ * with the virtual IMO/FMO bits set.
+ */
+struct shadow_if {
+ struct vgic_v3_cpu_if cpuif;
+ unsigned long lr_map;
+};
+
+static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+
+/*
+ * Nesting GICv3 support
+ *
+ * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
+ * completely controls the interrupts injected via the list registers.
+ * Consequently, most of the state that is modified by the guest (by ACK-ing
+ * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
+ * keep a semi-consistent view of the interrupts.
+ *
+ * This still applies for a NV guest, but only while "InHost" (either
+ * running at EL2, or at EL0 with HCR_EL2.{E2H.TGE}=={1,1}.
+ *
+ * When running a L2 guest ("not InHost"), things are radically different,
+ * as the L1 guest is in charge of provisioning the interrupts via its own
+ * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
+ * page. This means that the flow described above does work (there is no
+ * state to rebuild in the L0 hypervisor), and that most things happed on L2
+ * load/put:
+ *
+ * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
+ * per-CPU data structure that is used to populate the actual LRs. This is
+ * an extra copy that we could avoid, but life is short. In the process,
+ * we remap any interrupt that has the HW bit set to the mapped interrupt
+ * on the host, should the host consider it a HW one. This allows the HW
+ * deactivation to take its course, such as for the timer.
+ *
+ * - on L2 put: perform the inverse transformation, so that the result of L2
+ * running becomes visible to L1 in the VNCR-accessible registers.
+ *
+ * - there is nothing to do on L2 entry, as everything will have happened
+ * on load. However, this is the point where we detect that an interrupt
+ * targeting L1 and prepare the grand switcheroo.
+ *
+ * - on L2 exit: emulate the HW bit, and deactivate corresponding the L1
+ * interrupt. The L0 active state will be cleared by the HW if the L1
+ * interrupt was itself backed by a HW interrupt.
+ *
+ * Maintenance Interrupt (MI) management:
+ *
+ * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
+ * used as a handover point between L2 and L1.
+ *
+ * - on delivery of a MI to L0 while L2 is running: make the L1 MI pending,
+ * and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
+ * run and process the MI.
+ *
+ * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
+ * state must be computed at each entry/exit of the guest, much like we do
+ * it for the PMU interrupt.
+ *
+ * - because most of the ICH_*_EL2 registers live in the VNCR page, the
+ * quality of emulation is poor: L1 can setup the vgic so that an MI would
+ * immediately fire, and not observe anything until the next exit. Trying
+ * to read ICH_MISR_EL2 would do the trick, for example.
+ *
+ * System register emulation:
+ *
+ * We get two classes of registers:
+ *
+ * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access
+ * them, and L0 doesn't see a thing.
+ *
+ * - those that always trap (ELRSR, EISR, MISR): these are status registers
+ * that are built on the fly based on the in-memory state.
+ *
+ * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot,
+ * and a NV L2 would either access the VNCR page provided by L1 (memory
+ * based registers), or see the access redirected to L1 (registers that
+ * trap) thanks to NV being set by L1.
+ */
+
+bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ u64 xmo;
+
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
+ WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
+ "Separate virtual IRQ/FIQ settings not supported\n");
+
+ return !!xmo;
+ }
+
+ return false;
+}
+
+static struct shadow_if *get_shadow_if(void)
+{
+ return this_cpu_ptr(&shadow_if);
+}
+
+static bool lr_triggers_eoi(u64 lr)
+{
+ return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
+}
+
+static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state)
+{
+ u16 eisr = 0, elrsr = 0;
+ bool pend = false;
+
+ for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ if (lr_triggers_eoi(lr))
+ eisr |= BIT(i);
+ if (!(lr & ICH_LR_STATE))
+ elrsr |= BIT(i);
+ pend |= (lr & ICH_LR_PENDING_BIT);
+ }
+
+ mi_state->eisr = eisr;
+ mi_state->elrsr = elrsr;
+ mi_state->pend = pend;
+}
+
+u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+ return mi_state.eisr;
+}
+
+u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+ return mi_state.elrsr;
+}
+
+u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+ u64 reg = 0, hcr, vmcr;
+
+ hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+ vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+
+ if (mi_state.eisr)
+ reg |= ICH_MISR_EL2_EOI;
+
+ if (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_UIE) {
+ int used_lrs = kvm_vgic_global_state.nr_lr;
+
+ used_lrs -= hweight16(mi_state.elrsr);
+ reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0;
+ }
+
+ if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr))
+ reg |= ICH_MISR_EL2_LRENP;
+
+ if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend)
+ reg |= ICH_MISR_EL2_NP;
+
+ if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK))
+ reg |= ICH_MISR_EL2_VGrp0E;
+
+ if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK))
+ reg |= ICH_MISR_EL2_VGrp0D;
+
+ if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK))
+ reg |= ICH_MISR_EL2_VGrp1E;
+
+ if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK))
+ reg |= ICH_MISR_EL2_VGrp1D;
+
+ return reg;
+}
+
+/*
+ * For LRs which have HW bit set such as timer interrupts, we modify them to
+ * have the host hardware interrupt number instead of the virtual one programmed
+ * by the guest hypervisor.
+ */
+static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
+ struct vgic_v3_cpu_if *s_cpu_if)
+{
+ unsigned long lr_map = 0;
+ int index = 0;
+
+ for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_STATE))
+ lr = 0;
+
+ if (!(lr & ICH_LR_HW))
+ goto next;
+
+ /* We have the HW bit set, check for validity of pINTID */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
+ /* There was no real mapping, so nuke the HW bit */
+ lr &= ~ICH_LR_HW;
+ if (irq)
+ vgic_put_irq(vcpu->kvm, irq);
+ goto next;
+ }
+
+ /* Translate the virtual mapping to the real one */
+ lr &= ~ICH_LR_PHYS_ID_MASK;
+ lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+
+ vgic_put_irq(vcpu->kvm, irq);
+
+next:
+ s_cpu_if->vgic_lr[index] = lr;
+ if (lr) {
+ lr_map |= BIT(i);
+ index++;
+ }
+ }
+
+ container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
+ s_cpu_if->used_lrs = index;
+}
+
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ int i, index = 0;
+
+ for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
+ goto next;
+
+ /*
+ * If we had a HW lr programmed by the guest hypervisor, we
+ * need to emulate the HW effect between the guest hypervisor
+ * and the nested guest.
+ */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
+ goto next;
+
+ lr = __gic_v3_get_lr(index);
+ if (!(lr & ICH_LR_STATE))
+ irq->active = false;
+
+ vgic_put_irq(vcpu->kvm, irq);
+ next:
+ index++;
+ }
+}
+
+static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
+ struct vgic_v3_cpu_if *s_cpu_if)
+{
+ struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 val = 0;
+ int i;
+
+ /*
+ * If we're on a system with a broken vgic that requires
+ * trapping, propagate the trapping requirements.
+ *
+ * Ah, the smell of rotten fruits...
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
+ ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
+ s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
+ s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+ s_cpu_if->vgic_sre = host_if->vgic_sre;
+
+ for (i = 0; i < 4; i++) {
+ s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
+ s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
+ }
+
+ vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
+}
+
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;
+
+ BUG_ON(!vgic_state_is_nested(vcpu));
+
+ vgic_v3_create_shadow_state(vcpu, cpu_if);
+
+ __vgic_v3_restore_vmcr_aprs(cpu_if);
+ __vgic_v3_activate_traps(cpu_if);
+
+ __vgic_v3_restore_state(cpu_if);
+
+ /*
+ * Propagate the number of used LRs for the benefit of the HYP
+ * GICv3 emulation code. Yes, this is a pretty sorry hack.
+ */
+ vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs;
+}
+
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
+ u64 val;
+ int i;
+
+ __vgic_v3_save_vmcr_aprs(s_cpu_if);
+ __vgic_v3_deactivate_traps(s_cpu_if);
+ __vgic_v3_save_state(s_cpu_if);
+
+ /*
+ * Translate the shadow state HW fields back to the virtual ones
+ * before copying the shadow struct back to the nested one.
+ */
+ val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+ val &= ~ICH_HCR_EL2_EOIcount_MASK;
+ val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
+ __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val;
+ __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr;
+
+ for (i = 0; i < 4; i++) {
+ __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i];
+ __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i];
+ }
+
+ for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ val &= ~ICH_LR_STATE;
+ val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+
+ __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
+ s_cpu_if->vgic_lr[i] = 0;
+ }
+
+ shadow_if->lr_map = 0;
+ vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
+}
+
+/*
+ * If we exit a L2 VM with a pending maintenance interrupt from the GIC,
+ * then we need to forward this to L1 so that it can re-sync the appropriate
+ * LRs and sample level triggered interrupts again.
+ */
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
+{
+ bool state = read_sysreg_s(SYS_ICH_MISR_EL2);
+
+ /* This will force a switch back to L1 if the level is high */
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.mi_intid, state, vcpu);
+
+ sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
+}
+
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
+{
+ bool level;
+
+ level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En;
+ if (level)
+ level &= vgic_v3_get_misr(vcpu);
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
+}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index d7233ab982d0..b9ad7c42c5b0 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -24,7 +24,7 @@ void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
- cpuif->vgic_hcr |= ICH_HCR_UIE;
+ cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
}
static bool lr_signals_eoi_mi(u64 lr_val)
@@ -42,7 +42,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE;
for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
@@ -284,15 +284,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
vgic_v3->vgic_sre = 0;
}
- vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_ID_BITS_MASK) >>
- ICH_VTR_ID_BITS_SHIFT;
- vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_PRI_BITS_MASK) >>
- ICH_VTR_PRI_BITS_SHIFT) + 1;
+ vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits,
+ kvm_vgic_global_state.ich_vtr_el2);
+ vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
+ kvm_vgic_global_state.ich_vtr_el2) + 1;
/* Get the show on the road... */
- vgic_v3->vgic_hcr = ICH_HCR_EN;
+ vgic_v3->vgic_hcr = ICH_HCR_EL2_En;
}
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
@@ -301,18 +299,19 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
/* Hide GICv3 sysreg if necessary */
if (!kvm_has_gicv3(vcpu->kvm)) {
- vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC;
+ vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
+ ICH_HCR_EL2_TC);
return;
}
if (group0_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TC;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TDIR;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -632,8 +631,8 @@ static const struct midr_range broken_seis[] = {
static bool vgic_v3_broken_seis(void)
{
- return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
- is_midr_in_range_list(read_cpuid_id(), broken_seis));
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) &&
+ is_midr_in_range_list(broken_seis));
}
/**
@@ -706,10 +705,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (vgic_v3_broken_seis()) {
kvm_info("GICv3 with broken locally generated SEI\n");
- kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
group0_trap = true;
group1_trap = true;
- if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
+ if (ich_vtr_el2 & ICH_VTR_EL2_TDS)
dir_trap = true;
else
common_trap = true;
@@ -735,6 +734,12 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ /* If the vgic is nested, perform the full state loading */
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_load_nested(vcpu);
+ return;
+ }
+
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
@@ -748,6 +753,11 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_put_nested(vcpu);
+ return;
+ }
+
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index eedecbbbcf31..c7de6154627c 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,6 +336,22 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
+static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_get_flag(vcpu, IN_WFI))
+ return true;
+
+ if (likely(!vcpu_has_nv(vcpu)))
+ return false;
+
+ /*
+ * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the
+ * L1 context) nonresident and request a doorbell to kick us out of the
+ * L2 when an IRQ becomes pending.
+ */
+ return vcpu_get_flag(vcpu, IN_NESTED_ERET);
+}
+
int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
@@ -343,7 +359,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu)
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
+ return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -415,7 +431,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
struct vgic_irq *irq;
struct its_vlpi_map map;
unsigned long flags;
- int ret;
+ int ret = 0;
if (!vgic_supports_direct_msis(kvm))
return 0;
@@ -430,10 +446,15 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
mutex_lock(&its->its_lock);
- /* Perform the actual DevID/EventID -> LPI translation. */
- ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
- irq_entry->msi.data, &irq);
- if (ret)
+ /*
+ * Perform the actual DevID/EventID -> LPI translation.
+ *
+ * Silently exit if translation fails as the guest (or userspace!) has
+ * managed to do something stupid. Emulated LPI injection will still
+ * work if the guest figures itself out at a later time.
+ */
+ if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
+ irq_entry->msi.data, &irq))
goto out;
/* Silently exit if the vLPI is already mapped */
@@ -512,7 +533,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;
- WARN_ON(!(irq->hw && irq->host_irq == virq));
+ WARN_ON(irq->hw && irq->host_irq != virq);
if (irq->hw) {
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index cc8c6b9b5dd8..8f8096d48925 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -872,6 +872,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
int used_lrs;
+ /* If nesting, emulate the HW effect from L0 to L1 */
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_sync_nested(vcpu);
+ return;
+ }
+
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -901,6 +910,35 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
/*
+ * If in a nested state, we must return early. Two possibilities:
+ *
+ * - If we have any pending IRQ for the guest and the guest
+ * expects IRQs to be handled in its virtual EL2 mode (the
+ * virtual IMO bit is set) and it is not already running in
+ * virtual EL2 mode, then we have to emulate an IRQ
+ * exception to virtual EL2.
+ *
+ * We do that by placing a request to ourselves which will
+ * abort the entry procedure and inject the exception at the
+ * beginning of the run loop.
+ *
+ * - Otherwise, do exactly *NOTHING*. The guest state is
+ * already loaded, and we can carry on with running it.
+ *
+ * If we have NV, but are not in a nested state, compute the
+ * maintenance interrupt state, as it may fire.
+ */
+ if (vgic_state_is_nested(vcpu)) {
+ if (kvm_vgic_vcpu_pending_irq(vcpu))
+ kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
+
+ return;
+ }
+
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
+ /*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
* taking any lock. There is a potential race with someone injecting
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 122d95b4e284..4349084cb9a6 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -172,6 +172,36 @@ struct vgic_reg_attr {
gpa_t addr;
};
+struct its_device {
+ struct list_head dev_list;
+
+ /* the head for the list of ITTEs */
+ struct list_head itt_head;
+ u32 num_eventid_bits;
+ gpa_t itt_addr;
+ u32 device_id;
+};
+
+#define COLLECTION_NOT_MAPPED ((u32)~0)
+
+struct its_collection {
+ struct list_head coll_list;
+
+ u32 collection_id;
+ u32 target_addr;
+};
+
+#define its_is_collection_mapped(coll) ((coll) && \
+ ((coll)->target_addr != COLLECTION_NOT_MAPPED))
+
+struct its_ite {
+ struct list_head ite_list;
+
+ struct vgic_irq *irq;
+ struct its_collection *collection;
+ u32 event_id;
+};
+
int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
struct vgic_reg_attr *reg_attr);
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@@ -353,4 +383,13 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
+
+int vgic_its_debug_init(struct kvm_device *dev);
+void vgic_its_debug_destroy(struct kvm_device *dev);
+
#endif