 Documentation/virt/kvm/devices/vcpu.rst |  57
 arch/arm64/include/asm/kvm_host.h       |   7
 arch/arm64/include/uapi/asm/kvm.h       |  16
 arch/arm64/kvm/arm.c                    |   2
 arch/arm64/kvm/pmu-emul.c               | 195
 arch/arm64/kvm/sys_regs.c               |   5
 include/kvm/arm_pmu.h                   |   5
 7 files changed, 257 insertions(+), 30 deletions(-)
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index ca374d3fe085..da7c2ef7dafc 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -25,8 +25,10 @@ Returns:
 
 	 =======  ========================================================
 	 -EBUSY   The PMU overflow interrupt is already set
-	 -ENXIO   The overflow interrupt not set when attempting to get it
-	 -ENODEV  PMUv3 not supported
+	 -EFAULT  Error reading interrupt number
+	 -ENXIO   PMUv3 not supported or the overflow interrupt not set
+		  when attempting to get it
+	 -ENODEV  KVM_ARM_VCPU_PMU_V3 feature missing from VCPU
 	 -EINVAL  Invalid PMU overflow interrupt number supplied or
 		  trying to set the IRQ number without using an in-kernel
 		  irqchip.
@@ -45,9 +47,10 @@ all vcpus, while as an SPI it must be a separate number per vcpu.
 Returns:
 
 	 =======  ======================================================
+	 -EEXIST  Interrupt number already used
 	 -ENODEV  PMUv3 not supported or GIC not initialized
-	 -ENXIO   PMUv3 not properly configured or in-kernel irqchip not
-		  configured as required prior to calling this attribute
+	 -ENXIO   PMUv3 not supported, missing VCPU feature or interrupt
+		  number not set
 	 -EBUSY   PMUv3 already initialized
 	 =======  ======================================================
 
@@ -55,6 +58,52 @@ Request the initialization of the PMUv3.  If using the PMUv3 with an in-kernel
 virtual GIC implementation, this must be done after initializing the in-kernel
 irqchip.
 
+1.3 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_FILTER
+-----------------------------------------
+
+:Parameters: in kvm_device_attr.addr the address for a PMU event filter is a
+             pointer to a struct kvm_pmu_event_filter
+
+:Returns:
+
+	 =======  ======================================================
+	 -ENODEV: PMUv3 not supported or GIC not initialized
+	 -ENXIO:  PMUv3 not properly configured or in-kernel irqchip not
+	 	  configured as required prior to calling this attribute
+	 -EBUSY:  PMUv3 already initialized
+	 -EINVAL: Invalid filter range
+	 =======  ======================================================
+
+Request the installation of a PMU event filter described as follows:
+
+struct kvm_pmu_event_filter {
+	__u16	base_event;
+	__u16	nevents;
+
+#define KVM_PMU_EVENT_ALLOW	0
+#define KVM_PMU_EVENT_DENY	1
+
+	__u8	action;
+	__u8	pad[3];
+};
+
+A filter range is defined as the range [@base_event, @base_event + @nevents),
+together with an @action (KVM_PMU_EVENT_ALLOW or KVM_PMU_EVENT_DENY). The
+first registered range defines the global policy (global ALLOW if the first
+@action is DENY, global DENY if the first @action is ALLOW). Multiple ranges
+can be programmed, and must fit within the event space defined by the PMU
+architecture (10 bits on ARMv8.0, 16 bits from ARMv8.1 onwards).
+
+Note: "Cancelling" a filter by registering the opposite action for the same
+range doesn't change the default action. For example, installing an ALLOW
+filter for event range [0:10) as the first filter and then applying a DENY
+action for the same range will leave the whole range as disabled.
+
+Restrictions: Event 0 (SW_INCR) is never filtered, as it doesn't count a
+hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
+isn't strictly speaking an event. Filtering the cycle counter is possible
+using event 0x11 (CPU_CYCLES).
+
 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
 =================================
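
For illustration, userspace installs the filter described above with the
KVM_SET_DEVICE_ATTR ioctl on the vcpu file descriptor, before issuing
KVM_ARM_VCPU_PMU_V3_INIT. A minimal sketch (the event number and helper name
are illustrative; the attribute and structure names come from the uapi
changes below):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Deny the architected BR_MIS_PRED event (0x10) for this VM. As this
     * DENY filter is the first one installed, the global policy becomes
     * ALLOW, so every other event stays permitted. */
    static int deny_branch_mispredicts(int vcpu_fd)
    {
        struct kvm_pmu_event_filter filter = {
            .base_event = 0x10,    /* BR_MIS_PRED */
            .nevents    = 1,
            .action     = KVM_PMU_EVENT_DENY,
        };
        struct kvm_device_attr attr = {
            .group = KVM_ARM_VCPU_PMU_V3_CTRL,
            .attr  = KVM_ARM_VCPU_PMU_V3_FILTER,
            .addr  = (__u64)(unsigned long)&filter,
        };

        return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }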
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b537ab05a4d3..d56d67c3787e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -111,6 +111,13 @@ struct kvm_arch {
 	 * supported.
 	 */
 	bool return_nisv_io_abort_to_user;
+
+	/*
+	 * VM-wide PMU filter, implemented as a bitmap and big enough for
+	 * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
+	 */
+	unsigned long *pmu_filter;
+	unsigned int pmuver;
 };
 
 struct kvm_vcpu_fault_info {
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..7b1511d6ce44 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -159,6 +159,21 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/*
+ * PMU filter structure. Describe a range of events with a particular
+ * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
+ */
+struct kvm_pmu_event_filter {
+	__u16	base_event;
+	__u16	nevents;
+
+#define KVM_PMU_EVENT_ALLOW	0
+#define KVM_PMU_EVENT_DENY	1
+
+	__u8	action;
+	__u8	pad[3];
+};
+
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
 	struct {
@@ -329,6 +344,7 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
 #define   KVM_ARM_VCPU_PMU_V3_INIT	1
+#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
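
The explicit pad[3] keeps the uapi structure at a fixed 8-byte layout with no
implicit padding, on both 32-bit and 64-bit userspace. A compile-time check
along these lines (not part of the patch; shown only to make the ABI
assumption explicit) would confirm it:

    #include <asm/kvm.h>    /* struct kvm_pmu_event_filter (arm64 uapi) */

    /* 2 + 2 + 1 + 3 bytes, 2-byte alignment, no hidden padding. */
    _Static_assert(sizeof(struct kvm_pmu_event_filter) == 8,
                   "kvm_pmu_event_filter must keep its 8-byte ABI layout");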
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 28d1e9858743..e49189012af1 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -147,6 +147,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
 
+	bitmap_free(kvm->arch.pmu_filter);
+
 	kvm_vgic_destroy(kvm);
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f0d0312c0a55..ee13c5eecd3d 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -20,6 +20,21 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
 
 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
 
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+	switch (kvm->arch.pmuver) {
+	case 1:			/* ARMv8.0 */
+		return GENMASK(9, 0);
+	case 4:			/* ARMv8.1 */
+	case 5:			/* ARMv8.4 */
+	case 6:			/* ARMv8.5 */
+		return GENMASK(15, 0);
+	default:		/* Shouldn't be here, just for sanity */
+		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
+		return 0;
+	}
+}
+
 /**
  * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
  * @vcpu: The vcpu pointer
@@ -100,7 +115,7 @@ static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
 		return false;
 
 	reg = PMEVTYPER0_EL0 + select_idx;
-	eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
+	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
 
 	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
 }
@@ -495,7 +510,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 
 		/* PMSWINC only applies to ... SW_INC! */
 		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
-		type &= ARMV8_PMU_EVTYPE_EVENT;
+		type &= kvm_pmu_event_mask(vcpu->kvm);
 		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
 			continue;
 
@@ -578,11 +593,21 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	data = __vcpu_sys_reg(vcpu, reg);
 
 	kvm_pmu_stop_counter(vcpu, pmc);
-	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
+	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
+		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
+	else
+		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
 
-	/* Software increment event does't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
-	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
+	/* Software increment event doesn't need to be backed by a perf event */
+	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
+		return;
+
+	/*
+	 * If we have a filter in place and that the event isn't allowed, do
+	 * not install a perf event either.
+	 */
+	if (vcpu->kvm->arch.pmu_filter &&
+	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
 		return;
 
 	memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -594,8 +619,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 	attr.exclude_hv = 1; /* Don't count EL2 events */
 	attr.exclude_host = 1; /* Don't count host events */
-	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
-		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
+	attr.config = eventsel;
 
 	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
@@ -679,17 +703,95 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 				    u64 select_idx)
 {
-	u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
+	u64 reg, mask;
+
+	mask  =  ARMV8_PMU_EVTYPE_MASK;
+	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
+	mask |= kvm_pmu_event_mask(vcpu->kvm);
 
 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
 
-	__vcpu_sys_reg(vcpu, reg) = event_type;
+	__vcpu_sys_reg(vcpu, reg) = data & mask;
 
 	kvm_pmu_update_pmc_chained(vcpu, select_idx);
 	kvm_pmu_create_perf_event(vcpu, select_idx);
 }
 
+static int kvm_pmu_probe_pmuver(void)
+{
+	struct perf_event_attr attr = { };
+	struct perf_event *event;
+	struct arm_pmu *pmu;
+	int pmuver = 0xf;
+
+	/*
+	 * Create a dummy event that only counts user cycles. As we'll never
+	 * leave this function with the event being live, it will never
+	 * count anything. But it allows us to probe some of the PMU
+	 * details. Yes, this is terrible.
+	 */
+	attr.type = PERF_TYPE_RAW;
+	attr.size = sizeof(attr);
+	attr.pinned = 1;
+	attr.disabled = 0;
+	attr.exclude_user = 0;
+	attr.exclude_kernel = 1;
+	attr.exclude_hv = 1;
+	attr.exclude_host = 1;
+	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
+	attr.sample_period = GENMASK(63, 0);
+
+	event = perf_event_create_kernel_counter(&attr, -1, current,
+						 kvm_pmu_perf_overflow, &attr);
+
+	if (IS_ERR(event)) {
+		pr_err_once("kvm: pmu event creation failed %ld\n",
+			    PTR_ERR(event));
+		return 0xf;
+	}
+
+	if (event->pmu) {
+		pmu = to_arm_pmu(event->pmu);
+		if (pmu->pmuver)
+			pmuver = pmu->pmuver;
+	}
+
+	perf_event_disable(event);
+	perf_event_release_kernel(event);
+
+	return pmuver;
+}
+
+u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
+{
+	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
+	u64 val, mask = 0;
+	int base, i;
+
+	if (!pmceid1) {
+		val = read_sysreg(pmceid0_el0);
+		base = 0;
+	} else {
+		val = read_sysreg(pmceid1_el0);
+		base = 32;
+	}
+
+	if (!bmap)
+		return val;
+
+	for (i = 0; i < 32; i += 8) {
+		u64 byte;
+
+		byte = bitmap_get_value8(bmap, base + i);
+		mask |= byte << i;
+		byte = bitmap_get_value8(bmap, 0x4000 + base + i);
+		mask |= byte << (32 + i);
+	}
+
+	return val & mask;
+}
+
 bool kvm_arm_support_pmu_v3(void)
 {
 	/*
@@ -735,15 +837,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 
 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_arm_support_pmu_v3())
-		return -ENODEV;
-
-	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
-		return -ENXIO;
-
-	if (vcpu->arch.pmu.created)
-		return -EBUSY;
-
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		int ret;
 
@@ -796,6 +889,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
+	if (!kvm_arm_support_pmu_v3() ||
+	    !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+		return -ENODEV;
+
+	if (vcpu->arch.pmu.created)
+		return -EBUSY;
+
+	if (!vcpu->kvm->arch.pmuver)
+		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
+
+	if (vcpu->kvm->arch.pmuver == 0xf)
+		return -ENODEV;
+
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_PMU_V3_IRQ: {
 		int __user *uaddr = (int __user *)(long)attr->addr;
@@ -804,9 +910,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		if (!irqchip_in_kernel(vcpu->kvm))
 			return -EINVAL;
 
-		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
-			return -ENODEV;
-
 		if (get_user(irq, uaddr))
 			return -EFAULT;
 
@@ -824,6 +927,53 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		vcpu->arch.pmu.irq_num = irq;
 		return 0;
 	}
+	case KVM_ARM_VCPU_PMU_V3_FILTER: {
+		struct kvm_pmu_event_filter __user *uaddr;
+		struct kvm_pmu_event_filter filter;
+		int nr_events;
+
+		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
+
+		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
+
+		if (copy_from_user(&filter, uaddr, sizeof(filter)))
+			return -EFAULT;
+
+		if (((u32)filter.base_event + filter.nevents) > nr_events ||
+		    (filter.action != KVM_PMU_EVENT_ALLOW &&
+		     filter.action != KVM_PMU_EVENT_DENY))
+			return -EINVAL;
+
+		mutex_lock(&vcpu->kvm->lock);
+
+		if (!vcpu->kvm->arch.pmu_filter) {
+			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
+			if (!vcpu->kvm->arch.pmu_filter) {
+				mutex_unlock(&vcpu->kvm->lock);
+				return -ENOMEM;
+			}
+
+			/*
+			 * The default depends on the first applied filter.
+			 * If it allows events, the default is to deny.
+			 * Conversely, if the first filter denies a set of
+			 * events, the default is to allow.
+			 */
+			if (filter.action == KVM_PMU_EVENT_ALLOW)
+				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
+			else
+				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
+		}
+
+		if (filter.action == KVM_PMU_EVENT_ALLOW)
+			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+		else
+			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+
+		mutex_unlock(&vcpu->kvm->lock);
+
+		return 0;
+	}
 	case KVM_ARM_VCPU_PMU_V3_INIT:
 		return kvm_arm_pmu_v3_init(vcpu);
 	}
@@ -860,6 +1010,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_PMU_V3_IRQ:
 	case KVM_ARM_VCPU_PMU_V3_INIT:
+	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_arm_support_pmu_v3() &&
 		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
 			return 0;
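
As a plain-C illustration of the bitmap logic in the KVM_ARM_VCPU_PMU_V3_FILTER
case above (and of the documentation note that "cancelling" a filter does not
restore the default), assuming the 16-bit event space of ARMv8.1+; the names
here are illustrative, not kernel API:

    #include <stdbool.h>
    #include <string.h>

    #define NR_EVENTS 65536                 /* 2^16 events (ARMv8.1+) */

    static unsigned char allowed[NR_EVENTS];
    static bool installed;

    /* Callers must ensure base + n <= NR_EVENTS (the kernel validates
     * this before touching the bitmap). */
    static void apply_filter(unsigned int base, unsigned int n, bool allow)
    {
        if (!installed) {
            /* The first filter defines the default policy: a first
             * ALLOW denies everything else, a first DENY allows
             * everything else. */
            memset(allowed, !allow, NR_EVENTS);
            installed = true;
        }
        memset(allowed + base, allow, n);
    }

    /* ALLOW [0, 10) followed by DENY [0, 10): the default stays "deny"
     * from the first call, so afterwards no event is allowed at all. */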
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 077293b5115f..20ab2a7d37ca 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -769,10 +769,7 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	if (pmu_access_el0_disabled(vcpu))
 		return false;
 
-	if (!(p->Op2 & 1))
-		pmceid = read_sysreg(pmceid0_el0);
-	else
-		pmceid = read_sysreg(pmceid1_el0);
+	pmceid = kvm_pmu_get_pmceid(vcpu, (p->Op2 & 1));
 
 	p->regval = pmceid;
 
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 6db030439e29..98cbfe885a53 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -34,6 +34,7 @@ struct kvm_pmu {
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
+u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1);
 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -108,6 +109,10 @@ static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 {
 	return 0;
 }
+static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
+{
+	return 0;
+}
 #endif
 
 #endif
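
For reference, the kvm_pmu_get_pmceid() helper wired into access_pmceid()
above relies on the architectural PMCEID layout: bits [31:0] of
PMCEID0_EL0/PMCEID1_EL0 advertise common events 0-31 and 32-63 respectively,
and bits [63:32] advertise the extended ID range starting at event 0x4000. A
simplified, bit-at-a-time sketch of the same mask construction (illustrative
only; the kernel code uses byte-wide bitmap accessors instead):

    #include <stdbool.h>
    #include <stdint.h>

    /* 'event_allowed' stands in for the filter bitmap; 'base' is 0 for
     * PMCEID0_EL0 and 32 for PMCEID1_EL0. The returned mask is ANDed
     * with the hardware register value, so a filtered event reads back
     * as "not implemented" to the guest. */
    static uint64_t pmceid_filter_mask(bool (*event_allowed)(unsigned int),
                                       int base)
    {
        uint64_t mask = 0;

        for (int i = 0; i < 32; i++) {
            if (event_allowed(base + i))
                mask |= 1ULL << i;              /* common event */
            if (event_allowed(0x4000 + base + i))
                mask |= 1ULL << (32 + i);       /* extended event */
        }
        return mask;
    }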
