diff options
73 files changed, 6526 insertions, 481 deletions
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index ee9b790c0d72..83e037351f6d 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -223,6 +223,47 @@ Before jumping into the kernel, the following conditions must be met: - SCR_EL3.HCE (bit 8) must be initialised to 0b1. + For systems with a GICv5 interrupt controller to be used in v5 mode: + + - If the kernel is entered at EL1 and EL2 is present: + + - ICH_HFGRTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_PPI_HMRn_EL1 (bit 16) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_IAFFIDR_EL1 (bit 7) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_HPPIR_EL1 (bit 4) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_HAPR_EL1 (bit 3) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_IDRn_EL1 (bit 1) must be initialised to 0b1. + - ICH_HFGRTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1. + + - ICH_HFGWTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1. + - ICH_HFGWTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1. + + - ICH_HFGITR_EL2.GICRCDNMIA (bit 10) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICRCDIA (bit 9) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDDI (bit 8) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDEOI (bit 7) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDHM (bit 6) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDRCFG (bit 5) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDPEND (bit 4) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDAFF (bit 3) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDPRI (bit 2) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDDIS (bit 1) must be initialised to 0b1. + - ICH_HFGITR_EL2.GICCDEN (bit 0) must be initialised to 0b1. + + - The DT or ACPI tables must describe a GICv5 interrupt controller. + For systems with a GICv3 interrupt controller to be used in v3 mode: - If EL3 is present: diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml new file mode 100644 index 000000000000..99a266a62385 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/arm,gic-v5-iwb.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Generic Interrupt Controller, version 5 Interrupt Wire Bridge (IWB) + +maintainers: + - Lorenzo Pieralisi <lpieralisi@kernel.org> + - Marc Zyngier <maz@kernel.org> + +description: | + The GICv5 architecture defines the guidelines to implement GICv5 + compliant interrupt controllers for AArch64 systems. + + The GICv5 specification can be found at + https://developer.arm.com/documentation/aes0070 + + GICv5 has zero or more Interrupt Wire Bridges (IWB) that are responsible + for translating wire signals into interrupt messages to the GICv5 ITS. + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +properties: + compatible: + const: arm,gic-v5-iwb + + reg: + items: + - description: IWB control frame + + "#address-cells": + const: 0 + + "#interrupt-cells": + description: | + The 1st cell corresponds to the IWB wire. + + The 2nd cell is the flags, encoded as follows: + bits[3:0] trigger type and level flags. + + 1 = low-to-high edge triggered + 2 = high-to-low edge triggered + 4 = active high level-sensitive + 8 = active low level-sensitive + + const: 2 + + interrupt-controller: true + + msi-parent: + maxItems: 1 + +required: + - compatible + - reg + - "#interrupt-cells" + - interrupt-controller + - msi-parent + +additionalProperties: false + +examples: + - | + interrupt-controller@2f000000 { + compatible = "arm,gic-v5-iwb"; + reg = <0x2f000000 0x10000>; + + #address-cells = <0>; + + #interrupt-cells = <2>; + interrupt-controller; + + msi-parent = <&its0 64>; + }; +... diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml new file mode 100644 index 000000000000..86ca7f3ac281 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml @@ -0,0 +1,267 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/arm,gic-v5.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Generic Interrupt Controller, version 5 + +maintainers: + - Lorenzo Pieralisi <lpieralisi@kernel.org> + - Marc Zyngier <maz@kernel.org> + +description: | + The GICv5 architecture defines the guidelines to implement GICv5 + compliant interrupt controllers for AArch64 systems. + + The GICv5 specification can be found at + https://developer.arm.com/documentation/aes0070 + + The GICv5 architecture is composed of multiple components: + - one or more IRS (Interrupt Routing Service) + - zero or more ITS (Interrupt Translation Service) + + The architecture defines: + - PE-Private Peripheral Interrupts (PPI) + - Shared Peripheral Interrupts (SPI) + - Logical Peripheral Interrupts (LPI) + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +properties: + compatible: + const: arm,gic-v5 + + "#address-cells": + enum: [ 1, 2 ] + + "#size-cells": + enum: [ 1, 2 ] + + ranges: true + + "#interrupt-cells": + description: | + The 1st cell corresponds to the INTID.Type field in the INTID; 1 for PPI, + 3 for SPI. LPI interrupts must not be described in the bindings since + they are allocated dynamically by the software component managing them. + + The 2nd cell contains the interrupt INTID.ID field. + + The 3rd cell is the flags, encoded as follows: + bits[3:0] trigger type and level flags. + + 1 = low-to-high edge triggered + 2 = high-to-low edge triggered + 4 = active high level-sensitive + 8 = active low level-sensitive + + const: 3 + + interrupt-controller: true + + interrupts: + description: + The VGIC maintenance interrupt. + maxItems: 1 + +required: + - compatible + - "#address-cells" + - "#size-cells" + - ranges + - "#interrupt-cells" + - interrupt-controller + +patternProperties: + "^irs@[0-9a-f]+$": + type: object + description: + GICv5 has one or more Interrupt Routing Services (IRS) that are + responsible for handling IRQ state and routing. + + additionalProperties: false + + properties: + compatible: + const: arm,gic-v5-irs + + reg: + minItems: 1 + items: + - description: IRS config frames + - description: IRS setlpi frames + + reg-names: + description: + Describe config and setlpi frames that are present. + "ns-" stands for non-secure, "s-" for secure, "realm-" for realm + and "el3-" for EL3. + minItems: 1 + maxItems: 8 + items: + enum: [ ns-config, s-config, realm-config, el3-config, ns-setlpi, + s-setlpi, realm-setlpi, el3-setlpi ] + + "#address-cells": + enum: [ 1, 2 ] + + "#size-cells": + enum: [ 1, 2 ] + + ranges: true + + dma-noncoherent: + description: + Present if the GIC IRS permits programming shareability and + cacheability attributes but is connected to a non-coherent + downstream interconnect. + + cpus: + description: + CPUs managed by the IRS. + + arm,iaffids: + $ref: /schemas/types.yaml#/definitions/uint16-array + description: + Interrupt AFFinity ID (IAFFID) associated with the CPU whose + CPU node phandle is at the same index in the cpus array. + + patternProperties: + "^its@[0-9a-f]+$": + type: object + description: + GICv5 has zero or more Interrupt Translation Services (ITS) that are + used to route Message Signalled Interrupts (MSI) to the CPUs. Each + ITS is connected to an IRS. + additionalProperties: false + + properties: + compatible: + const: arm,gic-v5-its + + reg: + items: + - description: ITS config frames + + reg-names: + description: + Describe config frames that are present. + "ns-" stands for non-secure, "s-" for secure, "realm-" for realm + and "el3-" for EL3. + minItems: 1 + maxItems: 4 + items: + enum: [ ns-config, s-config, realm-config, el3-config ] + + "#address-cells": + enum: [ 1, 2 ] + + "#size-cells": + enum: [ 1, 2 ] + + ranges: true + + dma-noncoherent: + description: + Present if the GIC ITS permits programming shareability and + cacheability attributes but is connected to a non-coherent + downstream interconnect. + + patternProperties: + "^msi-controller@[0-9a-f]+$": + type: object + description: + GICv5 ITS has one or more translate register frames. + additionalProperties: false + + properties: + reg: + items: + - description: ITS translate frames + + reg-names: + description: + Describe translate frames that are present. + "ns-" stands for non-secure, "s-" for secure, "realm-" for realm + and "el3-" for EL3. + minItems: 1 + maxItems: 4 + items: + enum: [ ns-translate, s-translate, realm-translate, el3-translate ] + + "#msi-cells": + description: + The single msi-cell is the DeviceID of the device which will + generate the MSI. + const: 1 + + msi-controller: true + + required: + - reg + - reg-names + - "#msi-cells" + - msi-controller + + required: + - compatible + - reg + - reg-names + + required: + - compatible + - reg + - reg-names + - cpus + - arm,iaffids + +additionalProperties: false + +examples: + - | + interrupt-controller { + compatible = "arm,gic-v5"; + + #interrupt-cells = <3>; + interrupt-controller; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + interrupts = <1 25 4>; + + irs@2f1a0000 { + compatible = "arm,gic-v5-irs"; + reg = <0x2f1a0000 0x10000>; // IRS_CONFIG_FRAME + reg-names = "ns-config"; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>; + arm,iaffids = /bits/ 16 <0 1 2 3 4 5 6 7>; + + its@2f120000 { + compatible = "arm,gic-v5-its"; + reg = <0x2f120000 0x10000>; // ITS_CONFIG_FRAME + reg-names = "ns-config"; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + msi-controller@2f130000 { + reg = <0x2f130000 0x10000>; // ITS_TRANSLATE_FRAME + reg-names = "ns-translate"; + + #msi-cells = <1>; + msi-controller; + }; + }; + }; + }; +... diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9abf93ee5f65..53e0179d5294 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8585,7 +8585,7 @@ ENOSYS for the others. When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. -7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS +7.42 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS ------------------------------------- :Architectures: arm64 @@ -8614,6 +8614,17 @@ given VM. When this capability is enabled, KVM resets the VCPU when setting MP_STATE_INIT_RECEIVED through IOCTL. The original MP_STATE is preserved. +7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED +------------------------------------------- + +:Architectures: arm64 +:Target: VM +:Parameters: None + +This capability indicate to the userspace whether a PFNMAP memory region +can be safely mapped as cacheable. This relies on the presence of +force write back (FWB) feature support on the hardware. + 8. Other capabilities. ====================== diff --git a/MAINTAINERS b/MAINTAINERS index c3f7fbd0d67a..b035802ea41d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1964,6 +1964,16 @@ F: drivers/irqchip/irq-gic*.[ch] F: include/linux/irqchip/arm-gic*.h F: include/linux/irqchip/arm-vgic-info.h +ARM GENERIC INTERRUPT CONTROLLER V5 DRIVERS +M: Lorenzo Pieralisi <lpieralisi@kernel.org> +M: Marc Zyngier <maz@kernel.org> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5*.yaml +F: drivers/irqchip/irq-gic-its-msi-parent.[ch] +F: drivers/irqchip/irq-gic-v5*.[ch] +F: include/linux/irqchip/arm-gic-v5.h + ARM HDLCD DRM DRIVER M: Liviu Dudau <liviu.dudau@arm.com> S: Supported diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..5ff757cb7cd2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -129,6 +129,7 @@ config ARM64 select ARM_GIC_V2M if PCI select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI + select ARM_GIC_V5 select ARM_PSCI_FW select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 1ca947d5c939..f5801b0ba9e9 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -44,6 +44,9 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory") +#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory") + #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..54abcb13e51f 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -165,6 +165,50 @@ .Lskip_gicv3_\@: .endm +/* GICv5 system register access */ +.macro __init_el2_gicv5 + mrs_s x0, SYS_ID_AA64PFR2_EL1 + ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4 + cbz x0, .Lskip_gicv5_\@ + + mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \ + ICH_HFGITR_EL2_GICRCDIA | \ + ICH_HFGITR_EL2_GICCDDI | \ + ICH_HFGITR_EL2_GICCDEOI | \ + ICH_HFGITR_EL2_GICCDHM | \ + ICH_HFGITR_EL2_GICCDRCFG | \ + ICH_HFGITR_EL2_GICCDPEND | \ + ICH_HFGITR_EL2_GICCDAFF | \ + ICH_HFGITR_EL2_GICCDPRI | \ + ICH_HFGITR_EL2_GICCDDIS | \ + ICH_HFGITR_EL2_GICCDEN) + msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps + mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \ + ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \ + ICH_HFGRTR_EL2_ICC_PCR_EL1 | \ + ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \ + ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \ + ICH_HFGRTR_EL2_ICC_CR0_EL1 | \ + ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_APR_EL1) + msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps + mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \ + ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \ + ICH_HFGWTR_EL2_ICC_PCR_EL1 | \ + ICH_HFGWTR_EL2_ICC_CR0_EL1 | \ + ICH_HFGWTR_EL2_ICC_APR_EL1) + msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps +.Lskip_gicv5_\@: +.endm + .macro __init_el2_hstr msr hstr_el2, xzr // Disable CP15 traps to EL2 .endm @@ -314,6 +358,7 @@ __init_el2_lor __init_el2_stage2 __init_el2_gicv3 + __init_el2_gicv5 __init_el2_hstr __init_el2_nvhe_idregs __init_el2_cptr diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0720898f563e..fa8a08a1ccd5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -45,16 +45,39 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); -void kvm_inject_vabt(struct kvm_vcpu *vcpu); -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr); +int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); void kvm_inject_size_fault(struct kvm_vcpu *vcpu); +static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr) +{ + return kvm_inject_sea(vcpu, false, addr); +} + +static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr) +{ + return kvm_inject_sea(vcpu, true, addr); +} + +static inline int kvm_inject_serror(struct kvm_vcpu *vcpu) +{ + /* + * ESR_ELx.ISV (later renamed to IDS) indicates whether or not + * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information. + * + * Set the bit when injecting an SError w/o an ESR to indicate ISS + * does not follow the architected format. + */ + return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV); +} + void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); +int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr); static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu) { @@ -195,6 +218,11 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE; } +static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) +{ + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; +} + static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) { bool e2h, tge; @@ -224,6 +252,20 @@ static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu) return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu); } +static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu) +{ + return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu); +} + +static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu) +{ + if (!is_nested_ctxt(vcpu)) + return false; + + return vcpu_el2_amo_is_set(vcpu) || + (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA); +} + /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 @@ -627,6 +669,9 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu) if (kvm_has_fpmr(kvm)) vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM; + + if (kvm_has_sctlr2(kvm)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En; } } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d27079968341..d373d555a69b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -523,6 +523,7 @@ enum vcpu_sysreg { /* Anything from this can be RES0/RES1 sanitised */ MARKER(__SANITISED_REG_START__), TCR2_EL2, /* Extended Translation Control Register (EL2) */ + SCTLR2_EL2, /* System Control Register 2 (EL2) */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ @@ -537,6 +538,7 @@ enum vcpu_sysreg { VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */ VNCR(TCR_EL1), /* Translation Control Register */ VNCR(TCR2_EL1), /* Extended Translation Control Register */ + VNCR(SCTLR2_EL1), /* System Control Register 2 */ VNCR(ESR_EL1), /* Exception Syndrome Register */ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */ @@ -565,6 +567,10 @@ enum vcpu_sysreg { VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */ + /* FEAT_RAS registers */ + VNCR(VDISR_EL2), + VNCR(VSESR_EL2), + VNCR(HFGRTR_EL2), VNCR(HFGWTR_EL2), VNCR(HFGITR_EL2), @@ -817,7 +823,7 @@ struct kvm_vcpu_arch { u8 iflags; /* State flags for kernel bookkeeping, unused by the hypervisor code */ - u8 sflags; + u16 sflags; /* * Don't run the guest (internal implementation need). @@ -953,9 +959,21 @@ struct kvm_vcpu_arch { __vcpu_flags_preempt_enable(); \ } while (0) +#define __vcpu_test_and_clear_flag(v, flagset, f, m) \ + ({ \ + typeof(v->arch.flagset) set; \ + \ + set = __vcpu_get_flag(v, flagset, f, m); \ + __vcpu_clear_flag(v, flagset, f, m); \ + \ + set; \ + }) + #define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__) #define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__) #define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__) +#define vcpu_test_and_clear_flag(v, ...) \ + __vcpu_test_and_clear_flag((v), __VA_ARGS__) /* KVM_ARM_VCPU_INIT completed */ #define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0)) @@ -1015,6 +1033,8 @@ struct kvm_vcpu_arch { #define IN_WFI __vcpu_single_flag(sflags, BIT(6)) /* KVM is currently emulating a nested ERET */ #define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7)) +/* SError pending for nested guest */ +#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -1149,6 +1169,8 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) * System registers listed in the switch are not saved on every * exit from the guest but are only saved on vcpu_put. * + * SYSREGS_ON_CPU *MUST* be checked before using this helper. + * * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but * should never be listed below, because the guest cannot modify its * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's @@ -1186,6 +1208,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; + case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break; default: return false; } @@ -1200,6 +1223,8 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) * System registers listed in the switch are not restored on every * entry to the guest but are only restored on vcpu_load. * + * SYSREGS_ON_CPU *MUST* be checked before using this helper. + * * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but * should never be listed below, because the MPIDR should only be set * once, before running the VCPU, and never changed later. @@ -1236,6 +1261,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; + case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; default: return false; } @@ -1387,8 +1413,6 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) return (vcpu_arch->steal.base != INVALID_GPA); } -void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); @@ -1666,6 +1690,12 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +#define kvm_has_ras(k) \ + (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP)) + +#define kvm_has_sctlr2(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP)) + static inline bool kvm_arch_has_irq_bypass(void) { return true; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b98ac6aa631f..ae563ebd6aee 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm) read_unlock(&kvm->mmu_lock); } +/* + * ARM64 KVM relies on a simple conversion from physaddr to a kernel + * virtual address (KVA) when it does cache maintenance as the CMO + * instructions work on virtual addresses. This is incompatible with + * VM_PFNMAP VMAs which may not have a kernel direct mapping to a + * virtual address. + * + * With S2FWB and CACHE DIC features, KVM need not do cache flushing + * and CMOs are NOP'd. This has the effect of no longer requiring a + * KVA for addresses mapped into the S2. The presence of these features + * are thus necessary to support cacheable S2 mapping of VM_PFNMAP. + */ +static inline bool kvm_supports_cacheable_pfnmap(void) +{ + return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) && + cpus_have_final_cap(ARM64_HAS_CACHE_DIC); +} + #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); #else diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 0bd07ea068a1..7fd76f41c296 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -80,6 +80,8 @@ extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); +extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); +extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); struct kvm_s2_trans { phys_addr_t output; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 2510eec026f7..d48ef6d5abcc 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -50,10 +50,32 @@ struct seq_file; */ extern void smp_init_cpus(void); +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, + IPI_CPU_STOP_NMI, + IPI_TIMER, + IPI_IRQ_WORK, + NR_IPI, + /* + * Any enum >= NR_IPI and < MAX_IPI is special and not tracable + * with trace_ipi_* + */ + IPI_CPU_BACKTRACE = NR_IPI, + IPI_KGDB_ROUNDUP, + MAX_IPI +}; + /* * Register IPI interrupts with the arch SMP code */ -extern void set_smp_ipi_range(int ipi_base, int nr_ipi); +extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus); + +static inline void set_smp_ipi_range(int ipi_base, int n) +{ + set_smp_ipi_range_percpu(ipi_base, n, 0); +} /* * Called from the secondary holding pen, this is the secondary CPU entry point. diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f1bb0d10c39a..948007cd3684 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -113,10 +113,14 @@ /* Register-based PAN access, for save/restore purposes */ #define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) -#define __SYS_BARRIER_INSN(CRm, op2, Rt) \ - __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) +#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \ + __emit_inst(0xd5000000 | \ + sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \ + ((Rt) & 0x1f)) -#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) +#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31) +#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31) +#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31) /* Data cache zero operations */ #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) @@ -1078,6 +1082,67 @@ #define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ GCS_CAP_VALID_TOKEN) +/* + * Definitions for GICv5 instructions + */ +#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3) +#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0) +#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0) +#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1) +#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1) +#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7) +#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4) +#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2) +#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5) +#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0) + +/* Definitions for GIC CDAFF */ +#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32) +#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28) +#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDI */ +#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDIS */ +#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r) +#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0) +#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r) + +/* Definitions for GIC CDEN */ +#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDHM */ +#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32) +#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPEND */ +#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32) +#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPRI */ +#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35) +#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDRCFG */ +#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GICR CDIA */ +#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32) +#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r) +#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0) + +#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn) +#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn) #define ARM64_FEATURE_FIELD_BITS 4 diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index 6f556e993644..f6ec500ad3fa 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -51,6 +51,7 @@ #define VNCR_SP_EL1 0x240 #define VNCR_VBAR_EL1 0x250 #define VNCR_TCR2_EL1 0x270 +#define VNCR_SCTLR2_EL1 0x278 #define VNCR_PIRE0_EL1 0x290 #define VNCR_PIR_EL1 0x2A0 #define VNCR_POR_EL1 0x2A8 @@ -84,6 +85,7 @@ #define VNCR_ICH_HCR_EL2 0x4C0 #define VNCR_ICH_VMCR_EL2 0x4C8 #define VNCR_VDISR_EL2 0x500 +#define VNCR_VSESR_EL2 0x508 #define VNCR_PMBLIMITR_EL1 0x800 #define VNCR_PMBPTR_EL1 0x810 #define VNCR_PMBSR_EL1 0x820 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..4dece9ca68bc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -303,6 +303,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0), @@ -500,6 +501,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE), FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2296,11 +2298,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) { /* - * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU + * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU * feature, so will be detected earlier. */ - BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS); - if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) + BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF); + if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF)) return false; return enable_pseudo_nmi; @@ -2496,8 +2498,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_always, }, { - .desc = "GIC system register CPU interface", - .capability = ARM64_HAS_GIC_CPUIF_SYSREGS, + .desc = "GICv3 CPU interface", + .capability = ARM64_HAS_GICV3_CPUIF, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP) @@ -3061,6 +3063,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_pmuv3, }, #endif + { + .desc = "SCTLR2", + .capability = ARM64_HAS_SCTLR2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP) + }, + { + .desc = "GICv5 CPU interface", + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .capability = ARM64_HAS_GICV5_CPUIF, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP) + }, {}, }; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..a900835a3adf 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -64,26 +64,18 @@ struct secondary_data secondary_data; /* Number of CPUs which aren't online, but looping in kernel text. */ static int cpus_stuck_in_kernel; -enum ipi_msg_type { - IPI_RESCHEDULE, - IPI_CALL_FUNC, - IPI_CPU_STOP, - IPI_CPU_STOP_NMI, - IPI_TIMER, - IPI_IRQ_WORK, - NR_IPI, - /* - * Any enum >= NR_IPI and < MAX_IPI is special and not tracable - * with trace_ipi_* - */ - IPI_CPU_BACKTRACE = NR_IPI, - IPI_KGDB_ROUNDUP, - MAX_IPI -}; - static int ipi_irq_base __ro_after_init; static int nr_ipi __ro_after_init = NR_IPI; -static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init; + +struct ipi_descs { + struct irq_desc *descs[MAX_IPI]; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc); + +#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi]) + +static bool percpu_ipi_descs __ro_after_init; static bool crash_stop; @@ -844,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu)); seq_printf(p, " %s\n", ipi_types[i]); } @@ -917,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs #endif } +static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr) +{ + unsigned int cpu; + + if (!percpu_ipi_descs) + __ipi_send_mask(get_ipi_desc(0, nr), mask); + else + for_each_cpu(cpu, mask) + __ipi_send_single(get_ipi_desc(cpu, nr), cpu); +} + static void arm64_backtrace_ipi(cpumask_t *mask) { - __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); + arm64_send_ipi(mask, IPI_CPU_BACKTRACE); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) @@ -944,7 +947,7 @@ void kgdb_roundup_cpus(void) if (cpu == this_cpu) continue; - __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu); + __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu); } } #endif @@ -1013,14 +1016,16 @@ static void do_handle_IPI(int ipinr) static irqreturn_t ipi_handler(int irq, void *data) { - do_handle_IPI(irq - ipi_irq_base); + unsigned int ipi = (irq - ipi_irq_base) % nr_ipi; + + do_handle_IPI(ipi); return IRQ_HANDLED; } static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { trace_ipi_raise(target, ipi_types[ipinr]); - __ipi_send_mask(ipi_desc[ipinr], target); + arm64_send_ipi(target, ipinr); } static bool ipi_should_be_nmi(enum ipi_msg_type ipi) @@ -1046,11 +1051,15 @@ static void ipi_setup(int cpu) return; for (i = 0; i < nr_ipi; i++) { - if (ipi_should_be_nmi(i)) { - prepare_percpu_nmi(ipi_irq_base + i); - enable_percpu_nmi(ipi_irq_base + i, 0); + if (!percpu_ipi_descs) { + if (ipi_should_be_nmi(i)) { + prepare_percpu_nmi(ipi_irq_base + i); + enable_percpu_nmi(ipi_irq_base + i, 0); + } else { + enable_percpu_irq(ipi_irq_base + i, 0); + } } else { - enable_percpu_irq(ipi_irq_base + i, 0); + enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i))); } } } @@ -1064,44 +1073,77 @@ static void ipi_teardown(int cpu) return; for (i = 0; i < nr_ipi; i++) { - if (ipi_should_be_nmi(i)) { - disable_percpu_nmi(ipi_irq_base + i); - teardown_percpu_nmi(ipi_irq_base + i); + if (!percpu_ipi_descs) { + if (ipi_should_be_nmi(i)) { + disable_percpu_nmi(ipi_irq_base + i); + teardown_percpu_nmi(ipi_irq_base + i); + } else { + disable_percpu_irq(ipi_irq_base + i); + } } else { - disable_percpu_irq(ipi_irq_base + i); + disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i))); } } } #endif -void __init set_smp_ipi_range(int ipi_base, int n) +static void ipi_setup_sgi(int ipi) { - int i; + int err, irq, cpu; - WARN_ON(n < MAX_IPI); - nr_ipi = min(n, MAX_IPI); + irq = ipi_irq_base + ipi; - for (i = 0; i < nr_ipi; i++) { - int err; + if (ipi_should_be_nmi(ipi)) { + err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat); + WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err); + } else { + err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat); + WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err); + } - if (ipi_should_be_nmi(i)) { - err = request_percpu_nmi(ipi_base + i, ipi_handler, - "IPI", &irq_stat); - WARN(err, "Could not request IPI %d as NMI, err=%d\n", - i, err); - } else { - err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &irq_stat); - WARN(err, "Could not request IPI %d as IRQ, err=%d\n", - i, err); - } + for_each_possible_cpu(cpu) + get_ipi_desc(cpu, ipi) = irq_to_desc(irq); + + irq_set_status_flags(irq, IRQ_HIDDEN); +} + +static void ipi_setup_lpi(int ipi, int ncpus) +{ + for (int cpu = 0; cpu < ncpus; cpu++) { + int err, irq; + + irq = ipi_irq_base + (cpu * nr_ipi) + ipi; + + err = irq_force_affinity(irq, cpumask_of(cpu)); + WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err); + + err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI", + NULL); + WARN(err, "Could not request IRQ %d, err=%d\n", irq, err); + + irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK)); - ipi_desc[i] = irq_to_desc(ipi_base + i); - irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); + get_ipi_desc(cpu, ipi) = irq_to_desc(irq); } +} + +void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus) +{ + int i; + + WARN_ON(n < MAX_IPI); + nr_ipi = min(n, MAX_IPI); + percpu_ipi_descs = !!ncpus; ipi_irq_base = ipi_base; + for (i = 0; i < nr_ipi; i++) { + if (!percpu_ipi_descs) + ipi_setup_sgi(i); + else + ipi_setup_lpi(i, ncpus); + } + /* Setup the boot CPU immediately */ ipi_setup(smp_processor_id()); } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7c329e01c557..3ebc0570345c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,7 +23,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ - vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o + vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \ + vgic/vgic-v5.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 701ea10a63f1..dbd74e4885e2 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -830,7 +830,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but * not both). This simplifies the handling of the EL1NV* bits. */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); /* Use the VHE format for mental sanity */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..7a1a8210ff91 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES: r = BIT(0); break; + case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED: + if (!kvm) + r = -EINVAL; + else + r = kvm_supports_cacheable_pfnmap(); + break; + default: r = 0; } @@ -521,7 +528,7 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Either we're running an L2 guest, and the API/APK bits come * from L1's HCR_EL2, or API/APK are both set. */ - if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { + if (unlikely(is_nested_ctxt(vcpu))) { u64 val; val = __vcpu_sys_reg(vcpu, HCR_EL2); @@ -740,7 +747,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } @@ -1187,6 +1195,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); + kvm_nested_flush_hwstate(vcpu); + if (kvm_vcpu_has_pmu(vcpu)) kvm_pmu_flush_hwstate(vcpu); @@ -1286,6 +1296,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); + kvm_nested_sync_hwstate(vcpu); + preempt_enable(); /* diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index a25be111cd8f..0e5610533949 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1047,34 +1047,51 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu, idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc); - switch (wi->regime) { - case TR_EL10: - pov_perms = perm_idx(vcpu, POR_EL1, idx); - uov_perms = perm_idx(vcpu, POR_EL0, idx); - break; - case TR_EL20: - pov_perms = perm_idx(vcpu, POR_EL2, idx); - uov_perms = perm_idx(vcpu, POR_EL0, idx); - break; - case TR_EL2: - pov_perms = perm_idx(vcpu, POR_EL2, idx); - uov_perms = 0; - break; - } + if (wr->pov) { + switch (wi->regime) { + case TR_EL10: + pov_perms = perm_idx(vcpu, POR_EL1, idx); + break; + case TR_EL20: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + break; + case TR_EL2: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + break; + } + + if (pov_perms & ~POE_RWX) + pov_perms = POE_NONE; - if (pov_perms & ~POE_RWX) - pov_perms = POE_NONE; + /* R_QXXPC, S1PrivOverflow enabled */ + if (wr->pwxn && (pov_perms & POE_X)) + pov_perms &= ~POE_W; - if (wi->poe && wr->pov) { wr->pr &= pov_perms & POE_R; wr->pw &= pov_perms & POE_W; wr->px &= pov_perms & POE_X; } - if (uov_perms & ~POE_RWX) - uov_perms = POE_NONE; + if (wr->uov) { + switch (wi->regime) { + case TR_EL10: + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL20: + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL2: + uov_perms = 0; + break; + } + + if (uov_perms & ~POE_RWX) + uov_perms = POE_NONE; + + /* R_NPBXC, S1UnprivOverlay enabled */ + if (wr->uwxn && (uov_perms & POE_X)) + uov_perms &= ~POE_W; - if (wi->e0poe && wr->uov) { wr->ur &= uov_perms & POE_R; wr->uw &= uov_perms & POE_W; wr->ux &= uov_perms & POE_X; @@ -1095,24 +1112,15 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, if (!wi->hpd) compute_s1_hierarchical_permissions(vcpu, wi, wr); - if (wi->poe || wi->e0poe) - compute_s1_overlay_permissions(vcpu, wi, wr); + compute_s1_overlay_permissions(vcpu, wi, wr); - /* R_QXXPC */ - if (wr->pwxn) { - if (!wr->pov && wr->pw) - wr->px = false; - if (wr->pov && wr->px) - wr->pw = false; - } + /* R_QXXPC, S1PrivOverlay disabled */ + if (!wr->pov) + wr->px &= !(wr->pwxn && wr->pw); - /* R_NPBXC */ - if (wr->uwxn) { - if (!wr->uov && wr->uw) - wr->ux = false; - if (wr->uov && wr->ux) - wr->uw = false; - } + /* R_NPBXC, S1UnprivOverlay disabled */ + if (!wr->uov) + wr->ux &= !(wr->uwxn && wr->uw); pan = wi->pan && (wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux)); diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index c829f9a385cc..da66c4a14775 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -131,6 +131,8 @@ struct reg_bits_to_feat_map { #define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP #define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP #define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP +#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP +#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2 #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP #define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT @@ -902,6 +904,23 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), }; +static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { + NEEDS_FEAT(SCTLR2_EL1_NMEA | + SCTLR2_EL1_EASE, + FEAT_DoubleFault2), + NEEDS_FEAT(SCTLR2_EL1_EnADERR, feat_aderr), + NEEDS_FEAT(SCTLR2_EL1_EnANERR, feat_anerr), + NEEDS_FEAT(SCTLR2_EL1_EnIDCP128, FEAT_SYSREG128), + NEEDS_FEAT(SCTLR2_EL1_EnPACM | + SCTLR2_EL1_EnPACM0, + feat_pauth_lr), + NEEDS_FEAT(SCTLR2_EL1_CPTA | + SCTLR2_EL1_CPTA0 | + SCTLR2_EL1_CPTM | + SCTLR2_EL1_CPTM0, + FEAT_CPA2), +}; + static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { NEEDS_FEAT(TCR2_EL2_FNG1 | TCR2_EL2_FNG0 | @@ -1060,6 +1079,8 @@ void __init check_feature_map(void) __HCRX_EL2_RES0, "HCRX_EL2"); check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map), HCR_EL2_RES0, "HCR_EL2"); + check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map), + SCTLR2_EL1_RES0, "SCTLR2_EL1"); check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), TCR2_EL2_RES0, "TCR2_EL2"); check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map), @@ -1280,6 +1301,13 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r *res0 |= HCR_EL2_RES0 | (mask & ~fixed); *res1 = HCR_EL2_RES1 | (mask & fixed); break; + case SCTLR2_EL1: + case SCTLR2_EL2: + *res0 = compute_res0_bits(kvm, sctlr2_feat_map, + ARRAY_SIZE(sctlr2_feat_map), 0, 0); + *res0 |= SCTLR2_EL1_RES0; + *res1 = SCTLR2_EL1_RES1; + break; case TCR2_EL2: *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), 0, 0); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 3a384e9660b8..90cb4b7ae0ff 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -88,6 +88,7 @@ enum cgt_group_id { CGT_HCRX_EnFPM, CGT_HCRX_TCR2En, + CGT_HCRX_SCTLR2En, CGT_CNTHCTL_EL1TVT, CGT_CNTHCTL_EL1TVCT, @@ -108,6 +109,7 @@ enum cgt_group_id { CGT_HCR_TTLB_TTLBOS, CGT_HCR_TVM_TRVM, CGT_HCR_TVM_TRVM_HCRX_TCR2En, + CGT_HCR_TVM_TRVM_HCRX_SCTLR2En, CGT_HCR_TPU_TICAB, CGT_HCR_TPU_TOCU, CGT_HCR_NV1_nNV2_ENSCXT, @@ -398,6 +400,12 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = HCRX_EL2_TCR2En, .behaviour = BEHAVE_FORWARD_RW, }, + [CGT_HCRX_SCTLR2En] = { + .index = HCRX_EL2, + .value = 0, + .mask = HCRX_EL2_SCTLR2En, + .behaviour = BEHAVE_FORWARD_RW, + }, [CGT_CNTHCTL_EL1TVT] = { .index = CNTHCTL_EL2, .value = CNTHCTL_EL1TVT, @@ -449,6 +457,8 @@ static const enum cgt_group_id *coarse_control_combo[] = { MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM), MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En, CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En), + MCB(CGT_HCR_TVM_TRVM_HCRX_SCTLR2En, + CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_SCTLR2En), MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB), MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU), MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT), @@ -782,6 +792,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS), SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS), SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_SCTLR2_EL1, CGT_HCR_TVM_TRVM_HCRX_SCTLR2En), SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM), @@ -1354,6 +1365,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1), SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1), SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1), + SR_FGT(SYS_SCTLR2_EL1, HFGRTR, SCTLR_EL1, 1), SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1), SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1), SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1), @@ -2592,13 +2604,8 @@ inject: static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit) { - bool control_bit_set; - - if (!vcpu_has_nv(vcpu)) - return false; - - control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit; - if (!is_hyp_ctxt(vcpu) && control_bit_set) { + if (is_nested_ctxt(vcpu) && + (__vcpu_sys_reg(vcpu, reg) & control_bit)) { kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); return true; } @@ -2719,6 +2726,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2, case except_type_irq: kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ); break; + case except_type_serror: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR); + break; default: WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type); } @@ -2816,3 +2826,28 @@ int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) /* esr_el2 value doesn't matter for exits due to irqs. */ return kvm_inject_nested(vcpu, 0, except_type_irq); } + +int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, + iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW); + esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL; + + vcpu_write_sys_reg(vcpu, FAR_EL2, addr); + + if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE) + return kvm_inject_nested(vcpu, esr, except_type_serror); + + return kvm_inject_nested_sync(vcpu, esr); +} + +int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr) +{ + /* + * Hardware sets up the EC field when propagating ESR as a result of + * vSError injection. Manually populate EC for an emulated SError + * exception. + */ + esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR); + return kvm_inject_nested(vcpu, esr, except_type_serror); +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2196979a24a3..16ba5e9ac86c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN); + events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) || + vcpu_get_flag(vcpu, NESTED_SERROR_PENDING); if (events->exception.serror_pending && events->exception.serror_has_esr) events->exception.serror_esr = vcpu_get_vsesr(vcpu); @@ -833,29 +834,62 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, return 0; } +static void commit_pending_events(struct kvm_vcpu *vcpu) +{ + if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) + return; + + /* + * Reset the MMIO emulation state to avoid stepping PC after emulating + * the exception entry. + */ + vcpu->mmio_needed = false; + kvm_call_hyp(__kvm_adjust_pc, vcpu); +} + int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; + u64 esr = events->exception.serror_esr; + int ret = 0; - if (serror_pending && has_esr) { - if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - return -EINVAL; - - if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) - kvm_set_sei_esr(vcpu, events->exception.serror_esr); - else - return -EINVAL; - } else if (serror_pending) { - kvm_inject_vabt(vcpu); + /* + * Immediately commit the pending SEA to the vCPU's architectural + * state which is necessary since we do not return a pending SEA + * to userspace via KVM_GET_VCPU_EVENTS. + */ + if (ext_dabt_pending) { + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + commit_pending_events(vcpu); } - if (ext_dabt_pending) - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + if (ret < 0) + return ret; - return 0; + if (!serror_pending) + return 0; + + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr) + return -EINVAL; + + if (has_esr && (esr & ~ESR_ELx_ISS_MASK)) + return -EINVAL; + + if (has_esr) + ret = kvm_inject_serror_esr(vcpu, esr); + else + ret = kvm_inject_serror(vcpu); + + /* + * We could've decided that the SError is due for immediate software + * injection; commit the exception in case userspace decides it wants + * to inject more exceptions for some strange reason. + */ + commit_pending_events(vcpu); + return (ret < 0) ? ret : 0; } u32 __attribute_const__ kvm_target_cpu(void) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 453266c96481..a598072f36d2 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *); static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) { if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr)) - kvm_inject_vabt(vcpu); + kvm_inject_serror(vcpu); } static int handle_hvc(struct kvm_vcpu *vcpu) @@ -252,7 +252,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) return 1; } - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); return 1; } @@ -311,12 +311,11 @@ static int kvm_handle_gcs(struct kvm_vcpu *vcpu) static int handle_other(struct kvm_vcpu *vcpu) { - bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu); + bool allowed, fwd = is_nested_ctxt(vcpu); u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2); u64 esr = kvm_vcpu_get_esr(vcpu); u64 iss = ESR_ELx_ISS(esr); struct kvm *kvm = vcpu->kvm; - bool allowed, fwd = false; /* * We only trap for two reasons: @@ -335,28 +334,23 @@ static int handle_other(struct kvm_vcpu *vcpu) switch (iss) { case ESR_ELx_ISS_OTHER_ST64BV: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnASR); + fwd &= !(hcrx & HCRX_EL2_EnASR); break; case ESR_ELx_ISS_OTHER_ST64BV0: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnAS0); + fwd &= !(hcrx & HCRX_EL2_EnAS0); break; case ESR_ELx_ISS_OTHER_LDST64B: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnALS); + fwd &= !(hcrx & HCRX_EL2_EnALS); break; case ESR_ELx_ISS_OTHER_TSBCSYNC: allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1); - if (is_l2) - fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC); + fwd &= (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC); break; case ESR_ELx_ISS_OTHER_PSBCSYNC: allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5); - if (is_l2) - fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC); + fwd &= (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC); break; default: /* Clearly, we're missing something. */ @@ -496,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, disr_to_esr(disr)); } else { - kvm_inject_vabt(vcpu); + kvm_inject_serror(vcpu); } return; diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 6a2a899a344e..95d186e0bf54 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -26,7 +26,8 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) if (unlikely(vcpu_has_nv(vcpu))) return vcpu_read_sys_reg(vcpu, reg); - else if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && + __vcpu_read_sys_reg_from_cpu(reg, &val)) return val; return __vcpu_sys_reg(vcpu, reg); @@ -36,7 +37,8 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); - else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) + else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) || + !__vcpu_write_sys_reg_to_cpu(val, reg)) __vcpu_assign_sys_reg(vcpu, reg, val); } @@ -339,6 +341,10 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); break; + case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_serror); + break; + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync); break; @@ -347,9 +353,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq); break; + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR): + enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror); + break; + default: /* - * Only EL1_SYNC and EL2_{SYNC,IRQ} makes + * Only EL1_{SYNC,SERR} and EL2_{SYNC,IRQ,SERR} makes * sense so far. Everything else gets silently * ignored. */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2ad57b117385..84ec4e100fbb 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -298,7 +298,7 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) u64 val; \ \ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \ + if (is_nested_ctxt(vcpu)) \ compute_clr_set(vcpu, reg, c, s); \ \ compute_undef_clr_set(vcpu, kvm, reg, c, s); \ @@ -436,7 +436,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_HCX)) { u64 hcrx = vcpu->arch.hcrx_el2; - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2); hcrx |= val & __HCRX_EL2_MASK; hcrx &= ~(~val & __HCRX_EL2_nMASK); @@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr) write_sysreg_hcr(hcr); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) { + u64 vsesr; + + /* + * When HCR_EL2.AMO is set, physical SErrors are taken to EL2 + * and vSError injection is enabled for EL1. Conveniently, for + * NV this means that it is never the case where a 'physical' + * SError (injected by KVM or userspace) and vSError are + * deliverable to the same context. + * + * As such, we can trivially select between the host or guest's + * VSESR_EL2. Except for the case that FEAT_RAS hasn't been + * exposed to the guest, where ESR propagation in hardware + * occurs unconditionally. + * + * Paper over the architectural wart and use an IMPLEMENTATION + * DEFINED ESR value in case FEAT_RAS is hidden from the guest. + */ + if (!vserror_state_is_nested(vcpu)) + vsesr = vcpu->arch.vsesr_el2; + else if (kvm_has_ras(kern_hyp_va(vcpu->kvm))) + vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2); + else + vsesr = ESR_ELx_ISV; + + write_sysreg_s(vsesr, SYS_VSESR_EL2); + } } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) { + u64 *hcr; + + if (vserror_state_is_nested(vcpu)) + hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2); + else + hcr = &vcpu->arch.hcr_el2; + /* * If we pended a virtual abort, preserve it until it gets * cleared. See D1.14.3 (Virtual Interrupts) for details, but * the crucial bit is "On taking a vSError interrupt, * HCR_EL2.VSE is cleared to 0." + * + * Additionally, when in a nested context we need to propagate the + * updated state to the guest hypervisor's HCR_EL2. */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + if (*hcr & HCR_VSE) { + *hcr &= ~HCR_VSE; + *hcr |= read_sysreg(hcr_el2) & HCR_VSE; } } @@ -531,7 +566,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) * nested guest, as the guest hypervisor could select a smaller VL. Slap * that into hardware before wrapping up. */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + if (is_nested_ctxt(vcpu)) sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2); write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR); @@ -557,7 +592,7 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) { /* A guest hypervisor may restrict the effective max VL. */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + if (is_nested_ctxt(vcpu)) zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); else zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4d0dbea4c56f..a17cbe7582de 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -109,6 +109,28 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt) return kvm_has_s1poe(kern_hyp_va(vcpu->kvm)); } +static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_ras(kern_hyp_va(vcpu->kvm)); +} + +static inline bool ctxt_has_sctlr2(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_SCTLR2)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_sctlr2(kern_hyp_va(vcpu->kvm)); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -147,6 +169,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); + + if (ctxt_has_sctlr2(ctxt)) + ctxt_sys_reg(ctxt, SCTLR2_EL1) = read_sysreg_el1(SYS_SCTLR2); } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -159,8 +184,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) if (!has_vhe() && ctxt->__hyp_running_vcpu) ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return; + + if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt))) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); + else if (ctxt_has_ras(ctxt)) + ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2); } static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) @@ -252,6 +282,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); + + if (ctxt_has_sctlr2(ctxt)) + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR2_EL1), SYS_SCTLR2); } /* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */ @@ -275,6 +308,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx { u64 pstate = to_hw_pstate(ctxt); u64 mode = pstate & PSR_AA32_MODE_MASK; + u64 vdisr; /* * Safety check to ensure we're setting the CPU up to enter the guest @@ -293,8 +327,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx write_sysreg_el2(ctxt->regs.pc, SYS_ELR); write_sysreg_el2(pstate, SYS_SPSR); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return; + + if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt))) + vdisr = ctxt_sys_reg(ctxt, DISR_EL1); + else if (ctxt_has_ras(ctxt)) + vdisr = ctxt_sys_reg(ctxt, VDISR_EL2); + else + vdisr = 0; + + write_sysreg_s(vdisr, SYS_VDISR_EL2); } static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index f162b0df5cae..d81275790e69 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -296,12 +296,19 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) } /* - * Prevent the guest from touching the ICC_SRE_EL1 system - * register. Note that this may not have any effect, as - * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation. + * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due + * to be relaxed in a future spec release, at which point this in + * condition can be dropped. */ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* + * Prevent the guest from touching the ICC_SRE_EL1 system + * register. Note that this may not have any effect, as + * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation. + */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } /* * If we need to trap system registers, we must write @@ -322,8 +329,14 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + /* + * Can be dropped in the future when GICv5 spec is relaxed. See comment + * above. + */ + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + } if (!cpu_if->vgic_sre) { /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ @@ -423,10 +436,20 @@ void __vgic_v3_init_lrs(void) */ u64 __vgic_v3_get_gic_config(void) { - u64 val, sre = read_gicreg(ICC_SRE_EL1); + u64 val, sre; unsigned long flags = 0; /* + * In compat mode, we cannot access ICC_SRE_EL1 at any EL + * other than EL1 itself; just return the + * ICH_VTR_EL2. ICC_IDR0_EL1 is only implemented on a GICv5 + * system, so we first check if we have GICv5 support. + */ + if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) + return read_gicreg(ICH_VTR_EL2); + + sre = read_gicreg(ICC_SRE_EL1); + /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register * view. @@ -471,6 +494,16 @@ u64 __vgic_v3_get_gic_config(void) return val; } +static void __vgic_v3_compat_mode_enable(void) +{ + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) + return; + + sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, 0, ICH_VCTLR_EL2_V3); + /* Wait for V3 to become enabled */ + isb(); +} + static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); @@ -490,6 +523,8 @@ void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) { + __vgic_v3_compat_mode_enable(); + /* * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen * is dependent on ICC_SRE_EL1.SRE, and we have to perform the @@ -1050,7 +1085,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu, { u64 ich_hcr; - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + if (!is_nested_ctxt(vcpu)) return false; ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 477f1580ffea..e482181c6632 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -48,8 +48,7 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); static u64 __compute_hcr(struct kvm_vcpu *vcpu) { - u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2); - u64 hcr = vcpu->arch.hcr_el2; + u64 guest_hcr, hcr = vcpu->arch.hcr_el2; if (!vcpu_has_nv(vcpu)) return hcr; @@ -68,10 +67,21 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) if (!vcpu_el2_e2h_is_set(vcpu)) hcr |= HCR_NV1; + /* + * Nothing in HCR_EL2 should impact running in hypervisor + * context, apart from bits we have defined as RESx (E2H, + * HCD and co), or that cannot be set directly (the EXCLUDE + * bits). Given that we OR the guest's view with the host's, + * we can use the 0 value as the starting point, and only + * use the config-driven RES1 bits. + */ + guest_hcr = kvm_vcpu_apply_reg_masks(vcpu, HCR_EL2, 0); + write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2); } else { host_data_clear_flag(VCPU_IN_HYP_CONTEXT); + guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2); if (guest_hcr & HCR_NV) { u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id())); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 73e4bc7fde9e..f28c6cf4fe1b 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -77,6 +77,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); + + if (ctxt_has_sctlr2(&vcpu->arch.ctxt)) + __vcpu_assign_sys_reg(vcpu, SCTLR2_EL2, read_sysreg_el1(SYS_SCTLR2)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) @@ -139,6 +142,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1); write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR); write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR); + + if (ctxt_has_sctlr2(&vcpu->arch.ctxt)) + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR2_EL2), SYS_SCTLR2); } /* diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a640e839848e..6745f38b64f9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -15,13 +15,11 @@ #include <asm/kvm_nested.h> #include <asm/esr.h> -static void pend_sync_exception(struct kvm_vcpu *vcpu) +static unsigned int exception_target_el(struct kvm_vcpu *vcpu) { /* If not nesting, EL1 is the only possible exception target */ - if (likely(!vcpu_has_nv(vcpu))) { - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - return; - } + if (likely(!vcpu_has_nv(vcpu))) + return PSR_MODE_EL1h; /* * With NV, we need to pick between EL1 and EL2. Note that we @@ -32,26 +30,76 @@ static void pend_sync_exception(struct kvm_vcpu *vcpu) switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) { case PSR_MODE_EL2h: case PSR_MODE_EL2t: - kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); - break; + return PSR_MODE_EL2h; case PSR_MODE_EL1h: case PSR_MODE_EL1t: - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - break; + return PSR_MODE_EL1h; case PSR_MODE_EL0t: - if (vcpu_el2_tge_is_set(vcpu)) - kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); - else - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - break; + return vcpu_el2_tge_is_set(vcpu) ? PSR_MODE_EL2h : PSR_MODE_EL1h; default: BUG(); } } -static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target) +static enum vcpu_sysreg exception_esr_elx(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL2h) + return ESR_EL2; + + return ESR_EL1; +} + +static enum vcpu_sysreg exception_far_elx(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL2h) + return FAR_EL2; + + return FAR_EL1; +} + +static void pend_sync_exception(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + else + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); +} + +static void pend_serror_exception(struct kvm_vcpu *vcpu) { - return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target); + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SERR); + else + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR); +} + +static bool __effective_sctlr2_bit(struct kvm_vcpu *vcpu, unsigned int idx) +{ + u64 sctlr2; + + if (!kvm_has_sctlr2(vcpu->kvm)) + return false; + + if (is_nested_ctxt(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_SCTLR2En)) + return false; + + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL1); + else + sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL2); + + return sctlr2 & BIT(idx); +} + +static bool effective_sctlr2_ease(struct kvm_vcpu *vcpu) +{ + return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_EASE_SHIFT); +} + +static bool effective_sctlr2_nmea(struct kvm_vcpu *vcpu) +{ + return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_NMEA_SHIFT); } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -60,7 +108,11 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u64 esr = 0; - pend_sync_exception(vcpu); + /* This delight is brought to you by FEAT_DoubleFault2. */ + if (effective_sctlr2_ease(vcpu)) + pend_serror_exception(vcpu); + else + pend_sync_exception(vcpu); /* * Build an {i,d}abort, depending on the level and the @@ -83,13 +135,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr esr |= ESR_ELx_FSC_EXTABT; - if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) { - vcpu_write_sys_reg(vcpu, addr, FAR_EL1); - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); - } else { - vcpu_write_sys_reg(vcpu, addr, FAR_EL2); - vcpu_write_sys_reg(vcpu, esr, ESR_EL2); - } + vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } static void inject_undef64(struct kvm_vcpu *vcpu) @@ -105,10 +152,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); - else - vcpu_write_sys_reg(vcpu, esr, ESR_EL2); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } #define DFSR_FSC_EXTABT_LPAE 0x10 @@ -155,36 +199,35 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) vcpu_write_sys_reg(vcpu, far, FAR_EL1); } -/** - * kvm_inject_dabt - inject a data abort into the guest - * @vcpu: The VCPU to receive the data abort - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - */ -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +static void __kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) { if (vcpu_el1_is_32bit(vcpu)) - inject_abt32(vcpu, false, addr); + inject_abt32(vcpu, iabt, addr); else - inject_abt64(vcpu, false, addr); + inject_abt64(vcpu, iabt, addr); } -/** - * kvm_inject_pabt - inject a prefetch abort into the guest - * @vcpu: The VCPU to receive the prefetch abort - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - */ -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu) { - if (vcpu_el1_is_32bit(vcpu)) - inject_abt32(vcpu, true, addr); - else - inject_abt64(vcpu, true, addr); + if (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_TGE | HCR_TEA)) + return true; + + if (!vcpu_mode_priv(vcpu)) + return false; + + return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && + (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA); +} + +int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) +{ + lockdep_assert_held(&vcpu->mutex); + + if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu)) + return kvm_inject_nested_sea(vcpu, iabt, addr); + + __kvm_inject_sea(vcpu, iabt, addr); + return 1; } void kvm_inject_size_fault(struct kvm_vcpu *vcpu) @@ -194,10 +237,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) addr = kvm_vcpu_get_fault_ipa(vcpu); addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - if (kvm_vcpu_trap_is_iabt(vcpu)) - kvm_inject_pabt(vcpu, addr); - else - kvm_inject_dabt(vcpu, addr); + __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr); /* * If AArch64 or LPAE, set FSC to 0 to indicate an Address @@ -210,9 +250,9 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) return; - esr = vcpu_read_sys_reg(vcpu, ESR_EL1); + esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu)); esr &= ~GENMASK_ULL(5, 0); - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } /** @@ -230,25 +270,70 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) inject_undef64(vcpu); } -void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) +static bool serror_is_masked(struct kvm_vcpu *vcpu) { - vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); - *vcpu_hcr(vcpu) |= HCR_VSE; + return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && !effective_sctlr2_nmea(vcpu); } -/** - * kvm_inject_vabt - inject an async abort / SError into the guest - * @vcpu: The VCPU to receive the exception - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - * - * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with - * the remaining ISS all-zeros so that this error is not interpreted as an - * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR - * value, so the CPU generates an imp-def value. - */ -void kvm_inject_vabt(struct kvm_vcpu *vcpu) +static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu) +{ + if (is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu)) + return true; + + if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA)) + return false; + + /* + * In another example where FEAT_DoubleFault2 is entirely backwards, + * "masked" as it relates to the routing effects of HCRX_EL2.TMEA + * doesn't consider SCTLR2_EL1.NMEA. That is to say, even if EL1 asked + * for non-maskable SErrors, the EL2 bit takes priority if A is set. + */ + if (vcpu_mode_priv(vcpu)) + return *vcpu_cpsr(vcpu) & PSR_A_BIT; + + /* + * Otherwise SErrors are considered unmasked when taken from EL0 and + * NMEA is set. + */ + return serror_is_masked(vcpu); +} + +static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu) +{ + return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu)); +} + +int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr) { - kvm_set_sei_esr(vcpu, ESR_ELx_ISV); + lockdep_assert_held(&vcpu->mutex); + + if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu)) + return kvm_inject_nested_serror(vcpu, esr); + + if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) { + vcpu_set_vsesr(vcpu, esr); + vcpu_set_flag(vcpu, NESTED_SERROR_PENDING); + return 1; + } + + /* + * Emulate the exception entry if SErrors are unmasked. This is useful if + * the vCPU is in a nested context w/ vSErrors enabled then we've already + * delegated he hardware vSError context (i.e. HCR_EL2.VSE, VSESR_EL2, + * VDISR_EL2) to the guest hypervisor. + * + * As we're emulating the SError injection we need to explicitly populate + * ESR_ELx.EC because hardware will not do it on our behalf. + */ + if (!serror_is_masked(vcpu)) { + pend_serror_exception(vcpu); + esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); + return 1; + } + + vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); + *vcpu_hcr(vcpu) |= HCR_VSE; + return 1; } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index ab365e839874..54f9358c9e0e 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -72,7 +72,7 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) return data; } -static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu) +static bool kvm_pending_external_abort(struct kvm_vcpu *vcpu) { if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) return false; @@ -90,6 +90,8 @@ static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu) switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) { case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC): case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): + case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR): + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR): return true; default: return false; @@ -113,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * Detect if the MMIO return was already handled or if userspace aborted * the MMIO access. */ - if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu))) + if (unlikely(!vcpu->mmio_needed || kvm_pending_external_abort(vcpu))) return 1; vcpu->mmio_needed = 0; @@ -169,10 +171,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); - if (vcpu_is_protected(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } + if (vcpu_is_protected(vcpu)) + return kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, &vcpu->kvm->arch.flags)) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2942ec92c5a4..1c78864767c5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, return 0; } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_is_map_memory(pfn); -} - static void *stage2_memcache_zalloc_page(void *arg) { struct kvm_mmu_memory_cache *mc = arg; @@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) return vma->vm_flags & VM_MTE_ALLOWED; } +static bool kvm_vma_is_cacheable(struct vm_area_struct *vma) +{ + switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) { + case MT_NORMAL_NC: + case MT_DEVICE_nGnRnE: + case MT_DEVICE_nGnRE: + return false; + default: + return true; + } +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, @@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, { int ret = 0; bool write_fault, writable, force_pte = false; - bool exec_fault, mte_allowed; - bool device = false, vfio_allow_any_uc = false; + bool exec_fault, mte_allowed, is_vma_cacheable; + bool s2_force_noncacheable = false, vfio_allow_any_uc = false; unsigned long mmu_seq; phys_addr_t ipa = fault_ipa; struct kvm *kvm = vcpu->kvm; @@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; struct page *page; + vm_flags_t vm_flags; enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; if (fault_is_perm) @@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; + vm_flags = vma->vm_flags; + + is_vma_cacheable = kvm_vma_is_cacheable(vma); + /* Don't use the VMA after the unlock -- it may have vanished */ vma = NULL; @@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_noslot_pfn(pfn)) return -EFAULT; - if (kvm_is_device_pfn(pfn)) { - /* - * If the page was identified as device early by looking at - * the VMA flags, vma_pagesize is already representing the - * largest quantity we can map. If instead it was mapped - * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE - * and must not be upgraded. - * - * In both cases, we don't let transparent_hugepage_adjust() - * change things at the last minute. - */ - device = true; + /* + * Check if this is non-struct page memory PFN, and cannot support + * CMOs. It could potentially be unsafe to access as cachable. + */ + if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) { + if (is_vma_cacheable) { + /* + * Whilst the VMA owner expects cacheable mapping to this + * PFN, hardware also has to support the FWB and CACHE DIC + * features. + * + * ARM64 KVM relies on kernel VA mapping to the PFN to + * perform cache maintenance as the CMO instructions work on + * virtual addresses. VM_PFNMAP region are not necessarily + * mapped to a KVA and hence the presence of hardware features + * S2FWB and CACHE DIC are mandatory to avoid the need for + * cache maintenance. + */ + if (!kvm_supports_cacheable_pfnmap()) + return -EFAULT; + } else { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ + s2_force_noncacheable = true; + } } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, writable = false; } - if (exec_fault && device) + if (exec_fault && s2_force_noncacheable) return -ENOEXEC; /* @@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) { if (fault_is_perm && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else @@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } } - if (!fault_is_perm && !device && kvm_has_mte(kvm)) { + if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (mte_allowed) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - if (device) { + if (s2_force_noncacheable) { if (vfio_allow_any_uc) prot |= KVM_PGTABLE_PROT_NORMAL_NC; else @@ -1808,7 +1841,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * There is no need to pass the error into the guest. */ if (kvm_handle_guest_sea()) - kvm_inject_vabt(vcpu); + return kvm_inject_serror(vcpu); return 1; } @@ -1836,11 +1869,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - if (is_iabt) - kvm_inject_pabt(vcpu, fault_ipa); - else - kvm_inject_dabt(vcpu, fault_ipa); - return 1; + return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } } @@ -1912,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } if (kvm_vcpu_abt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); goto out_unlock; } @@ -1958,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (ret == 0) ret = 1; out: - if (ret == -ENOEXEC) { - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; - } + if (ret == -ENOEXEC) + ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu)); out_unlock: srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; @@ -2221,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ret = -EINVAL; break; } + + /* + * Cacheable PFNMAP is allowed only if the hardware + * supports it. + */ + if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) { + ret = -EINVAL; + break; + } } hva = min(reg_end, vma->vm_end); } while (hva < reg_end); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 77f92d79d442..efb37aad11ec 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1424,12 +1424,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) break; case SYS_ID_AA64PFR0_EL1: - /* No RME, AMU, MPAM, S-EL2, or RAS */ + /* No RME, AMU, MPAM, or S-EL2 */ val &= ~(ID_AA64PFR0_EL1_RME | ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SEL2 | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_EL3 | ID_AA64PFR0_EL1_EL2 | ID_AA64PFR0_EL1_EL1 | @@ -1670,6 +1669,12 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1); set_sysreg_masks(kvm, SCTLR_EL1, res0, res1); + /* SCTLR2_ELx */ + get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1); + set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1); + get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1); + set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1); + /* MDCR_EL2 */ get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1); set_sysreg_masks(kvm, MDCR_EL2, res0, res1); @@ -1728,3 +1733,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu)) kvm_inject_nested_irq(vcpu); } + +/* + * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor + * can write to HCR_EL2 behind our back, potentially changing the exception + * routing / masking for even the host context. + * + * What follows is some slop to (1) react to exception routing / masking and (2) + * preserve the pending SError state across translation regimes. + */ +void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu) +{ + if (!vcpu_has_nv(vcpu)) + return; + + if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) + kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); +} + +void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu) +{ + unsigned long *hcr = vcpu_hcr(vcpu); + + if (!vcpu_has_nv(vcpu)) + return; + + /* + * We previously decided that an SError was deliverable to the guest. + * Reap the pending state from HCR_EL2 and... + */ + if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr))) + vcpu_set_flag(vcpu, NESTED_SERROR_PENDING); + + /* + * Re-attempt SError injection in case the deliverability has changed, + * which is necessary to faithfully emulate WFI the case of a pending + * SError being a wakeup condition. + */ + if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) + kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 76c2f0da821f..93a6f87748ef 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -144,6 +144,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); default: return false; } @@ -1612,7 +1613,6 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; @@ -1643,8 +1643,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ID_AA64MMFR2_EL1_NV; break; case SYS_ID_AA64MMFR3_EL1: - val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE | - ID_AA64MMFR3_EL1_S1PIE; + val &= ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | + ID_AA64MMFR3_EL1_S1POE | + ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); @@ -1811,7 +1813,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP); } - if (kvm_vgic_global_state.type == VGIC_V3) { + if (vgic_is_v3(vcpu->kvm)) { val &= ~ID_AA64PFR0_EL1_GIC_MASK; val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); } @@ -1953,6 +1955,14 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) return -EINVAL; + /* + * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then + * we support GICv3. Fail attempts to do anything but set that to IMP. + */ + if (vgic_is_v3_compat(vcpu->kvm) && + FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } @@ -2483,6 +2493,21 @@ static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (kvm_has_sctlr2(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, sctlr2_visibility); +} + static bool access_zcr_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2639,6 +2664,23 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, return true; } +static bool access_ras(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct kvm *kvm = vcpu->kvm; + + switch(reg_to_encoding(r)) { + default: + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + } + + return trap_raz_wi(vcpu, p, r); +} + /* * For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and * AIDR_EL1 as "invariant" registers, meaning userspace cannot change them. @@ -2866,7 +2908,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, ~(ID_AA64PFR1_EL1_PFAR | - ID_AA64PFR1_EL1_DF2 | ID_AA64PFR1_EL1_MTEX | ID_AA64PFR1_EL1_THE | ID_AA64PFR1_EL1_GCS | @@ -2945,6 +2986,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64MMFR2_EL1_NV | ID_AA64MMFR2_EL1_CCIDX)), ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | ID_AA64MMFR3_EL1_S1PIE | ID_AA64MMFR3_EL1_S1POE)), ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac), @@ -2955,6 +2997,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0, + .visibility = sctlr2_visibility }, MTE_REG(RGSR_EL1), MTE_REG(GCR_EL1), @@ -2984,14 +3028,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, - { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, + { SYS_DESC(SYS_ERRIDR_EL1), access_ras }, + { SYS_DESC(SYS_ERRSELR_EL1), access_ras }, + { SYS_DESC(SYS_ERXFR_EL1), access_ras }, + { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, + { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, + { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), @@ -3302,6 +3346,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0), EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), + EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0, + sctlr2_el2_visibility), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), @@ -3344,6 +3390,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), EL2_REG_REDIR(ESR_EL2, reset_val, 0), + EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0), { SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 }, EL2_REG_REDIR(FAR_EL2, reset_val, 0), @@ -3372,6 +3419,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(VBAR_EL2, access_rw, reset_val, 0), EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_RMR_EL2), undef_access }, + EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0), EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0), EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0), @@ -4475,7 +4523,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params) return true; kvm_pr_unimpl("Unhandled cp10 register %s: %u\n", - params->is_write ? "write" : "read", reg_id); + str_write_read(params->is_write), reg_id); return false; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index ef97d9fc67cc..317abc490368 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -108,7 +108,7 @@ inline void print_sys_reg_msg(const struct sys_reg_params *p, /* Look, we even formatted it for you to paste into the table! */ kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", &(struct va_format){ fmt, &va }, - p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, str_write_read(p->is_write)); va_end(va); } diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index f85415db7713..a7ab9a3bbed0 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -113,7 +113,7 @@ TRACE_EVENT(kvm_sys_access, __entry->vcpu_pc, __entry->name ?: "UNKN", __entry->Op0, __entry->Op1, __entry->CRn, __entry->CRm, __entry->Op2, - __entry->is_write ? "write" : "read") + str_write_read(__entry->is_write)) ); TRACE_EVENT(kvm_set_guest_debug, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index eb1205654ac8..72442c825d19 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -674,10 +674,12 @@ void kvm_vgic_init_cpu_hardware(void) * We want to make sure the list registers start out clear so that we * only have the program the used registers. */ - if (kvm_vgic_global_state.type == VGIC_V2) + if (kvm_vgic_global_state.type == VGIC_V2) { vgic_v2_init_lrs(); - else + } else if (kvm_vgic_global_state.type == VGIC_V3 || + kvm_vgic_global_state.has_gcie_v3_compat) { kvm_call_hyp(__vgic_v3_init_lrs); + } } /** @@ -722,6 +724,9 @@ int kvm_vgic_hyp_init(void) kvm_info("GIC system register CPU interface enabled\n"); } break; + case GIC_V5: + ret = vgic_v5_probe(gic_kvm_info); + break; default: ret = -ENODEV; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 534049c7c94b..b34f8976c9cc 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2694,6 +2694,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); break; + default: + ret = -ENXIO; + break; } mutex_unlock(&its->its_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..9c3997776f50 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -116,7 +116,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu) { u64 xmo; - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO); WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO), "Separate virtual IRQ/FIQ settings not supported\n"); diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c new file mode 100644 index 000000000000..6bdbb221bcde --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <kvm/arm_vgic.h> +#include <linux/irqchip/arm-vgic-info.h> + +#include "vgic.h" + +/* + * Probe for a vGICv5 compatible interrupt controller, returning 0 on success. + * Currently only supports GICv3-based VMs on a GICv5 host, and hence only + * registers a VGIC_V3 device. + */ +int vgic_v5_probe(const struct gic_kvm_info *info) +{ + u64 ich_vtr_el2; + int ret; + + if (!info->has_gcie_v3_compat) + return -ENODEV; + + kvm_vgic_global_state.type = VGIC_V5; + kvm_vgic_global_state.has_gcie_v3_compat = true; + + /* We only support v3 compat mode - use vGICv3 limits */ + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + kvm_vgic_global_state.vcpu_base = 0; + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.has_gicv4 = false; + kvm_vgic_global_state.has_gicv4_1 = false; + + ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3-legacy KVM device.\n"); + return ret; + } + + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GCIE legacy system register CPU interface\n"); + + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4349084cb9a6..4f1e123b063e 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -308,6 +308,8 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); +int vgic_v5_probe(const struct gic_kvm_info *info); + static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; @@ -389,6 +391,17 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu); void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu); +static inline bool vgic_is_v3_compat(struct kvm *kvm) +{ + return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) && + kvm_vgic_global_state.has_gcie_v3_compat; +} + +static inline bool vgic_is_v3(struct kvm *kvm) +{ + return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm); +} + int vgic_its_debug_init(struct kvm_device *dev); void vgic_its_debug_destroy(struct kvm_device *dev); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 10effd4cff6b..115161dd9a24 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -35,7 +35,8 @@ HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF -HAS_GIC_CPUIF_SYSREGS +HAS_GICV3_CPUIF +HAS_GICV5_CPUIF HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC HAS_HCR_NV1 @@ -49,6 +50,7 @@ HAS_PAN HAS_PMUV3 HAS_S1PIE HAS_S1POE +HAS_SCTLR2 HAS_RAS_EXTN HAS_RNG HAS_SB diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c9dc6645a552..89857cb2725b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1314,7 +1314,10 @@ UnsignedEnum 19:16 UINJ 0b0000 NI 0b0001 IMP EndEnum -Res0 15:12 +UnsignedEnum 15:12 GCIE + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 11:8 MTEFAR 0b0000 NI 0b0001 IMP @@ -3021,6 +3024,435 @@ Sysreg PMIAR_EL1 3 0 9 14 7 Field 63:0 ADDRESS EndSysreg +SysregFields ICC_PPI_HMRx_EL1 +Field 63 HM63 +Field 62 HM62 +Field 61 HM61 +Field 60 HM60 +Field 59 HM59 +Field 58 HM58 +Field 57 HM57 +Field 56 HM56 +Field 55 HM55 +Field 54 HM54 +Field 53 HM53 +Field 52 HM52 +Field 51 HM51 +Field 50 HM50 +Field 49 HM49 +Field 48 HM48 +Field 47 HM47 +Field 46 HM46 +Field 45 HM45 +Field 44 HM44 +Field 43 HM43 +Field 42 HM42 +Field 41 HM41 +Field 40 HM40 +Field 39 HM39 +Field 38 HM38 +Field 37 HM37 +Field 36 HM36 +Field 35 HM35 +Field 34 HM34 +Field 33 HM33 +Field 32 HM32 +Field 31 HM31 +Field 30 HM30 +Field 29 HM29 +Field 28 HM28 +Field 27 HM27 +Field 26 HM26 +Field 25 HM25 +Field 24 HM24 +Field 23 HM23 +Field 22 HM22 +Field 21 HM21 +Field 20 HM20 +Field 19 HM19 +Field 18 HM18 +Field 17 HM17 +Field 16 HM16 +Field 15 HM15 +Field 14 HM14 +Field 13 HM13 +Field 12 HM12 +Field 11 HM11 +Field 10 HM10 +Field 9 HM9 +Field 8 HM8 +Field 7 HM7 +Field 6 HM6 +Field 5 HM5 +Field 4 HM4 +Field 3 HM3 +Field 2 HM2 +Field 1 HM1 +Field 0 HM0 +EndSysregFields + +Sysreg ICC_PPI_HMR0_EL1 3 0 12 10 0 +Fields ICC_PPI_HMRx_EL1 +EndSysreg + +Sysreg ICC_PPI_HMR1_EL1 3 0 12 10 1 +Fields ICC_PPI_HMRx_EL1 +EndSysreg + +Sysreg ICC_IDR0_EL1 3 0 12 10 2 +Res0 63:12 +UnsignedEnum 11:8 GCIE_LEGACY + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 PRI_BITS + 0b0011 4BITS + 0b0100 5BITS +EndEnum +UnsignedEnum 3:0 ID_BITS + 0b0000 16BITS + 0b0001 24BITS +EndEnum +EndSysreg + +Sysreg ICC_ICSR_EL1 3 0 12 10 4 +Res0 63:48 +Field 47:32 IAFFID +Res0 31:16 +Field 15:11 Priority +Res0 10:6 +Field 5 HM +Field 4 Active +Field 3 IRM +Field 2 Pending +Field 1 Enabled +Field 0 F +EndSysreg + +SysregFields ICC_PPI_ENABLERx_EL1 +Field 63 EN63 +Field 62 EN62 +Field 61 EN61 +Field 60 EN60 +Field 59 EN59 +Field 58 EN58 +Field 57 EN57 +Field 56 EN56 +Field 55 EN55 +Field 54 EN54 +Field 53 EN53 +Field 52 EN52 +Field 51 EN51 +Field 50 EN50 +Field 49 EN49 +Field 48 EN48 +Field 47 EN47 +Field 46 EN46 +Field 45 EN45 +Field 44 EN44 +Field 43 EN43 +Field 42 EN42 +Field 41 EN41 +Field 40 EN40 +Field 39 EN39 +Field 38 EN38 +Field 37 EN37 +Field 36 EN36 +Field 35 EN35 +Field 34 EN34 +Field 33 EN33 +Field 32 EN32 +Field 31 EN31 +Field 30 EN30 +Field 29 EN29 +Field 28 EN28 +Field 27 EN27 +Field 26 EN26 +Field 25 EN25 +Field 24 EN24 +Field 23 EN23 +Field 22 EN22 +Field 21 EN21 +Field 20 EN20 +Field 19 EN19 +Field 18 EN18 +Field 17 EN17 +Field 16 EN16 +Field 15 EN15 +Field 14 EN14 +Field 13 EN13 +Field 12 EN12 +Field 11 EN11 +Field 10 EN10 +Field 9 EN9 +Field 8 EN8 +Field 7 EN7 +Field 6 EN6 +Field 5 EN5 +Field 4 EN4 +Field 3 EN3 +Field 2 EN2 +Field 1 EN1 +Field 0 EN0 +EndSysregFields + +Sysreg ICC_PPI_ENABLER0_EL1 3 0 12 10 6 +Fields ICC_PPI_ENABLERx_EL1 +EndSysreg + +Sysreg ICC_PPI_ENABLER1_EL1 3 0 12 10 7 +Fields ICC_PPI_ENABLERx_EL1 +EndSysreg + +SysregFields ICC_PPI_ACTIVERx_EL1 +Field 63 Active63 +Field 62 Active62 +Field 61 Active61 +Field 60 Active60 +Field 59 Active59 +Field 58 Active58 +Field 57 Active57 +Field 56 Active56 +Field 55 Active55 +Field 54 Active54 +Field 53 Active53 +Field 52 Active52 +Field 51 Active51 +Field 50 Active50 +Field 49 Active49 +Field 48 Active48 +Field 47 Active47 +Field 46 Active46 +Field 45 Active45 +Field 44 Active44 +Field 43 Active43 +Field 42 Active42 +Field 41 Active41 +Field 40 Active40 +Field 39 Active39 +Field 38 Active38 +Field 37 Active37 +Field 36 Active36 +Field 35 Active35 +Field 34 Active34 +Field 33 Active33 +Field 32 Active32 +Field 31 Active31 +Field 30 Active30 +Field 29 Active29 +Field 28 Active28 +Field 27 Active27 +Field 26 Active26 +Field 25 Active25 +Field 24 Active24 +Field 23 Active23 +Field 22 Active22 +Field 21 Active21 +Field 20 Active20 +Field 19 Active19 +Field 18 Active18 +Field 17 Active17 +Field 16 Active16 +Field 15 Active15 +Field 14 Active14 +Field 13 Active13 +Field 12 Active12 +Field 11 Active11 +Field 10 Active10 +Field 9 Active9 +Field 8 Active8 +Field 7 Active7 +Field 6 Active6 +Field 5 Active5 +Field 4 Active4 +Field 3 Active3 +Field 2 Active2 +Field 1 Active1 +Field 0 Active0 +EndSysregFields + +Sysreg ICC_PPI_CACTIVER0_EL1 3 0 12 13 0 +Fields ICC_PPI_ACTIVERx_EL1 +EndSysreg + +Sysreg ICC_PPI_CACTIVER1_EL1 3 0 12 13 1 +Fields ICC_PPI_ACTIVERx_EL1 +EndSysreg + +Sysreg ICC_PPI_SACTIVER0_EL1 3 0 12 13 2 +Fields ICC_PPI_ACTIVERx_EL1 +EndSysreg + +Sysreg ICC_PPI_SACTIVER1_EL1 3 0 12 13 3 +Fields ICC_PPI_ACTIVERx_EL1 +EndSysreg + +SysregFields ICC_PPI_PENDRx_EL1 +Field 63 Pend63 +Field 62 Pend62 +Field 61 Pend61 +Field 60 Pend60 +Field 59 Pend59 +Field 58 Pend58 +Field 57 Pend57 +Field 56 Pend56 +Field 55 Pend55 +Field 54 Pend54 +Field 53 Pend53 +Field 52 Pend52 +Field 51 Pend51 +Field 50 Pend50 +Field 49 Pend49 +Field 48 Pend48 +Field 47 Pend47 +Field 46 Pend46 +Field 45 Pend45 +Field 44 Pend44 +Field 43 Pend43 +Field 42 Pend42 +Field 41 Pend41 +Field 40 Pend40 +Field 39 Pend39 +Field 38 Pend38 +Field 37 Pend37 +Field 36 Pend36 +Field 35 Pend35 +Field 34 Pend34 +Field 33 Pend33 +Field 32 Pend32 +Field 31 Pend31 +Field 30 Pend30 +Field 29 Pend29 +Field 28 Pend28 +Field 27 Pend27 +Field 26 Pend26 +Field 25 Pend25 +Field 24 Pend24 +Field 23 Pend23 +Field 22 Pend22 +Field 21 Pend21 +Field 20 Pend20 +Field 19 Pend19 +Field 18 Pend18 +Field 17 Pend17 +Field 16 Pend16 +Field 15 Pend15 +Field 14 Pend14 +Field 13 Pend13 +Field 12 Pend12 +Field 11 Pend11 +Field 10 Pend10 +Field 9 Pend9 +Field 8 Pend8 +Field 7 Pend7 +Field 6 Pend6 +Field 5 Pend5 +Field 4 Pend4 +Field 3 Pend3 +Field 2 Pend2 +Field 1 Pend1 +Field 0 Pend0 +EndSysregFields + +Sysreg ICC_PPI_CPENDR0_EL1 3 0 12 13 4 +Fields ICC_PPI_PENDRx_EL1 +EndSysreg + +Sysreg ICC_PPI_CPENDR1_EL1 3 0 12 13 5 +Fields ICC_PPI_PENDRx_EL1 +EndSysreg + +Sysreg ICC_PPI_SPENDR0_EL1 3 0 12 13 6 +Fields ICC_PPI_PENDRx_EL1 +EndSysreg + +Sysreg ICC_PPI_SPENDR1_EL1 3 0 12 13 7 +Fields ICC_PPI_PENDRx_EL1 +EndSysreg + +SysregFields ICC_PPI_PRIORITYRx_EL1 +Res0 63:61 +Field 60:56 Priority7 +Res0 55:53 +Field 52:48 Priority6 +Res0 47:45 +Field 44:40 Priority5 +Res0 39:37 +Field 36:32 Priority4 +Res0 31:29 +Field 28:24 Priority3 +Res0 23:21 +Field 20:16 Priority2 +Res0 15:13 +Field 12:8 Priority1 +Res0 7:5 +Field 4:0 Priority0 +EndSysregFields + +Sysreg ICC_PPI_PRIORITYR0_EL1 3 0 12 14 0 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR1_EL1 3 0 12 14 1 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR2_EL1 3 0 12 14 2 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR3_EL1 3 0 12 14 3 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR4_EL1 3 0 12 14 4 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR5_EL1 3 0 12 14 5 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR6_EL1 3 0 12 14 6 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR7_EL1 3 0 12 14 7 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR8_EL1 3 0 12 15 0 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR9_EL1 3 0 12 15 1 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR10_EL1 3 0 12 15 2 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR11_EL1 3 0 12 15 3 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR12_EL1 3 0 12 15 4 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR13_EL1 3 0 12 15 5 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR14_EL1 3 0 12 15 6 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + +Sysreg ICC_PPI_PRIORITYR15_EL1 3 0 12 15 7 +Fields ICC_PPI_PRIORITYRx_EL1 +EndSysreg + Sysreg PMSELR_EL0 3 3 9 12 5 Res0 63:5 Field 4:0 SEL @@ -3103,6 +3535,19 @@ Res0 14:12 Field 11:0 AFFINITY EndSysreg +Sysreg ICC_CR0_EL1 3 1 12 0 1 +Res0 63:39 +Field 38 PID +Field 37:32 IPPT +Res0 31:1 +Field 0 EN +EndSysreg + +Sysreg ICC_PCR_EL1 3 1 12 0 2 +Res0 63:5 +Field 4:0 PRIORITY +EndSysreg + Sysreg CSSELR_EL1 3 2 0 0 0 Res0 63:5 Field 4 TnD @@ -3989,6 +4434,54 @@ Field 31:16 PhyPARTID29 Field 15:0 PhyPARTID28 EndSysreg +Sysreg ICH_HFGRTR_EL2 3 4 12 9 4 +Res0 63:21 +Field 20 ICC_PPI_ACTIVERn_EL1 +Field 19 ICC_PPI_PRIORITYRn_EL1 +Field 18 ICC_PPI_PENDRn_EL1 +Field 17 ICC_PPI_ENABLERn_EL1 +Field 16 ICC_PPI_HMRn_EL1 +Res0 15:8 +Field 7 ICC_IAFFIDR_EL1 +Field 6 ICC_ICSR_EL1 +Field 5 ICC_PCR_EL1 +Field 4 ICC_HPPIR_EL1 +Field 3 ICC_HAPR_EL1 +Field 2 ICC_CR0_EL1 +Field 1 ICC_IDRn_EL1 +Field 0 ICC_APR_EL1 +EndSysreg + +Sysreg ICH_HFGWTR_EL2 3 4 12 9 6 +Res0 63:21 +Field 20 ICC_PPI_ACTIVERn_EL1 +Field 19 ICC_PPI_PRIORITYRn_EL1 +Field 18 ICC_PPI_PENDRn_EL1 +Field 17 ICC_PPI_ENABLERn_EL1 +Res0 16:7 +Field 6 ICC_ICSR_EL1 +Field 5 ICC_PCR_EL1 +Res0 4:3 +Field 2 ICC_CR0_EL1 +Res0 1 +Field 0 ICC_APR_EL1 +EndSysreg + +Sysreg ICH_HFGITR_EL2 3 4 12 9 7 +Res0 63:11 +Field 10 GICRCDNMIA +Field 9 GICRCDIA +Field 8 GICCDDI +Field 7 GICCDEOI +Field 6 GICCDHM +Field 5 GICCDRCFG +Field 4 GICCDPEND +Field 3 GICCDAFF +Field 2 GICCDPRI +Field 1 GICCDDIS +Field 0 GICCDEN +EndSysreg + Sysreg ICH_HCR_EL2 3 4 12 11 0 Res0 63:32 Field 31:27 EOIcount @@ -4037,6 +4530,12 @@ Field 1 U Field 0 EOI EndSysreg +Sysreg ICH_VCTLR_EL2 3 4 12 11 4 +Res0 63:2 +Field 1 V3 +Field 0 En +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 0d196e447142..67d79d33407a 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -41,10 +41,14 @@ config ARM_GIC_V3 select HAVE_ARM_SMCCC_DISCOVERY select IRQ_MSI_IOMMU +config ARM_GIC_ITS_PARENT + bool + config ARM_GIC_V3_ITS bool select GENERIC_MSI_IRQ select IRQ_MSI_LIB + select ARM_GIC_ITS_PARENT default ARM_GIC_V3 select IRQ_MSI_IOMMU @@ -54,6 +58,14 @@ config ARM_GIC_V3_ITS_FSL_MC depends on FSL_MC_BUS default ARM_GIC_V3_ITS +config ARM_GIC_V5 + bool + select IRQ_DOMAIN_HIERARCHY + select GENERIC_IRQ_EFFECTIVE_AFF_MASK + select GENERIC_MSI_IRQ + select IRQ_MSI_LIB + select ARM_GIC_ITS_PARENT + config ARM_NVIC bool select IRQ_DOMAIN_HIERARCHY diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 23ca4959e6ce..e83dad932ac0 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -33,9 +33,12 @@ obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o obj-$(CONFIG_IRQ_MSI_LIB) += irq-msi-lib.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o -obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v4.o irq-gic-v3-its-msi-parent.o +obj-$(CONFIG_ARM_GIC_ITS_PARENT) += irq-gic-its-msi-parent.o +obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v4.o obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC) += irq-gic-v3-its-fsl-mc-msi.o obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o +obj-$(CONFIG_ARM_GIC_V5) += irq-gic-v5.o irq-gic-v5-irs.o irq-gic-v5-its.o \ + irq-gic-v5-iwb.o obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index 020ecdf16901..710cab61d919 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -29,8 +29,6 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); -extern const struct msi_parent_ops gic_v3_its_msi_parent_ops; - #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) #define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) #define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-its-msi-parent.c index a5e110ffdd88..eb1473f1448a 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-its-msi-parent.c @@ -5,9 +5,10 @@ // Copyright (C) 2022 Intel #include <linux/acpi_iort.h> +#include <linux/of_address.h> #include <linux/pci.h> -#include "irq-gic-common.h" +#include "irq-gic-its-msi-parent.h" #include <linux/irqchip/irq-msi-lib.h> #define ITS_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ @@ -18,6 +19,23 @@ MSI_FLAG_PCI_MSIX | \ MSI_FLAG_MULTI_PCI_MSI) +static int its_translate_frame_address(struct device_node *msi_node, phys_addr_t *pa) +{ + struct resource res; + int ret; + + ret = of_property_match_string(msi_node, "reg-names", "ns-translate"); + if (ret < 0) + return ret; + + ret = of_address_to_resource(msi_node, ret, &res); + if (ret) + return ret; + + *pa = res.start; + return 0; +} + #ifdef CONFIG_PCI_MSI static int its_pci_msi_vec_count(struct pci_dev *pdev, void *data) { @@ -82,8 +100,46 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain->parent); return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); } + +static int its_v5_pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + struct device_node *msi_node = NULL; + struct msi_domain_info *msi_info; + struct pci_dev *pdev; + phys_addr_t pa; + u32 rid; + int ret; + + if (!dev_is_pci(dev)) + return -EINVAL; + + pdev = to_pci_dev(dev); + + rid = pci_msi_map_rid_ctlr_node(pdev, &msi_node); + if (!msi_node) + return -ENODEV; + + ret = its_translate_frame_address(msi_node, &pa); + if (ret) + return -ENODEV; + + of_node_put(msi_node); + + /* ITS specific DeviceID */ + info->scratchpad[0].ul = rid; + /* ITS translate frame physical address */ + info->scratchpad[1].ul = pa; + + /* Always allocate power of two vectors */ + nvec = roundup_pow_of_two(nvec); + + msi_info = msi_get_domain_info(domain->parent); + return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); +} #else /* CONFIG_PCI_MSI */ #define its_pci_msi_prepare NULL +#define its_v5_pci_msi_prepare NULL #endif /* !CONFIG_PCI_MSI */ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev, @@ -118,6 +174,53 @@ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev, return ret; } +static int of_v5_pmsi_get_msi_info(struct irq_domain *domain, struct device *dev, + u32 *dev_id, phys_addr_t *pa) +{ + int ret, index = 0; + /* + * Retrieve the DeviceID and the ITS translate frame node pointer + * out of the msi-parent property. + */ + do { + struct of_phandle_args args; + + ret = of_parse_phandle_with_args(dev->of_node, + "msi-parent", "#msi-cells", + index, &args); + if (ret) + break; + /* + * The IRQ domain fwnode is the msi controller parent + * in GICv5 (where the msi controller nodes are the + * ITS translate frames). + */ + if (args.np->parent == irq_domain_get_of_node(domain)) { + if (WARN_ON(args.args_count != 1)) + return -EINVAL; + *dev_id = args.args[0]; + + ret = its_translate_frame_address(args.np, pa); + if (ret) + return -ENODEV; + break; + } + index++; + } while (!ret); + + if (ret) { + struct device_node *np = NULL; + + ret = of_map_id(dev->of_node, dev->id, "msi-map", "msi-map-mask", &np, dev_id); + if (np) { + ret = its_translate_frame_address(np, pa); + of_node_put(np); + } + } + + return ret; +} + int __weak iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id) { return -1; @@ -148,6 +251,33 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, dev, nvec, info); } +static int its_v5_pmsi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + struct msi_domain_info *msi_info; + phys_addr_t pa; + u32 dev_id; + int ret; + + if (!dev->of_node) + return -ENODEV; + + ret = of_v5_pmsi_get_msi_info(domain->parent, dev, &dev_id, &pa); + if (ret) + return ret; + + /* ITS specific DeviceID */ + info->scratchpad[0].ul = dev_id; + /* ITS translate frame physical address */ + info->scratchpad[1].ul = pa; + + /* Allocate always as a power of 2 */ + nvec = roundup_pow_of_two(nvec); + + msi_info = msi_get_domain_info(domain->parent); + return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); +} + static void its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info) { struct msi_domain_info *msi_info; @@ -199,6 +329,32 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, return true; } +static bool its_v5_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + switch (info->bus_token) { + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + info->ops->msi_prepare = its_v5_pci_msi_prepare; + info->ops->msi_teardown = its_msi_teardown; + break; + case DOMAIN_BUS_DEVICE_MSI: + case DOMAIN_BUS_WIRED_TO_MSI: + info->ops->msi_prepare = its_v5_pmsi_prepare; + info->ops->msi_teardown = its_msi_teardown; + break; + default: + /* Confused. How did the lib return true? */ + WARN_ON_ONCE(1); + return false; + } + + return true; +} + const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, @@ -208,3 +364,13 @@ const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .prefix = "ITS-", .init_dev_msi_info = its_init_dev_msi_info, }; + +const struct msi_parent_ops gic_v5_its_msi_parent_ops = { + .supported_flags = ITS_MSI_FLAGS_SUPPORTED, + .required_flags = ITS_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, + .prefix = "ITS-v5-", + .init_dev_msi_info = its_v5_init_dev_msi_info, +}; diff --git a/drivers/irqchip/irq-gic-its-msi-parent.h b/drivers/irqchip/irq-gic-its-msi-parent.h new file mode 100644 index 000000000000..df016f347337 --- /dev/null +++ b/drivers/irqchip/irq-gic-its-msi-parent.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 ARM Limited, All Rights Reserved. + */ + +#ifndef _IRQ_GIC_ITS_MSI_PARENT_H +#define _IRQ_GIC_ITS_MSI_PARENT_H + +extern const struct msi_parent_ops gic_v3_its_msi_parent_ops; +extern const struct msi_parent_ops gic_v5_its_msi_parent_ops; + +#endif /* _IRQ_GIC_ITS_MSI_PARENT_H */ diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d54fa0638dc4..467cb78435a9 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -41,6 +41,7 @@ #include <asm/exception.h> #include "irq-gic-common.h" +#include "irq-gic-its-msi-parent.h" #include <linux/irqchip/irq-msi-lib.h> #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c new file mode 100644 index 000000000000..f845415f9143 --- /dev/null +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved. + */ + +#define pr_fmt(fmt) "GICv5 IRS: " fmt + +#include <linux/log2.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-v5.h> + +/* + * Hardcoded ID_BITS limit for systems supporting only a 1-level IST + * table. Systems supporting only a 1-level IST table aren't expected + * to require more than 2^12 LPIs. Tweak as required. + */ +#define LPI_ID_BITS_LINEAR 12 + +#define IRS_FLAGS_NON_COHERENT BIT(0) + +static DEFINE_PER_CPU_READ_MOSTLY(struct gicv5_irs_chip_data *, per_cpu_irs_data); +static LIST_HEAD(irs_nodes); + +static u32 irs_readl_relaxed(struct gicv5_irs_chip_data *irs_data, + const u32 reg_offset) +{ + return readl_relaxed(irs_data->irs_base + reg_offset); +} + +static void irs_writel_relaxed(struct gicv5_irs_chip_data *irs_data, + const u32 val, const u32 reg_offset) +{ + writel_relaxed(val, irs_data->irs_base + reg_offset); +} + +static u64 irs_readq_relaxed(struct gicv5_irs_chip_data *irs_data, + const u32 reg_offset) +{ + return readq_relaxed(irs_data->irs_base + reg_offset); +} + +static void irs_writeq_relaxed(struct gicv5_irs_chip_data *irs_data, + const u64 val, const u32 reg_offset) +{ + writeq_relaxed(val, irs_data->irs_base + reg_offset); +} + +/* + * The polling wait (in gicv5_wait_for_op_s_atomic()) on a GIC register + * provides the memory barriers (through MMIO accessors) + * required to synchronize CPU and GIC access to IST memory. + */ +static int gicv5_irs_ist_synchronise(struct gicv5_irs_chip_data *irs_data) +{ + return gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_IST_STATUSR, + GICV5_IRS_IST_STATUSR_IDLE, NULL); +} + +static int __init gicv5_irs_init_ist_linear(struct gicv5_irs_chip_data *irs_data, + unsigned int lpi_id_bits, + unsigned int istsz) +{ + size_t l2istsz; + u32 n, cfgr; + void *ist; + u64 baser; + int ret; + + /* Taken from GICv5 specifications 10.2.1.13 IRS_IST_BASER */ + n = max(5, lpi_id_bits + 1 + istsz); + + l2istsz = BIT(n + 1); + /* + * Check memory requirements. For a linear IST we cap the + * number of ID bits to a value that should never exceed + * kmalloc interface memory allocation limits, so this + * check is really belt and braces. + */ + if (l2istsz > KMALLOC_MAX_SIZE) { + u8 lpi_id_cap = ilog2(KMALLOC_MAX_SIZE) - 2 + istsz; + + pr_warn("Limiting LPI ID bits from %u to %u\n", + lpi_id_bits, lpi_id_cap); + lpi_id_bits = lpi_id_cap; + l2istsz = KMALLOC_MAX_SIZE; + } + + ist = kzalloc(l2istsz, GFP_KERNEL); + if (!ist) + return -ENOMEM; + + if (irs_data->flags & IRS_FLAGS_NON_COHERENT) + dcache_clean_inval_poc((unsigned long)ist, + (unsigned long)ist + l2istsz); + else + dsb(ishst); + + cfgr = FIELD_PREP(GICV5_IRS_IST_CFGR_STRUCTURE, + GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR) | + FIELD_PREP(GICV5_IRS_IST_CFGR_ISTSZ, istsz) | + FIELD_PREP(GICV5_IRS_IST_CFGR_L2SZ, + GICV5_IRS_IST_CFGR_L2SZ_4K) | + FIELD_PREP(GICV5_IRS_IST_CFGR_LPI_ID_BITS, lpi_id_bits); + irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_IST_CFGR); + + gicv5_global_data.ist.l2 = false; + + baser = (virt_to_phys(ist) & GICV5_IRS_IST_BASER_ADDR_MASK) | + FIELD_PREP(GICV5_IRS_IST_BASER_VALID, 0x1); + irs_writeq_relaxed(irs_data, baser, GICV5_IRS_IST_BASER); + + ret = gicv5_irs_ist_synchronise(irs_data); + if (ret) { + kfree(ist); + return ret; + } + + return 0; +} + +static int __init gicv5_irs_init_ist_two_level(struct gicv5_irs_chip_data *irs_data, + unsigned int lpi_id_bits, + unsigned int istsz, + unsigned int l2sz) +{ + __le64 *l1ist; + u32 cfgr, n; + size_t l1sz; + u64 baser; + int ret; + + /* Taken from GICv5 specifications 10.2.1.13 IRS_IST_BASER */ + n = max(5, lpi_id_bits - ((10 - istsz) + (2 * l2sz)) + 2); + + l1sz = BIT(n + 1); + + l1ist = kzalloc(l1sz, GFP_KERNEL); + if (!l1ist) + return -ENOMEM; + + if (irs_data->flags & IRS_FLAGS_NON_COHERENT) + dcache_clean_inval_poc((unsigned long)l1ist, + (unsigned long)l1ist + l1sz); + else + dsb(ishst); + + cfgr = FIELD_PREP(GICV5_IRS_IST_CFGR_STRUCTURE, + GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL) | + FIELD_PREP(GICV5_IRS_IST_CFGR_ISTSZ, istsz) | + FIELD_PREP(GICV5_IRS_IST_CFGR_L2SZ, l2sz) | + FIELD_PREP(GICV5_IRS_IST_CFGR_LPI_ID_BITS, lpi_id_bits); + irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_IST_CFGR); + + /* + * The L2SZ determine bits required at L2 level. Number of bytes + * required by metadata is reported through istsz - the number of bits + * covered by L2 entries scales accordingly. + */ + gicv5_global_data.ist.l2_size = BIT(11 + (2 * l2sz) + 1); + gicv5_global_data.ist.l2_bits = (10 - istsz) + (2 * l2sz); + gicv5_global_data.ist.l1ist_addr = l1ist; + gicv5_global_data.ist.l2 = true; + + baser = (virt_to_phys(l1ist) & GICV5_IRS_IST_BASER_ADDR_MASK) | + FIELD_PREP(GICV5_IRS_IST_BASER_VALID, 0x1); + irs_writeq_relaxed(irs_data, baser, GICV5_IRS_IST_BASER); + + ret = gicv5_irs_ist_synchronise(irs_data); + if (ret) { + kfree(l1ist); + return ret; + } + + return 0; +} + +/* + * Alloc L2 IST entries on demand. + * + * Locking/serialization is guaranteed by irqdomain core code by + * taking the hierarchical domain struct irq_domain.root->mutex. + */ +int gicv5_irs_iste_alloc(const u32 lpi) +{ + struct gicv5_irs_chip_data *irs_data; + unsigned int index; + u32 l2istr, l2bits; + __le64 *l1ist; + size_t l2size; + void *l2ist; + int ret; + + if (!gicv5_global_data.ist.l2) + return 0; + + irs_data = per_cpu(per_cpu_irs_data, smp_processor_id()); + if (!irs_data) + return -ENOENT; + + l2size = gicv5_global_data.ist.l2_size; + l2bits = gicv5_global_data.ist.l2_bits; + l1ist = gicv5_global_data.ist.l1ist_addr; + index = lpi >> l2bits; + + if (FIELD_GET(GICV5_ISTL1E_VALID, le64_to_cpu(l1ist[index]))) + return 0; + + l2ist = kzalloc(l2size, GFP_KERNEL); + if (!l2ist) + return -ENOMEM; + + l1ist[index] = cpu_to_le64(virt_to_phys(l2ist) & GICV5_ISTL1E_L2_ADDR_MASK); + + if (irs_data->flags & IRS_FLAGS_NON_COHERENT) { + dcache_clean_inval_poc((unsigned long)l2ist, + (unsigned long)l2ist + l2size); + dcache_clean_poc((unsigned long)(l1ist + index), + (unsigned long)(l1ist + index) + sizeof(*l1ist)); + } else { + dsb(ishst); + } + + l2istr = FIELD_PREP(GICV5_IRS_MAP_L2_ISTR_ID, lpi); + irs_writel_relaxed(irs_data, l2istr, GICV5_IRS_MAP_L2_ISTR); + + ret = gicv5_irs_ist_synchronise(irs_data); + if (ret) { + l1ist[index] = 0; + kfree(l2ist); + return ret; + } + + /* + * Make sure we invalidate the cache line pulled before the IRS + * had a chance to update the L1 entry and mark it valid. + */ + if (irs_data->flags & IRS_FLAGS_NON_COHERENT) { + /* + * gicv5_irs_ist_synchronise() includes memory + * barriers (MMIO accessors) required to guarantee that the + * following dcache invalidation is not executed before the + * IST mapping operation has completed. + */ + dcache_inval_poc((unsigned long)(l1ist + index), + (unsigned long)(l1ist + index) + sizeof(*l1ist)); + } + + return 0; +} + +/* + * Try to match the L2 IST size to the pagesize, and if this is not possible + * pick the smallest supported L2 size in order to minimise the requirement for + * physically contiguous blocks of memory as page-sized allocations are + * guaranteed to be physically contiguous, and are by definition the easiest to + * find. + * + * Fall back to the smallest supported size (in the event that the pagesize + * itself is not supported) again serves to make it easier to find physically + * contiguous blocks of memory. + */ +static unsigned int gicv5_irs_l2_sz(u32 idr2) +{ + switch (PAGE_SIZE) { + case SZ_64K: + if (GICV5_IRS_IST_L2SZ_SUPPORT_64KB(idr2)) + return GICV5_IRS_IST_CFGR_L2SZ_64K; + fallthrough; + case SZ_4K: + if (GICV5_IRS_IST_L2SZ_SUPPORT_4KB(idr2)) + return GICV5_IRS_IST_CFGR_L2SZ_4K; + fallthrough; + case SZ_16K: + if (GICV5_IRS_IST_L2SZ_SUPPORT_16KB(idr2)) + return GICV5_IRS_IST_CFGR_L2SZ_16K; + break; + } + + if (GICV5_IRS_IST_L2SZ_SUPPORT_4KB(idr2)) + return GICV5_IRS_IST_CFGR_L2SZ_4K; + + return GICV5_IRS_IST_CFGR_L2SZ_64K; +} + +static int __init gicv5_irs_init_ist(struct gicv5_irs_chip_data *irs_data) +{ + u32 lpi_id_bits, idr2_id_bits, idr2_min_lpi_id_bits, l2_iste_sz, l2sz; + u32 l2_iste_sz_split, idr2; + bool two_levels, istmd; + u64 baser; + int ret; + + baser = irs_readq_relaxed(irs_data, GICV5_IRS_IST_BASER); + if (FIELD_GET(GICV5_IRS_IST_BASER_VALID, baser)) { + pr_err("IST is marked as valid already; cannot allocate\n"); + return -EPERM; + } + + idr2 = irs_readl_relaxed(irs_data, GICV5_IRS_IDR2); + two_levels = !!FIELD_GET(GICV5_IRS_IDR2_IST_LEVELS, idr2); + + idr2_id_bits = FIELD_GET(GICV5_IRS_IDR2_ID_BITS, idr2); + idr2_min_lpi_id_bits = FIELD_GET(GICV5_IRS_IDR2_MIN_LPI_ID_BITS, idr2); + + /* + * For two level tables we are always supporting the maximum allowed + * number of IDs. + * + * For 1-level tables, we should support a number of bits that + * is >= min_lpi_id_bits but cap it to LPI_ID_BITS_LINEAR lest + * the level 1-table gets too large and its memory allocation + * may fail. + */ + if (two_levels) { + lpi_id_bits = idr2_id_bits; + } else { + lpi_id_bits = max(LPI_ID_BITS_LINEAR, idr2_min_lpi_id_bits); + lpi_id_bits = min(lpi_id_bits, idr2_id_bits); + } + + /* + * Cap the ID bits according to the CPUIF supported ID bits + */ + lpi_id_bits = min(lpi_id_bits, gicv5_global_data.cpuif_id_bits); + + if (two_levels) + l2sz = gicv5_irs_l2_sz(idr2); + + istmd = !!FIELD_GET(GICV5_IRS_IDR2_ISTMD, idr2); + + l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_4; + + if (istmd) { + l2_iste_sz_split = FIELD_GET(GICV5_IRS_IDR2_ISTMD_SZ, idr2); + + if (lpi_id_bits < l2_iste_sz_split) + l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_8; + else + l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_16; + } + + /* + * Follow GICv5 specification recommendation to opt in for two + * level tables (ref: 10.2.1.14 IRS_IST_CFGR). + */ + if (two_levels && (lpi_id_bits > ((10 - l2_iste_sz) + (2 * l2sz)))) { + ret = gicv5_irs_init_ist_two_level(irs_data, lpi_id_bits, + l2_iste_sz, l2sz); + } else { + ret = gicv5_irs_init_ist_linear(irs_data, lpi_id_bits, + l2_iste_sz); + } + if (ret) + return ret; + + gicv5_init_lpis(BIT(lpi_id_bits)); + + return 0; +} + +struct iaffid_entry { + u16 iaffid; + bool valid; +}; + +static DEFINE_PER_CPU(struct iaffid_entry, cpu_iaffid); + +int gicv5_irs_cpu_to_iaffid(int cpuid, u16 *iaffid) +{ + if (!per_cpu(cpu_iaffid, cpuid).valid) { + pr_err("IAFFID for CPU %d has not been initialised\n", cpuid); + return -ENODEV; + } + + *iaffid = per_cpu(cpu_iaffid, cpuid).iaffid; + + return 0; +} + +struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id) +{ + struct gicv5_irs_chip_data *irs_data; + u32 min, max; + + list_for_each_entry(irs_data, &irs_nodes, entry) { + if (!irs_data->spi_range) + continue; + + min = irs_data->spi_min; + max = irs_data->spi_min + irs_data->spi_range - 1; + if (spi_id >= min && spi_id <= max) + return irs_data; + } + + return NULL; +} + +static int gicv5_irs_wait_for_spi_op(struct gicv5_irs_chip_data *irs_data) +{ + u32 statusr; + int ret; + + ret = gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_SPI_STATUSR, + GICV5_IRS_SPI_STATUSR_IDLE, &statusr); + if (ret) + return ret; + + return !!FIELD_GET(GICV5_IRS_SPI_STATUSR_V, statusr) ? 0 : -EIO; +} + +static int gicv5_irs_wait_for_irs_pe(struct gicv5_irs_chip_data *irs_data, + bool selr) +{ + bool valid = true; + u32 statusr; + int ret; + + ret = gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_PE_STATUSR, + GICV5_IRS_PE_STATUSR_IDLE, &statusr); + if (ret) + return ret; + + if (selr) + valid = !!FIELD_GET(GICV5_IRS_PE_STATUSR_V, statusr); + + return valid ? 0 : -EIO; +} + +static int gicv5_irs_wait_for_pe_selr(struct gicv5_irs_chip_data *irs_data) +{ + return gicv5_irs_wait_for_irs_pe(irs_data, true); +} + +static int gicv5_irs_wait_for_pe_cr0(struct gicv5_irs_chip_data *irs_data) +{ + return gicv5_irs_wait_for_irs_pe(irs_data, false); +} + +int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct gicv5_irs_chip_data *irs_data = d->chip_data; + u32 selr, cfgr; + bool level; + int ret; + + /* + * There is no distinction between HIGH/LOW for level IRQs + * and RISING/FALLING for edge IRQs in the architecture, + * hence consider them equivalent. + */ + switch (type) { + case IRQ_TYPE_EDGE_RISING: + case IRQ_TYPE_EDGE_FALLING: + level = false; + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_LEVEL_LOW: + level = true; + break; + default: + return -EINVAL; + } + + guard(raw_spinlock)(&irs_data->spi_config_lock); + + selr = FIELD_PREP(GICV5_IRS_SPI_SELR_ID, d->hwirq); + irs_writel_relaxed(irs_data, selr, GICV5_IRS_SPI_SELR); + ret = gicv5_irs_wait_for_spi_op(irs_data); + if (ret) + return ret; + + cfgr = FIELD_PREP(GICV5_IRS_SPI_CFGR_TM, level); + irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_SPI_CFGR); + + return gicv5_irs_wait_for_spi_op(irs_data); +} + +static int gicv5_irs_wait_for_idle(struct gicv5_irs_chip_data *irs_data) +{ + return gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_CR0, + GICV5_IRS_CR0_IDLE, NULL); +} + +void gicv5_irs_syncr(void) +{ + struct gicv5_irs_chip_data *irs_data; + u32 syncr; + + irs_data = list_first_entry_or_null(&irs_nodes, struct gicv5_irs_chip_data, entry); + if (WARN_ON_ONCE(!irs_data)) + return; + + syncr = FIELD_PREP(GICV5_IRS_SYNCR_SYNC, 1); + irs_writel_relaxed(irs_data, syncr, GICV5_IRS_SYNCR); + + gicv5_wait_for_op(irs_data->irs_base, GICV5_IRS_SYNC_STATUSR, + GICV5_IRS_SYNC_STATUSR_IDLE); +} + +int gicv5_irs_register_cpu(int cpuid) +{ + struct gicv5_irs_chip_data *irs_data; + u32 selr, cr0; + u16 iaffid; + int ret; + + ret = gicv5_irs_cpu_to_iaffid(cpuid, &iaffid); + if (ret) { + pr_err("IAFFID for CPU %d has not been initialised\n", cpuid); + return ret; + } + + irs_data = per_cpu(per_cpu_irs_data, cpuid); + if (!irs_data) { + pr_err("No IRS associated with CPU %u\n", cpuid); + return -ENXIO; + } + + selr = FIELD_PREP(GICV5_IRS_PE_SELR_IAFFID, iaffid); + irs_writel_relaxed(irs_data, selr, GICV5_IRS_PE_SELR); + + ret = gicv5_irs_wait_for_pe_selr(irs_data); + if (ret) { + pr_err("IAFFID 0x%x used in IRS_PE_SELR is invalid\n", iaffid); + return -ENXIO; + } + + cr0 = FIELD_PREP(GICV5_IRS_PE_CR0_DPS, 0x1); + irs_writel_relaxed(irs_data, cr0, GICV5_IRS_PE_CR0); + + ret = gicv5_irs_wait_for_pe_cr0(irs_data); + if (ret) + return ret; + + pr_debug("CPU %d enabled PE IAFFID 0x%x\n", cpuid, iaffid); + + return 0; +} + +static void __init gicv5_irs_init_bases(struct gicv5_irs_chip_data *irs_data, + void __iomem *irs_base, + struct fwnode_handle *handle) +{ + struct device_node *np = to_of_node(handle); + u32 cr0, cr1; + + irs_data->fwnode = handle; + irs_data->irs_base = irs_base; + + if (of_property_read_bool(np, "dma-noncoherent")) { + /* + * A non-coherent IRS implies that some cache levels cannot be + * used coherently by the cores and GIC. Our only option is to mark + * memory attributes for the GIC as non-cacheable; by default, + * non-cacheable memory attributes imply outer-shareable + * shareability, the value written into IRS_CR1_SH is ignored. + */ + cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_NO_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMD_WA, GICV5_NO_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMD_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VPET_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMT_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IST_WA, GICV5_NO_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_NON_CACHE) | + FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_NON_CACHE); + irs_data->flags |= IRS_FLAGS_NON_COHERENT; + } else { + cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMD_WA, GICV5_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMD_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VPET_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_VMT_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IST_WA, GICV5_WRITE_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_WB_CACHE) | + FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_WB_CACHE) | + FIELD_PREP(GICV5_IRS_CR1_SH, GICV5_INNER_SHARE); + } + + irs_writel_relaxed(irs_data, cr1, GICV5_IRS_CR1); + + cr0 = FIELD_PREP(GICV5_IRS_CR0_IRSEN, 0x1); + irs_writel_relaxed(irs_data, cr0, GICV5_IRS_CR0); + gicv5_irs_wait_for_idle(irs_data); +} + +static int __init gicv5_irs_of_init_affinity(struct device_node *node, + struct gicv5_irs_chip_data *irs_data, + u8 iaffid_bits) +{ + /* + * Detect IAFFID<->CPU mappings from the device tree and + * record IRS<->CPU topology information. + */ + u16 iaffid_mask = GENMASK(iaffid_bits - 1, 0); + int ret, i, ncpus, niaffids; + + ncpus = of_count_phandle_with_args(node, "cpus", NULL); + if (ncpus < 0) + return -EINVAL; + + niaffids = of_property_count_elems_of_size(node, "arm,iaffids", + sizeof(u16)); + if (niaffids != ncpus) + return -EINVAL; + + u16 *iaffids __free(kfree) = kcalloc(niaffids, sizeof(*iaffids), GFP_KERNEL); + if (!iaffids) + return -ENOMEM; + + ret = of_property_read_u16_array(node, "arm,iaffids", iaffids, niaffids); + if (ret) + return ret; + + for (i = 0; i < ncpus; i++) { + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpus", i); + if (WARN_ON(!cpu_node)) + continue; + + cpu = of_cpu_node_to_id(cpu_node); + of_node_put(cpu_node); + if (WARN_ON(cpu < 0)) + continue; + + if (iaffids[i] & ~iaffid_mask) { + pr_warn("CPU %d iaffid 0x%x exceeds IRS iaffid bits\n", + cpu, iaffids[i]); + continue; + } + + per_cpu(cpu_iaffid, cpu).iaffid = iaffids[i]; + per_cpu(cpu_iaffid, cpu).valid = true; + + /* We also know that the CPU is connected to this IRS */ + per_cpu(per_cpu_irs_data, cpu) = irs_data; + } + + return ret; +} + +static void irs_setup_pri_bits(u32 idr1) +{ + switch (FIELD_GET(GICV5_IRS_IDR1_PRIORITY_BITS, idr1)) { + case GICV5_IRS_IDR1_PRIORITY_BITS_1BITS: + gicv5_global_data.irs_pri_bits = 1; + break; + case GICV5_IRS_IDR1_PRIORITY_BITS_2BITS: + gicv5_global_data.irs_pri_bits = 2; + break; + case GICV5_IRS_IDR1_PRIORITY_BITS_3BITS: + gicv5_global_data.irs_pri_bits = 3; + break; + case GICV5_IRS_IDR1_PRIORITY_BITS_4BITS: + gicv5_global_data.irs_pri_bits = 4; + break; + case GICV5_IRS_IDR1_PRIORITY_BITS_5BITS: + gicv5_global_data.irs_pri_bits = 5; + break; + default: + pr_warn("Detected wrong IDR priority bits value 0x%lx\n", + FIELD_GET(GICV5_IRS_IDR1_PRIORITY_BITS, idr1)); + gicv5_global_data.irs_pri_bits = 1; + break; + } +} + +static int __init gicv5_irs_init(struct device_node *node) +{ + struct gicv5_irs_chip_data *irs_data; + void __iomem *irs_base; + u32 idr, spi_count; + u8 iaffid_bits; + int ret; + + irs_data = kzalloc(sizeof(*irs_data), GFP_KERNEL); + if (!irs_data) + return -ENOMEM; + + raw_spin_lock_init(&irs_data->spi_config_lock); + + ret = of_property_match_string(node, "reg-names", "ns-config"); + if (ret < 0) { + pr_err("%pOF: ns-config reg-name not present\n", node); + goto out_err; + } + + irs_base = of_io_request_and_map(node, ret, of_node_full_name(node)); + if (IS_ERR(irs_base)) { + pr_err("%pOF: unable to map GICv5 IRS registers\n", node); + ret = PTR_ERR(irs_base); + goto out_err; + } + + gicv5_irs_init_bases(irs_data, irs_base, &node->fwnode); + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); + iaffid_bits = FIELD_GET(GICV5_IRS_IDR1_IAFFID_BITS, idr) + 1; + + ret = gicv5_irs_of_init_affinity(node, irs_data, iaffid_bits); + if (ret) { + pr_err("Failed to parse CPU IAFFIDs from the device tree!\n"); + goto out_iomem; + } + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR2); + if (WARN(!FIELD_GET(GICV5_IRS_IDR2_LPI, idr), + "LPI support not available - no IPIs, can't proceed\n")) { + ret = -ENODEV; + goto out_iomem; + } + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR7); + irs_data->spi_min = FIELD_GET(GICV5_IRS_IDR7_SPI_BASE, idr); + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR6); + irs_data->spi_range = FIELD_GET(GICV5_IRS_IDR6_SPI_IRS_RANGE, idr); + + if (irs_data->spi_range) { + pr_info("%s detected SPI range [%u-%u]\n", + of_node_full_name(node), + irs_data->spi_min, + irs_data->spi_min + + irs_data->spi_range - 1); + } + + /* + * Do the global setting only on the first IRS. + * Global properties (iaffid_bits, global spi count) are guaranteed to + * be consistent across IRSes by the architecture. + */ + if (list_empty(&irs_nodes)) { + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); + irs_setup_pri_bits(idr); + + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR5); + + spi_count = FIELD_GET(GICV5_IRS_IDR5_SPI_RANGE, idr); + gicv5_global_data.global_spi_count = spi_count; + + gicv5_init_lpi_domain(); + + pr_debug("Detected %u SPIs globally\n", spi_count); + } + + list_add_tail(&irs_data->entry, &irs_nodes); + + return 0; + +out_iomem: + iounmap(irs_base); +out_err: + kfree(irs_data); + return ret; +} + +void __init gicv5_irs_remove(void) +{ + struct gicv5_irs_chip_data *irs_data, *tmp_data; + + gicv5_free_lpi_domain(); + gicv5_deinit_lpis(); + + list_for_each_entry_safe(irs_data, tmp_data, &irs_nodes, entry) { + iounmap(irs_data->irs_base); + list_del(&irs_data->entry); + kfree(irs_data); + } +} + +int __init gicv5_irs_enable(void) +{ + struct gicv5_irs_chip_data *irs_data; + int ret; + + irs_data = list_first_entry_or_null(&irs_nodes, + struct gicv5_irs_chip_data, entry); + if (!irs_data) + return -ENODEV; + + ret = gicv5_irs_init_ist(irs_data); + if (ret) { + pr_err("Failed to init IST\n"); + return ret; + } + + return 0; +} + +void __init gicv5_irs_its_probe(void) +{ + struct gicv5_irs_chip_data *irs_data; + + list_for_each_entry(irs_data, &irs_nodes, entry) + gicv5_its_of_probe(to_of_node(irs_data->fwnode)); +} + +int __init gicv5_irs_of_probe(struct device_node *parent) +{ + struct device_node *np; + int ret; + + for_each_available_child_of_node(parent, np) { + if (!of_device_is_compatible(np, "arm,gic-v5-irs")) + continue; + + ret = gicv5_irs_init(np); + if (ret) + pr_err("Failed to init IRS %s\n", np->full_name); + } + + return list_empty(&irs_nodes) ? -ENODEV : 0; +} diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c new file mode 100644 index 000000000000..340640fdbdf6 --- /dev/null +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -0,0 +1,1228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved. + */ + +#define pr_fmt(fmt) "GICv5 ITS: " fmt + +#include <linux/bitmap.h> +#include <linux/iommu.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/slab.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-v5.h> +#include <linux/irqchip/irq-msi-lib.h> + +#include "irq-gic-its-msi-parent.h" + +#define ITS_FLAGS_NON_COHERENT BIT(0) + +struct gicv5_its_chip_data { + struct xarray its_devices; + struct mutex dev_alloc_lock; + struct fwnode_handle *fwnode; + struct gicv5_its_devtab_cfg devtab_cfgr; + void __iomem *its_base; + u32 flags; + unsigned int msi_domain_flags; +}; + +struct gicv5_its_dev { + struct gicv5_its_chip_data *its_node; + struct gicv5_its_itt_cfg itt_cfg; + unsigned long *event_map; + u32 device_id; + u32 num_events; + phys_addr_t its_trans_phys_base; +}; + +static u32 its_readl_relaxed(struct gicv5_its_chip_data *its_node, const u32 reg_offset) +{ + return readl_relaxed(its_node->its_base + reg_offset); +} + +static void its_writel_relaxed(struct gicv5_its_chip_data *its_node, const u32 val, + const u32 reg_offset) +{ + writel_relaxed(val, its_node->its_base + reg_offset); +} + +static void its_writeq_relaxed(struct gicv5_its_chip_data *its_node, const u64 val, + const u32 reg_offset) +{ + writeq_relaxed(val, its_node->its_base + reg_offset); +} + +static void gicv5_its_dcache_clean(struct gicv5_its_chip_data *its, void *start, + size_t sz) +{ + void *end = start + sz; + + if (its->flags & ITS_FLAGS_NON_COHERENT) + dcache_clean_inval_poc((unsigned long)start, (unsigned long)end); + else + dsb(ishst); +} + +static void its_write_table_entry(struct gicv5_its_chip_data *its, __le64 *entry, + u64 val) +{ + WRITE_ONCE(*entry, cpu_to_le64(val)); + gicv5_its_dcache_clean(its, entry, sizeof(*entry)); +} + +#define devtab_cfgr_field(its, f) \ + FIELD_GET(GICV5_ITS_DT_CFGR_##f, (its)->devtab_cfgr.cfgr) + +static int gicv5_its_cache_sync(struct gicv5_its_chip_data *its) +{ + return gicv5_wait_for_op_atomic(its->its_base, GICV5_ITS_STATUSR, + GICV5_ITS_STATUSR_IDLE, NULL); +} + +static void gicv5_its_syncr(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev) +{ + u64 syncr; + + syncr = FIELD_PREP(GICV5_ITS_SYNCR_SYNC, 1) | + FIELD_PREP(GICV5_ITS_SYNCR_DEVICEID, its_dev->device_id); + + its_writeq_relaxed(its, syncr, GICV5_ITS_SYNCR); + + gicv5_wait_for_op(its->its_base, GICV5_ITS_SYNC_STATUSR, GICV5_ITS_SYNC_STATUSR_IDLE); +} + +/* Number of bits required for each L2 {device/interrupt translation} table size */ +#define ITS_L2SZ_64K_L2_BITS 13 +#define ITS_L2SZ_16K_L2_BITS 11 +#define ITS_L2SZ_4K_L2_BITS 9 + +static unsigned int gicv5_its_l2sz_to_l2_bits(unsigned int sz) +{ + switch (sz) { + case GICV5_ITS_DT_ITT_CFGR_L2SZ_64k: + return ITS_L2SZ_64K_L2_BITS; + case GICV5_ITS_DT_ITT_CFGR_L2SZ_16k: + return ITS_L2SZ_16K_L2_BITS; + case GICV5_ITS_DT_ITT_CFGR_L2SZ_4k: + default: + return ITS_L2SZ_4K_L2_BITS; + } +} + +static int gicv5_its_itt_cache_inv(struct gicv5_its_chip_data *its, u32 device_id, + u16 event_id) +{ + u32 eventr, eidr; + u64 didr; + + didr = FIELD_PREP(GICV5_ITS_DIDR_DEVICEID, device_id); + eidr = FIELD_PREP(GICV5_ITS_EIDR_EVENTID, event_id); + eventr = FIELD_PREP(GICV5_ITS_INV_EVENTR_I, 0x1); + + its_writeq_relaxed(its, didr, GICV5_ITS_DIDR); + its_writel_relaxed(its, eidr, GICV5_ITS_EIDR); + its_writel_relaxed(its, eventr, GICV5_ITS_INV_EVENTR); + + return gicv5_its_cache_sync(its); +} + +static void gicv5_its_free_itt_linear(struct gicv5_its_dev *its_dev) +{ + kfree(its_dev->itt_cfg.linear.itt); +} + +static void gicv5_its_free_itt_two_level(struct gicv5_its_dev *its_dev) +{ + unsigned int i, num_ents = its_dev->itt_cfg.l2.num_l1_ents; + + for (i = 0; i < num_ents; i++) + kfree(its_dev->itt_cfg.l2.l2ptrs[i]); + + kfree(its_dev->itt_cfg.l2.l2ptrs); + kfree(its_dev->itt_cfg.l2.l1itt); +} + +static void gicv5_its_free_itt(struct gicv5_its_dev *its_dev) +{ + if (!its_dev->itt_cfg.l2itt) + gicv5_its_free_itt_linear(its_dev); + else + gicv5_its_free_itt_two_level(its_dev); +} + +static int gicv5_its_create_itt_linear(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev, + unsigned int event_id_bits) +{ + unsigned int num_ents = BIT(event_id_bits); + __le64 *itt; + + itt = kcalloc(num_ents, sizeof(*itt), GFP_KERNEL); + if (!itt) + return -ENOMEM; + + its_dev->itt_cfg.linear.itt = itt; + its_dev->itt_cfg.linear.num_ents = num_ents; + its_dev->itt_cfg.l2itt = false; + its_dev->itt_cfg.event_id_bits = event_id_bits; + + gicv5_its_dcache_clean(its, itt, num_ents * sizeof(*itt)); + + return 0; +} + +/* + * Allocate a two-level ITT. All ITT entries are allocated in one go, unlike + * with the device table. Span may be used to limit the second level table + * size, where possible. + */ +static int gicv5_its_create_itt_two_level(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev, + unsigned int event_id_bits, + unsigned int itt_l2sz, + unsigned int num_events) +{ + unsigned int l1_bits, l2_bits, span, events_per_l2_table; + unsigned int i, complete_tables, final_span, num_ents; + __le64 *itt_l1, *itt_l2, **l2ptrs; + int ret; + u64 val; + + ret = gicv5_its_l2sz_to_l2_bits(itt_l2sz); + if (ret >= event_id_bits) { + pr_debug("Incorrect l2sz (0x%x) for %u EventID bits. Cannot allocate ITT\n", + itt_l2sz, event_id_bits); + return -EINVAL; + } + + l2_bits = ret; + + l1_bits = event_id_bits - l2_bits; + + num_ents = BIT(l1_bits); + + itt_l1 = kcalloc(num_ents, sizeof(*itt_l1), GFP_KERNEL); + if (!itt_l1) + return -ENOMEM; + + l2ptrs = kcalloc(num_ents, sizeof(*l2ptrs), GFP_KERNEL); + if (!l2ptrs) { + kfree(itt_l1); + return -ENOMEM; + } + + its_dev->itt_cfg.l2.l2ptrs = l2ptrs; + + its_dev->itt_cfg.l2.l2sz = itt_l2sz; + its_dev->itt_cfg.l2.l1itt = itt_l1; + its_dev->itt_cfg.l2.num_l1_ents = num_ents; + its_dev->itt_cfg.l2itt = true; + its_dev->itt_cfg.event_id_bits = event_id_bits; + + /* + * Need to determine how many entries there are per L2 - this is based + * on the number of bits in the table. + */ + events_per_l2_table = BIT(l2_bits); + complete_tables = num_events / events_per_l2_table; + final_span = order_base_2(num_events % events_per_l2_table); + + for (i = 0; i < num_ents; i++) { + size_t l2sz; + + span = i == complete_tables ? final_span : l2_bits; + + itt_l2 = kcalloc(BIT(span), sizeof(*itt_l2), GFP_KERNEL); + if (!itt_l2) { + ret = -ENOMEM; + goto out_free; + } + + its_dev->itt_cfg.l2.l2ptrs[i] = itt_l2; + + l2sz = BIT(span) * sizeof(*itt_l2); + + gicv5_its_dcache_clean(its, itt_l2, l2sz); + + val = (virt_to_phys(itt_l2) & GICV5_ITTL1E_L2_ADDR_MASK) | + FIELD_PREP(GICV5_ITTL1E_SPAN, span) | + FIELD_PREP(GICV5_ITTL1E_VALID, 0x1); + + WRITE_ONCE(itt_l1[i], cpu_to_le64(val)); + } + + gicv5_its_dcache_clean(its, itt_l1, num_ents * sizeof(*itt_l1)); + + return 0; + +out_free: + for (i = i - 1; i >= 0; i--) + kfree(its_dev->itt_cfg.l2.l2ptrs[i]); + + kfree(its_dev->itt_cfg.l2.l2ptrs); + kfree(itt_l1); + return ret; +} + +/* + * Function to check whether the device table or ITT table support + * a two-level table and if so depending on the number of id_bits + * requested, determine whether a two-level table is required. + * + * Return the 2-level size value if a two level table is deemed + * necessary. + */ +static bool gicv5_its_l2sz_two_level(bool devtab, u32 its_idr1, u8 id_bits, u8 *sz) +{ + unsigned int l2_bits, l2_sz; + + if (devtab && !FIELD_GET(GICV5_ITS_IDR1_DT_LEVELS, its_idr1)) + return false; + + if (!devtab && !FIELD_GET(GICV5_ITS_IDR1_ITT_LEVELS, its_idr1)) + return false; + + /* + * Pick an L2 size that matches the pagesize; if a match + * is not found, go for the smallest supported l2 size granule. + * + * This ensures that we will always be able to allocate + * contiguous memory at L2. + */ + switch (PAGE_SIZE) { + case SZ_64K: + if (GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(its_idr1)) { + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_64k; + break; + } + fallthrough; + case SZ_4K: + if (GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(its_idr1)) { + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k; + break; + } + fallthrough; + case SZ_16K: + if (GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(its_idr1)) { + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_16k; + break; + } + if (GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(its_idr1)) { + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k; + break; + } + if (GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(its_idr1)) { + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_64k; + break; + } + + l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k; + break; + } + + l2_bits = gicv5_its_l2sz_to_l2_bits(l2_sz); + + if (l2_bits > id_bits) + return false; + + *sz = l2_sz; + + return true; +} + +static __le64 *gicv5_its_device_get_itte_ref(struct gicv5_its_dev *its_dev, + u16 event_id) +{ + unsigned int l1_idx, l2_idx, l2_bits; + __le64 *l2_itt; + + if (!its_dev->itt_cfg.l2itt) { + __le64 *itt = its_dev->itt_cfg.linear.itt; + + return &itt[event_id]; + } + + l2_bits = gicv5_its_l2sz_to_l2_bits(its_dev->itt_cfg.l2.l2sz); + l1_idx = event_id >> l2_bits; + l2_idx = event_id & GENMASK(l2_bits - 1, 0); + l2_itt = its_dev->itt_cfg.l2.l2ptrs[l1_idx]; + + return &l2_itt[l2_idx]; +} + +static int gicv5_its_device_cache_inv(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev) +{ + u32 devicer; + u64 didr; + + didr = FIELD_PREP(GICV5_ITS_DIDR_DEVICEID, its_dev->device_id); + devicer = FIELD_PREP(GICV5_ITS_INV_DEVICER_I, 0x1) | + FIELD_PREP(GICV5_ITS_INV_DEVICER_EVENTID_BITS, + its_dev->itt_cfg.event_id_bits) | + FIELD_PREP(GICV5_ITS_INV_DEVICER_L1, 0x0); + its_writeq_relaxed(its, didr, GICV5_ITS_DIDR); + its_writel_relaxed(its, devicer, GICV5_ITS_INV_DEVICER); + + return gicv5_its_cache_sync(its); +} + +/* + * Allocate a level 2 device table entry, update L1 parent to reference it. + * Only used for 2-level device tables, and it is called on demand. + */ +static int gicv5_its_alloc_l2_devtab(struct gicv5_its_chip_data *its, + unsigned int l1_index) +{ + __le64 *l2devtab, *l1devtab = its->devtab_cfgr.l2.l1devtab; + u8 span, l2sz, l2_bits; + u64 l1dte; + + if (FIELD_GET(GICV5_DTL1E_VALID, le64_to_cpu(l1devtab[l1_index]))) + return 0; + + span = FIELD_GET(GICV5_DTL1E_SPAN, le64_to_cpu(l1devtab[l1_index])); + l2sz = devtab_cfgr_field(its, L2SZ); + + l2_bits = gicv5_its_l2sz_to_l2_bits(l2sz); + + /* + * Span allows us to create a smaller L2 device table. + * If it is too large, use the number of allowed L2 bits. + */ + if (span > l2_bits) + span = l2_bits; + + l2devtab = kcalloc(BIT(span), sizeof(*l2devtab), GFP_KERNEL); + if (!l2devtab) + return -ENOMEM; + + its->devtab_cfgr.l2.l2ptrs[l1_index] = l2devtab; + + l1dte = FIELD_PREP(GICV5_DTL1E_SPAN, span) | + (virt_to_phys(l2devtab) & GICV5_DTL1E_L2_ADDR_MASK) | + FIELD_PREP(GICV5_DTL1E_VALID, 0x1); + its_write_table_entry(its, &l1devtab[l1_index], l1dte); + + return 0; +} + +static __le64 *gicv5_its_devtab_get_dte_ref(struct gicv5_its_chip_data *its, + u32 device_id, bool alloc) +{ + u8 str = devtab_cfgr_field(its, STRUCTURE); + unsigned int l2sz, l2_bits, l1_idx, l2_idx; + __le64 *l2devtab; + int ret; + + if (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) { + l2devtab = its->devtab_cfgr.linear.devtab; + return &l2devtab[device_id]; + } + + l2sz = devtab_cfgr_field(its, L2SZ); + l2_bits = gicv5_its_l2sz_to_l2_bits(l2sz); + l1_idx = device_id >> l2_bits; + l2_idx = device_id & GENMASK(l2_bits - 1, 0); + + if (alloc) { + /* + * Allocate a new L2 device table here before + * continuing. We make the assumption that the span in + * the L1 table has been set correctly, and blindly use + * that value. + */ + ret = gicv5_its_alloc_l2_devtab(its, l1_idx); + if (ret) + return NULL; + } + + l2devtab = its->devtab_cfgr.l2.l2ptrs[l1_idx]; + return &l2devtab[l2_idx]; +} + +/* + * Register a new device in the device table. Allocate an ITT and + * program the L2DTE entry according to the ITT structure that + * was chosen. + */ +static int gicv5_its_device_register(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev) +{ + u8 event_id_bits, device_id_bits, itt_struct, itt_l2sz; + phys_addr_t itt_phys_base; + bool two_level_itt; + u32 idr1, idr2; + __le64 *dte; + u64 val; + int ret; + + device_id_bits = devtab_cfgr_field(its, DEVICEID_BITS); + + if (its_dev->device_id >= BIT(device_id_bits)) { + pr_err("Supplied DeviceID (%u) outside of Device Table range (%u)!", + its_dev->device_id, (u32)GENMASK(device_id_bits - 1, 0)); + return -EINVAL; + } + + dte = gicv5_its_devtab_get_dte_ref(its, its_dev->device_id, true); + if (!dte) + return -ENOMEM; + + if (FIELD_GET(GICV5_DTL2E_VALID, le64_to_cpu(*dte))) + return -EBUSY; + + /* + * Determine how many bits we need, validate those against the max. + * Based on these, determine if we should go for a 1- or 2-level ITT. + */ + event_id_bits = order_base_2(its_dev->num_events); + + idr2 = its_readl_relaxed(its, GICV5_ITS_IDR2); + + if (event_id_bits > FIELD_GET(GICV5_ITS_IDR2_EVENTID_BITS, idr2)) { + pr_err("Required EventID bits (%u) larger than supported bits (%u)!", + event_id_bits, + (u8)FIELD_GET(GICV5_ITS_IDR2_EVENTID_BITS, idr2)); + return -EINVAL; + } + + idr1 = its_readl_relaxed(its, GICV5_ITS_IDR1); + + /* + * L2 ITT size is programmed into the L2DTE regardless of + * whether a two-level or linear ITT is built, init it. + */ + itt_l2sz = 0; + + two_level_itt = gicv5_its_l2sz_two_level(false, idr1, event_id_bits, + &itt_l2sz); + if (two_level_itt) + ret = gicv5_its_create_itt_two_level(its, its_dev, event_id_bits, + itt_l2sz, + its_dev->num_events); + else + ret = gicv5_its_create_itt_linear(its, its_dev, event_id_bits); + if (ret) + return ret; + + itt_phys_base = two_level_itt ? virt_to_phys(its_dev->itt_cfg.l2.l1itt) : + virt_to_phys(its_dev->itt_cfg.linear.itt); + + itt_struct = two_level_itt ? GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL : + GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR; + + val = FIELD_PREP(GICV5_DTL2E_EVENT_ID_BITS, event_id_bits) | + FIELD_PREP(GICV5_DTL2E_ITT_STRUCTURE, itt_struct) | + (itt_phys_base & GICV5_DTL2E_ITT_ADDR_MASK) | + FIELD_PREP(GICV5_DTL2E_ITT_L2SZ, itt_l2sz) | + FIELD_PREP(GICV5_DTL2E_VALID, 0x1); + + its_write_table_entry(its, dte, val); + + ret = gicv5_its_device_cache_inv(its, its_dev); + if (ret) { + its_write_table_entry(its, dte, 0); + gicv5_its_free_itt(its_dev); + return ret; + } + + return 0; +} + +/* + * Unregister a device in the device table. Lookup the device by ID, free the + * corresponding ITT, mark the device as invalid in the device table. + */ +static int gicv5_its_device_unregister(struct gicv5_its_chip_data *its, + struct gicv5_its_dev *its_dev) +{ + __le64 *dte; + + dte = gicv5_its_devtab_get_dte_ref(its, its_dev->device_id, false); + + if (!FIELD_GET(GICV5_DTL2E_VALID, le64_to_cpu(*dte))) { + pr_debug("Device table entry for DeviceID 0x%x is not valid. Nothing to clean up!", + its_dev->device_id); + return -EINVAL; + } + + /* Zero everything - make it clear that this is an invalid entry */ + its_write_table_entry(its, dte, 0); + + gicv5_its_free_itt(its_dev); + + return gicv5_its_device_cache_inv(its, its_dev); +} + +/* + * Allocate a 1-level device table. All entries are allocated, but marked + * invalid. + */ +static int gicv5_its_alloc_devtab_linear(struct gicv5_its_chip_data *its, + u8 device_id_bits) +{ + __le64 *devtab; + size_t sz; + u64 baser; + u32 cfgr; + + /* + * We expect a GICv5 implementation requiring a large number of + * deviceID bits to support a 2-level device table. If that's not + * the case, cap the number of deviceIDs supported according to the + * kmalloc limits so that the system can chug along with a linear + * device table. + */ + sz = BIT_ULL(device_id_bits) * sizeof(*devtab); + if (sz > KMALLOC_MAX_SIZE) { + u8 device_id_cap = ilog2(KMALLOC_MAX_SIZE/sizeof(*devtab)); + + pr_warn("Limiting device ID bits from %u to %u\n", + device_id_bits, device_id_cap); + device_id_bits = device_id_cap; + } + + devtab = kcalloc(BIT(device_id_bits), sizeof(*devtab), GFP_KERNEL); + if (!devtab) + return -ENOMEM; + + gicv5_its_dcache_clean(its, devtab, sz); + + cfgr = FIELD_PREP(GICV5_ITS_DT_CFGR_STRUCTURE, + GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) | + FIELD_PREP(GICV5_ITS_DT_CFGR_L2SZ, 0) | + FIELD_PREP(GICV5_ITS_DT_CFGR_DEVICEID_BITS, device_id_bits); + its_writel_relaxed(its, cfgr, GICV5_ITS_DT_CFGR); + + baser = virt_to_phys(devtab) & GICV5_ITS_DT_BASER_ADDR_MASK; + its_writeq_relaxed(its, baser, GICV5_ITS_DT_BASER); + + its->devtab_cfgr.cfgr = cfgr; + its->devtab_cfgr.linear.devtab = devtab; + + return 0; +} + +/* + * Allocate a 2-level device table. L2 entries are not allocated, + * they are allocated on-demand. + */ +static int gicv5_its_alloc_devtab_two_level(struct gicv5_its_chip_data *its, + u8 device_id_bits, + u8 devtab_l2sz) +{ + unsigned int l1_bits, l2_bits, i; + __le64 *l1devtab, **l2ptrs; + size_t l1_sz; + u64 baser; + u32 cfgr; + + l2_bits = gicv5_its_l2sz_to_l2_bits(devtab_l2sz); + + l1_bits = device_id_bits - l2_bits; + l1_sz = BIT(l1_bits) * sizeof(*l1devtab); + /* + * With 2-level device table support it is highly unlikely + * that we are not able to allocate the required amount of + * device table memory to cover deviceID space; cap the + * deviceID space if we encounter such set-up. + * If this ever becomes a problem we could revisit the policy + * behind level 2 size selection to reduce level-1 deviceID bits. + */ + if (l1_sz > KMALLOC_MAX_SIZE) { + l1_bits = ilog2(KMALLOC_MAX_SIZE/sizeof(*l1devtab)); + + pr_warn("Limiting device ID bits from %u to %u\n", + device_id_bits, l1_bits + l2_bits); + device_id_bits = l1_bits + l2_bits; + l1_sz = KMALLOC_MAX_SIZE; + } + + l1devtab = kcalloc(BIT(l1_bits), sizeof(*l1devtab), GFP_KERNEL); + if (!l1devtab) + return -ENOMEM; + + l2ptrs = kcalloc(BIT(l1_bits), sizeof(*l2ptrs), GFP_KERNEL); + if (!l2ptrs) { + kfree(l1devtab); + return -ENOMEM; + } + + for (i = 0; i < BIT(l1_bits); i++) + l1devtab[i] = cpu_to_le64(FIELD_PREP(GICV5_DTL1E_SPAN, l2_bits)); + + gicv5_its_dcache_clean(its, l1devtab, l1_sz); + + cfgr = FIELD_PREP(GICV5_ITS_DT_CFGR_STRUCTURE, + GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL) | + FIELD_PREP(GICV5_ITS_DT_CFGR_L2SZ, devtab_l2sz) | + FIELD_PREP(GICV5_ITS_DT_CFGR_DEVICEID_BITS, device_id_bits); + its_writel_relaxed(its, cfgr, GICV5_ITS_DT_CFGR); + + baser = virt_to_phys(l1devtab) & GICV5_ITS_DT_BASER_ADDR_MASK; + its_writeq_relaxed(its, baser, GICV5_ITS_DT_BASER); + + its->devtab_cfgr.cfgr = cfgr; + its->devtab_cfgr.l2.l1devtab = l1devtab; + its->devtab_cfgr.l2.l2ptrs = l2ptrs; + + return 0; +} + +/* + * Initialise the device table as either 1- or 2-level depending on what is + * supported by the hardware. + */ +static int gicv5_its_init_devtab(struct gicv5_its_chip_data *its) +{ + u8 device_id_bits, devtab_l2sz; + bool two_level_devtab; + u32 idr1; + + idr1 = its_readl_relaxed(its, GICV5_ITS_IDR1); + + device_id_bits = FIELD_GET(GICV5_ITS_IDR1_DEVICEID_BITS, idr1); + two_level_devtab = gicv5_its_l2sz_two_level(true, idr1, device_id_bits, + &devtab_l2sz); + if (two_level_devtab) + return gicv5_its_alloc_devtab_two_level(its, device_id_bits, + devtab_l2sz); + else + return gicv5_its_alloc_devtab_linear(its, device_id_bits); +} + +static void gicv5_its_deinit_devtab(struct gicv5_its_chip_data *its) +{ + u8 str = devtab_cfgr_field(its, STRUCTURE); + + if (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) { + kfree(its->devtab_cfgr.linear.devtab); + } else { + kfree(its->devtab_cfgr.l2.l1devtab); + kfree(its->devtab_cfgr.l2.l2ptrs); + } +} + +static void gicv5_its_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d); + u64 addr = its_dev->its_trans_phys_base; + + msg->data = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq); + msi_msg_set_addr(irq_data_get_msi_desc(d), msg, addr); +} + +static const struct irq_chip gicv5_its_irq_chip = { + .name = "GICv5-ITS-MSI", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_get_irqchip_state = irq_chip_get_parent_state, + .irq_set_irqchip_state = irq_chip_set_parent_state, + .irq_compose_msi_msg = gicv5_its_compose_msi_msg, +}; + +static struct gicv5_its_dev *gicv5_its_find_device(struct gicv5_its_chip_data *its, + u32 device_id) +{ + struct gicv5_its_dev *dev = xa_load(&its->its_devices, device_id); + + return dev ? dev : ERR_PTR(-ENODEV); +} + +static struct gicv5_its_dev *gicv5_its_alloc_device(struct gicv5_its_chip_data *its, int nvec, + u32 dev_id) +{ + struct gicv5_its_dev *its_dev; + void *entry; + int ret; + + its_dev = gicv5_its_find_device(its, dev_id); + if (!IS_ERR(its_dev)) { + pr_err("A device with this DeviceID (0x%x) has already been registered.\n", + dev_id); + + return ERR_PTR(-EBUSY); + } + + its_dev = kzalloc(sizeof(*its_dev), GFP_KERNEL); + if (!its_dev) + return ERR_PTR(-ENOMEM); + + its_dev->device_id = dev_id; + its_dev->num_events = nvec; + + ret = gicv5_its_device_register(its, its_dev); + if (ret) { + pr_err("Failed to register the device\n"); + goto out_dev_free; + } + + gicv5_its_device_cache_inv(its, its_dev); + + its_dev->its_node = its; + + its_dev->event_map = (unsigned long *)bitmap_zalloc(its_dev->num_events, GFP_KERNEL); + if (!its_dev->event_map) { + ret = -ENOMEM; + goto out_unregister; + } + + entry = xa_store(&its->its_devices, dev_id, its_dev, GFP_KERNEL); + if (xa_is_err(entry)) { + ret = xa_err(entry); + goto out_bitmap_free; + } + + return its_dev; + +out_bitmap_free: + bitmap_free(its_dev->event_map); +out_unregister: + gicv5_its_device_unregister(its, its_dev); +out_dev_free: + kfree(its_dev); + return ERR_PTR(ret); +} + +static int gicv5_its_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + u32 dev_id = info->scratchpad[0].ul; + struct msi_domain_info *msi_info; + struct gicv5_its_chip_data *its; + struct gicv5_its_dev *its_dev; + + msi_info = msi_get_domain_info(domain); + its = msi_info->data; + + guard(mutex)(&its->dev_alloc_lock); + + its_dev = gicv5_its_alloc_device(its, nvec, dev_id); + if (IS_ERR(its_dev)) + return PTR_ERR(its_dev); + + its_dev->its_trans_phys_base = info->scratchpad[1].ul; + info->scratchpad[0].ptr = its_dev; + + return 0; +} + +static void gicv5_its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info) +{ + struct gicv5_its_dev *its_dev = info->scratchpad[0].ptr; + struct msi_domain_info *msi_info; + struct gicv5_its_chip_data *its; + + msi_info = msi_get_domain_info(domain); + its = msi_info->data; + + guard(mutex)(&its->dev_alloc_lock); + + if (WARN_ON_ONCE(!bitmap_empty(its_dev->event_map, its_dev->num_events))) + return; + + xa_erase(&its->its_devices, its_dev->device_id); + bitmap_free(its_dev->event_map); + gicv5_its_device_unregister(its, its_dev); + kfree(its_dev); +} + +static struct msi_domain_ops gicv5_its_msi_domain_ops = { + .msi_prepare = gicv5_its_msi_prepare, + .msi_teardown = gicv5_its_msi_teardown, +}; + +static int gicv5_its_map_event(struct gicv5_its_dev *its_dev, u16 event_id, u32 lpi) +{ + struct gicv5_its_chip_data *its = its_dev->its_node; + u64 itt_entry; + __le64 *itte; + + itte = gicv5_its_device_get_itte_ref(its_dev, event_id); + + if (FIELD_GET(GICV5_ITTL2E_VALID, *itte)) + return -EEXIST; + + itt_entry = FIELD_PREP(GICV5_ITTL2E_LPI_ID, lpi) | + FIELD_PREP(GICV5_ITTL2E_VALID, 0x1); + + its_write_table_entry(its, itte, itt_entry); + + gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id); + + return 0; +} + +static void gicv5_its_unmap_event(struct gicv5_its_dev *its_dev, u16 event_id) +{ + struct gicv5_its_chip_data *its = its_dev->its_node; + u64 itte_val; + __le64 *itte; + + itte = gicv5_its_device_get_itte_ref(its_dev, event_id); + + itte_val = le64_to_cpu(*itte); + itte_val &= ~GICV5_ITTL2E_VALID; + + its_write_table_entry(its, itte, itte_val); + + gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id); +} + +static int gicv5_its_alloc_eventid(struct gicv5_its_dev *its_dev, msi_alloc_info_t *info, + unsigned int nr_irqs, u32 *eventid) +{ + int event_id_base; + + if (!(info->flags & MSI_ALLOC_FLAGS_FIXED_MSG_DATA)) { + event_id_base = bitmap_find_free_region(its_dev->event_map, + its_dev->num_events, + get_count_order(nr_irqs)); + if (event_id_base < 0) + return event_id_base; + } else { + /* + * We want to have a fixed EventID mapped for hardcoded + * message data allocations. + */ + if (WARN_ON_ONCE(nr_irqs != 1)) + return -EINVAL; + + event_id_base = info->hwirq; + + if (event_id_base >= its_dev->num_events) { + pr_err("EventID ouside of ITT range; cannot allocate an ITT entry!\n"); + + return -EINVAL; + } + + if (test_and_set_bit(event_id_base, its_dev->event_map)) { + pr_warn("Can't reserve event_id bitmap\n"); + return -EINVAL; + + } + } + + *eventid = event_id_base; + + return 0; +} + +static void gicv5_its_free_eventid(struct gicv5_its_dev *its_dev, u32 event_id_base, + unsigned int nr_irqs) +{ + bitmap_release_region(its_dev->event_map, event_id_base, + get_count_order(nr_irqs)); +} + +static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + u32 device_id, event_id_base, lpi; + struct gicv5_its_dev *its_dev; + msi_alloc_info_t *info = arg; + irq_hw_number_t hwirq; + struct irq_data *irqd; + int ret, i; + + its_dev = info->scratchpad[0].ptr; + + ret = gicv5_its_alloc_eventid(its_dev, info, nr_irqs, &event_id_base); + if (ret) + return ret; + + ret = iommu_dma_prepare_msi(info->desc, its_dev->its_trans_phys_base); + if (ret) + goto out_eventid; + + device_id = its_dev->device_id; + + for (i = 0; i < nr_irqs; i++) { + lpi = gicv5_alloc_lpi(); + if (ret < 0) { + pr_debug("Failed to find free LPI!\n"); + goto out_eventid; + } + + ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi); + if (ret) + goto out_free_lpi; + + /* + * Store eventid and deviceid into the hwirq for later use. + * + * hwirq = event_id << 32 | device_id + */ + hwirq = FIELD_PREP(GICV5_ITS_HWIRQ_DEVICE_ID, device_id) | + FIELD_PREP(GICV5_ITS_HWIRQ_EVENT_ID, (u64)event_id_base + i); + irq_domain_set_info(domain, virq + i, hwirq, + &gicv5_its_irq_chip, its_dev, + handle_fasteoi_irq, NULL, NULL); + + irqd = irq_get_irq_data(virq + i); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); + irqd_set_resend_when_in_progress(irqd); + } + + return 0; + +out_free_lpi: + gicv5_free_lpi(lpi); +out_eventid: + gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs); + return ret; +} + +static void gicv5_its_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct gicv5_its_chip_data *its; + struct gicv5_its_dev *its_dev; + u16 event_id_base; + unsigned int i; + + its_dev = irq_data_get_irq_chip_data(d); + its = its_dev->its_node; + + event_id_base = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq); + + bitmap_release_region(its_dev->event_map, event_id_base, + get_count_order(nr_irqs)); + + /* Hierarchically free irq data */ + for (i = 0; i < nr_irqs; i++) { + d = irq_domain_get_irq_data(domain, virq + i); + + gicv5_free_lpi(d->parent_data->hwirq); + irq_domain_reset_irq_data(d); + irq_domain_free_irqs_parent(domain, virq + i, 1); + } + + gicv5_its_syncr(its, its_dev); + gicv5_irs_syncr(); +} + +static int gicv5_its_irq_domain_activate(struct irq_domain *domain, struct irq_data *d, + bool reserve) +{ + struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d); + u16 event_id; + u32 lpi; + + event_id = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq); + lpi = d->parent_data->hwirq; + + return gicv5_its_map_event(its_dev, event_id, lpi); +} + +static void gicv5_its_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *d) +{ + struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d); + u16 event_id; + + event_id = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq); + + gicv5_its_unmap_event(its_dev, event_id); +} + +static const struct irq_domain_ops gicv5_its_irq_domain_ops = { + .alloc = gicv5_its_irq_domain_alloc, + .free = gicv5_its_irq_domain_free, + .activate = gicv5_its_irq_domain_activate, + .deactivate = gicv5_its_irq_domain_deactivate, + .select = msi_lib_irq_domain_select, +}; + +static int gicv5_its_write_cr0(struct gicv5_its_chip_data *its, bool enable) +{ + u32 cr0 = FIELD_PREP(GICV5_ITS_CR0_ITSEN, enable); + + its_writel_relaxed(its, cr0, GICV5_ITS_CR0); + return gicv5_wait_for_op_atomic(its->its_base, GICV5_ITS_CR0, + GICV5_ITS_CR0_IDLE, NULL); +} + +static int gicv5_its_enable(struct gicv5_its_chip_data *its) +{ + return gicv5_its_write_cr0(its, true); +} + +static int gicv5_its_disable(struct gicv5_its_chip_data *its) +{ + return gicv5_its_write_cr0(its, false); +} + +static void gicv5_its_print_info(struct gicv5_its_chip_data *its_node) +{ + bool devtab_linear; + u8 device_id_bits; + u8 str; + + device_id_bits = devtab_cfgr_field(its_node, DEVICEID_BITS); + + str = devtab_cfgr_field(its_node, STRUCTURE); + devtab_linear = (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR); + + pr_info("ITS %s enabled using %s device table device_id_bits %u\n", + fwnode_get_name(its_node->fwnode), + devtab_linear ? "linear" : "2-level", + device_id_bits); +} + +static int gicv5_its_init_domain(struct gicv5_its_chip_data *its, struct irq_domain *parent) +{ + struct irq_domain_info dom_info = { + .fwnode = its->fwnode, + .ops = &gicv5_its_irq_domain_ops, + .domain_flags = its->msi_domain_flags, + .parent = parent, + }; + struct msi_domain_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->ops = &gicv5_its_msi_domain_ops; + info->data = its; + dom_info.host_data = info; + + if (!msi_create_parent_irq_domain(&dom_info, &gic_v5_its_msi_parent_ops)) { + kfree(info); + return -ENOMEM; + } + + return 0; +} + +static int __init gicv5_its_init_bases(void __iomem *its_base, struct fwnode_handle *handle, + struct irq_domain *parent_domain) +{ + struct device_node *np = to_of_node(handle); + struct gicv5_its_chip_data *its_node; + u32 cr0, cr1; + bool enabled; + int ret; + + its_node = kzalloc(sizeof(*its_node), GFP_KERNEL); + if (!its_node) + return -ENOMEM; + + mutex_init(&its_node->dev_alloc_lock); + xa_init(&its_node->its_devices); + its_node->fwnode = handle; + its_node->its_base = its_base; + its_node->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI | + IRQ_DOMAIN_FLAG_FWNODE_PARENT; + + cr0 = its_readl_relaxed(its_node, GICV5_ITS_CR0); + enabled = FIELD_GET(GICV5_ITS_CR0_ITSEN, cr0); + if (WARN(enabled, "ITS %s enabled, disabling it before proceeding\n", np->full_name)) { + ret = gicv5_its_disable(its_node); + if (ret) + goto out_free_node; + } + + if (of_property_read_bool(np, "dma-noncoherent")) { + /* + * A non-coherent ITS implies that some cache levels cannot be + * used coherently by the cores and GIC. Our only option is to mark + * memory attributes for the GIC as non-cacheable; by default, + * non-cacheable memory attributes imply outer-shareable + * shareability, the value written into ITS_CR1_SH is ignored. + */ + cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_NO_READ_ALLOC) | + FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_NON_CACHE) | + FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_NON_CACHE); + its_node->flags |= ITS_FLAGS_NON_COHERENT; + } else { + cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_READ_ALLOC) | + FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_WB_CACHE) | + FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_WB_CACHE) | + FIELD_PREP(GICV5_ITS_CR1_SH, GICV5_INNER_SHARE); + } + + its_writel_relaxed(its_node, cr1, GICV5_ITS_CR1); + + ret = gicv5_its_init_devtab(its_node); + if (ret) + goto out_free_node; + + ret = gicv5_its_enable(its_node); + if (ret) + goto out_free_devtab; + + ret = gicv5_its_init_domain(its_node, parent_domain); + if (ret) + goto out_disable_its; + + gicv5_its_print_info(its_node); + + return 0; + +out_disable_its: + gicv5_its_disable(its_node); +out_free_devtab: + gicv5_its_deinit_devtab(its_node); +out_free_node: + kfree(its_node); + return ret; +} + +static int __init gicv5_its_init(struct device_node *node) +{ + void __iomem *its_base; + int ret, idx; + + idx = of_property_match_string(node, "reg-names", "ns-config"); + if (idx < 0) { + pr_err("%pOF: ns-config reg-name not present\n", node); + return -ENODEV; + } + + its_base = of_io_request_and_map(node, idx, of_node_full_name(node)); + if (IS_ERR(its_base)) { + pr_err("%pOF: unable to map GICv5 ITS_CONFIG_FRAME\n", node); + return PTR_ERR(its_base); + } + + ret = gicv5_its_init_bases(its_base, of_fwnode_handle(node), + gicv5_global_data.lpi_domain); + if (ret) + goto out_unmap; + + return 0; + +out_unmap: + iounmap(its_base); + return ret; +} + +void __init gicv5_its_of_probe(struct device_node *parent) +{ + struct device_node *np; + + for_each_available_child_of_node(parent, np) { + if (!of_device_is_compatible(np, "arm,gic-v5-its")) + continue; + + if (gicv5_its_init(np)) + pr_err("Failed to init ITS %s\n", np->full_name); + } +} diff --git a/drivers/irqchip/irq-gic-v5-iwb.c b/drivers/irqchip/irq-gic-v5-iwb.c new file mode 100644 index 000000000000..ed72fbdd4900 --- /dev/null +++ b/drivers/irqchip/irq-gic-v5-iwb.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved. + */ +#define pr_fmt(fmt) "GICv5 IWB: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-v5.h> + +struct gicv5_iwb_chip_data { + void __iomem *iwb_base; + u16 nr_regs; +}; + +static u32 iwb_readl_relaxed(struct gicv5_iwb_chip_data *iwb_node, const u32 reg_offset) +{ + return readl_relaxed(iwb_node->iwb_base + reg_offset); +} + +static void iwb_writel_relaxed(struct gicv5_iwb_chip_data *iwb_node, const u32 val, + const u32 reg_offset) +{ + writel_relaxed(val, iwb_node->iwb_base + reg_offset); +} + +static int gicv5_iwb_wait_for_wenabler(struct gicv5_iwb_chip_data *iwb_node) +{ + return gicv5_wait_for_op_atomic(iwb_node->iwb_base, GICV5_IWB_WENABLE_STATUSR, + GICV5_IWB_WENABLE_STATUSR_IDLE, NULL); +} + +static int __gicv5_iwb_set_wire_enable(struct gicv5_iwb_chip_data *iwb_node, + u32 iwb_wire, bool enable) +{ + u32 n = iwb_wire / 32; + u8 i = iwb_wire % 32; + u32 val; + + if (n >= iwb_node->nr_regs) { + pr_err("IWB_WENABLER<n> is invalid for n=%u\n", n); + return -EINVAL; + } + + /* + * Enable IWB wire/pin at this point + * Note: This is not the same as enabling the interrupt + */ + val = iwb_readl_relaxed(iwb_node, GICV5_IWB_WENABLER + (4 * n)); + if (enable) + val |= BIT(i); + else + val &= ~BIT(i); + iwb_writel_relaxed(iwb_node, val, GICV5_IWB_WENABLER + (4 * n)); + + return gicv5_iwb_wait_for_wenabler(iwb_node); +} + +static int gicv5_iwb_enable_wire(struct gicv5_iwb_chip_data *iwb_node, + u32 iwb_wire) +{ + return __gicv5_iwb_set_wire_enable(iwb_node, iwb_wire, true); +} + +static int gicv5_iwb_disable_wire(struct gicv5_iwb_chip_data *iwb_node, + u32 iwb_wire) +{ + return __gicv5_iwb_set_wire_enable(iwb_node, iwb_wire, false); +} + +static void gicv5_iwb_irq_disable(struct irq_data *d) +{ + struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d); + + gicv5_iwb_disable_wire(iwb_node, d->hwirq); + irq_chip_disable_parent(d); +} + +static void gicv5_iwb_irq_enable(struct irq_data *d) +{ + struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d); + + gicv5_iwb_enable_wire(iwb_node, d->hwirq); + irq_chip_enable_parent(d); +} + +static int gicv5_iwb_set_type(struct irq_data *d, unsigned int type) +{ + struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d); + u32 iwb_wire, n, wtmr; + u8 i; + + iwb_wire = d->hwirq; + i = iwb_wire % 32; + n = iwb_wire / 32; + + if (n >= iwb_node->nr_regs) { + pr_err_once("reg %u out of range\n", n); + return -EINVAL; + } + + wtmr = iwb_readl_relaxed(iwb_node, GICV5_IWB_WTMR + (4 * n)); + + switch (type) { + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_LEVEL_LOW: + wtmr |= BIT(i); + break; + case IRQ_TYPE_EDGE_RISING: + case IRQ_TYPE_EDGE_FALLING: + wtmr &= ~BIT(i); + break; + default: + pr_debug("unexpected wire trigger mode"); + return -EINVAL; + } + + iwb_writel_relaxed(iwb_node, wtmr, GICV5_IWB_WTMR + (4 * n)); + + return 0; +} + +static void gicv5_iwb_domain_set_desc(msi_alloc_info_t *alloc_info, struct msi_desc *desc) +{ + alloc_info->desc = desc; + alloc_info->hwirq = (u32)desc->data.icookie.value; +} + +static int gicv5_iwb_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, + unsigned int *type) +{ + if (!is_of_node(fwspec->fwnode)) + return -EINVAL; + + if (fwspec->param_count < 2) + return -EINVAL; + + /* + * param[0] is be the wire + * param[1] is the interrupt type + */ + *hwirq = fwspec->param[0]; + *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + + return 0; +} + +static void gicv5_iwb_write_msi_msg(struct irq_data *d, struct msi_msg *msg) {} + +static const struct msi_domain_template iwb_msi_template = { + .chip = { + .name = "GICv5-IWB", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_enable = gicv5_iwb_irq_enable, + .irq_disable = gicv5_iwb_irq_disable, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_type = gicv5_iwb_set_type, + .irq_write_msi_msg = gicv5_iwb_write_msi_msg, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_get_irqchip_state = irq_chip_get_parent_state, + .irq_set_irqchip_state = irq_chip_set_parent_state, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, + }, + + .ops = { + .set_desc = gicv5_iwb_domain_set_desc, + .msi_translate = gicv5_iwb_irq_domain_translate, + }, + + .info = { + .bus_token = DOMAIN_BUS_WIRED_TO_MSI, + .flags = MSI_FLAG_USE_DEV_FWNODE, + }, + + .alloc_info = { + .flags = MSI_ALLOC_FLAGS_FIXED_MSG_DATA, + }, +}; + +static bool gicv5_iwb_create_device_domain(struct device *dev, unsigned int size, + struct gicv5_iwb_chip_data *iwb_node) +{ + if (WARN_ON_ONCE(!dev->msi.domain)) + return false; + + return msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, + &iwb_msi_template, size, + NULL, iwb_node); +} + +static struct gicv5_iwb_chip_data * +gicv5_iwb_init_bases(void __iomem *iwb_base, struct platform_device *pdev) +{ + u32 nr_wires, idr0, cr0; + unsigned int n; + int ret; + + struct gicv5_iwb_chip_data *iwb_node __free(kfree) = kzalloc(sizeof(*iwb_node), + GFP_KERNEL); + if (!iwb_node) + return ERR_PTR(-ENOMEM); + + iwb_node->iwb_base = iwb_base; + + idr0 = iwb_readl_relaxed(iwb_node, GICV5_IWB_IDR0); + nr_wires = (FIELD_GET(GICV5_IWB_IDR0_IW_RANGE, idr0) + 1) * 32; + + cr0 = iwb_readl_relaxed(iwb_node, GICV5_IWB_CR0); + if (!FIELD_GET(GICV5_IWB_CR0_IWBEN, cr0)) { + dev_err(&pdev->dev, "IWB must be enabled in firmware\n"); + return ERR_PTR(-EINVAL); + } + + iwb_node->nr_regs = FIELD_GET(GICV5_IWB_IDR0_IW_RANGE, idr0) + 1; + + for (n = 0; n < iwb_node->nr_regs; n++) + iwb_writel_relaxed(iwb_node, 0, GICV5_IWB_WENABLER + (sizeof(u32) * n)); + + ret = gicv5_iwb_wait_for_wenabler(iwb_node); + if (ret) + return ERR_PTR(ret); + + if (!gicv5_iwb_create_device_domain(&pdev->dev, nr_wires, iwb_node)) + return ERR_PTR(-ENOMEM); + + return_ptr(iwb_node); +} + +static int gicv5_iwb_device_probe(struct platform_device *pdev) +{ + struct gicv5_iwb_chip_data *iwb_node; + void __iomem *iwb_base; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + iwb_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!iwb_base) { + dev_err(&pdev->dev, "failed to ioremap %pR\n", res); + return -ENOMEM; + } + + iwb_node = gicv5_iwb_init_bases(iwb_base, pdev); + if (IS_ERR(iwb_node)) { + ret = PTR_ERR(iwb_node); + goto out_unmap; + } + + return 0; + +out_unmap: + iounmap(iwb_base); + return ret; +} + +static const struct of_device_id gicv5_iwb_of_match[] = { + { .compatible = "arm,gic-v5-iwb" }, + { /* END */ } +}; +MODULE_DEVICE_TABLE(of, gicv5_iwb_of_match); + +static struct platform_driver gicv5_iwb_platform_driver = { + .driver = { + .name = "GICv5 IWB", + .of_match_table = gicv5_iwb_of_match, + .suppress_bind_attrs = true, + }, + .probe = gicv5_iwb_device_probe, +}; + +module_platform_driver(gicv5_iwb_platform_driver); diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c new file mode 100644 index 000000000000..4bd224f359a7 --- /dev/null +++ b/drivers/irqchip/irq-gic-v5.c @@ -0,0 +1,1137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved. + */ + +#define pr_fmt(fmt) "GICv5: " fmt + +#include <linux/cpuhotplug.h> +#include <linux/idr.h> +#include <linux/irqdomain.h> +#include <linux/slab.h> +#include <linux/wordpart.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-v5.h> +#include <linux/irqchip/arm-vgic-info.h> + +#include <asm/cpufeature.h> +#include <asm/exception.h> + +static u8 pri_bits __ro_after_init = 5; + +#define GICV5_IRQ_PRI_MASK 0x1f +#define GICV5_IRQ_PRI_MI (GICV5_IRQ_PRI_MASK & GENMASK(4, 5 - pri_bits)) + +#define PPI_NR 128 + +static bool gicv5_cpuif_has_gcie(void) +{ + return this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF); +} + +struct gicv5_chip_data gicv5_global_data __read_mostly; + +static DEFINE_IDA(lpi_ida); +static u32 num_lpis __ro_after_init; + +void __init gicv5_init_lpis(u32 lpis) +{ + num_lpis = lpis; +} + +void __init gicv5_deinit_lpis(void) +{ + num_lpis = 0; +} + +static int alloc_lpi(void) +{ + if (!num_lpis) + return -ENOSPC; + + return ida_alloc_max(&lpi_ida, num_lpis - 1, GFP_KERNEL); +} + +static void release_lpi(u32 lpi) +{ + ida_free(&lpi_ida, lpi); +} + +int gicv5_alloc_lpi(void) +{ + return alloc_lpi(); +} + +void gicv5_free_lpi(u32 lpi) +{ + release_lpi(lpi); +} + +static void gicv5_ppi_priority_init(void) +{ + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR0_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR1_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR2_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR3_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR4_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR5_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR6_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR7_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR8_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR9_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR10_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR11_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR12_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR13_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR14_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR15_EL1); + + /* + * Context syncronization required to make sure system register writes + * effects are synchronised. + */ + isb(); +} + +static void gicv5_hwirq_init(irq_hw_number_t hwirq, u8 priority, u8 hwirq_type) +{ + u64 cdpri, cdaff; + u16 iaffid; + int ret; + + if (hwirq_type == GICV5_HWIRQ_TYPE_LPI || hwirq_type == GICV5_HWIRQ_TYPE_SPI) { + cdpri = FIELD_PREP(GICV5_GIC_CDPRI_PRIORITY_MASK, priority) | + FIELD_PREP(GICV5_GIC_CDPRI_TYPE_MASK, hwirq_type) | + FIELD_PREP(GICV5_GIC_CDPRI_ID_MASK, hwirq); + gic_insn(cdpri, CDPRI); + + ret = gicv5_irs_cpu_to_iaffid(smp_processor_id(), &iaffid); + + if (WARN_ON_ONCE(ret)) + return; + + cdaff = FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid) | + FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) | + FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, hwirq); + gic_insn(cdaff, CDAFF); + } +} + +static void gicv5_ppi_irq_mask(struct irq_data *d) +{ + u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64); + + if (d->hwirq < 64) + sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, hwirq_id_bit, 0); + else + sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, hwirq_id_bit, 0); + + /* + * We must ensure that the disable takes effect immediately to + * guarantee that the lazy-disabled IRQ mechanism works. + * A context synchronization event is required to guarantee it. + * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1. + */ + isb(); +} + +static void gicv5_iri_irq_mask(struct irq_data *d, u8 hwirq_type) +{ + u64 cddis; + + cddis = FIELD_PREP(GICV5_GIC_CDDIS_ID_MASK, d->hwirq) | + FIELD_PREP(GICV5_GIC_CDDIS_TYPE_MASK, hwirq_type); + + gic_insn(cddis, CDDIS); + /* + * We must make sure that GIC CDDIS write effects are propagated + * immediately to make sure the disable takes effect to guarantee + * that the lazy-disabled IRQ mechanism works. + * Rule R_XCLJC states that the effects of a GIC system instruction + * complete in finite time. + * The GSB ensures completion of the GIC instruction and prevents + * loads, stores and GIC instructions from executing part of their + * functionality before the GSB SYS. + */ + gsb_sys(); +} + +static void gicv5_spi_irq_mask(struct irq_data *d) +{ + gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_SPI); +} + +static void gicv5_lpi_irq_mask(struct irq_data *d) +{ + gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_LPI); +} + +static void gicv5_ppi_irq_unmask(struct irq_data *d) +{ + u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64); + + if (d->hwirq < 64) + sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, 0, hwirq_id_bit); + else + sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, 0, hwirq_id_bit); + /* + * We must ensure that the enable takes effect in finite time - a + * context synchronization event is required to guarantee it, we + * can not take for granted that would happen (eg a core going straight + * into idle after enabling a PPI). + * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1. + */ + isb(); +} + +static void gicv5_iri_irq_unmask(struct irq_data *d, u8 hwirq_type) +{ + u64 cden; + + cden = FIELD_PREP(GICV5_GIC_CDEN_ID_MASK, d->hwirq) | + FIELD_PREP(GICV5_GIC_CDEN_TYPE_MASK, hwirq_type); + /* + * Rule R_XCLJC states that the effects of a GIC system instruction + * complete in finite time and that's the only requirement when + * unmasking an SPI/LPI IRQ. + */ + gic_insn(cden, CDEN); +} + +static void gicv5_spi_irq_unmask(struct irq_data *d) +{ + gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_SPI); +} + +static void gicv5_lpi_irq_unmask(struct irq_data *d) +{ + gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_LPI); +} + +static void gicv5_hwirq_eoi(u32 hwirq_id, u8 hwirq_type) +{ + u64 cddi; + + cddi = FIELD_PREP(GICV5_GIC_CDDI_ID_MASK, hwirq_id) | + FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, hwirq_type); + + gic_insn(cddi, CDDI); + + gic_insn(0, CDEOI); +} + +static void gicv5_ppi_irq_eoi(struct irq_data *d) +{ + /* Skip deactivate for forwarded PPI interrupts */ + if (irqd_is_forwarded_to_vcpu(d)) { + gic_insn(0, CDEOI); + return; + } + + gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_PPI); +} + +static void gicv5_spi_irq_eoi(struct irq_data *d) +{ + gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_SPI); +} + +static void gicv5_lpi_irq_eoi(struct irq_data *d) +{ + gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_LPI); +} + +static int gicv5_iri_irq_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force, u8 hwirq_type) +{ + int ret, cpuid; + u16 iaffid; + u64 cdaff; + + if (force) + cpuid = cpumask_first(mask_val); + else + cpuid = cpumask_any_and(mask_val, cpu_online_mask); + + ret = gicv5_irs_cpu_to_iaffid(cpuid, &iaffid); + if (ret) + return ret; + + cdaff = FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid) | + FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) | + FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, d->hwirq); + gic_insn(cdaff, CDAFF); + + irq_data_update_effective_affinity(d, cpumask_of(cpuid)); + + return IRQ_SET_MASK_OK_DONE; +} + +static int gicv5_spi_irq_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force) +{ + return gicv5_iri_irq_set_affinity(d, mask_val, force, + GICV5_HWIRQ_TYPE_SPI); +} + +static int gicv5_lpi_irq_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force) +{ + return gicv5_iri_irq_set_affinity(d, mask_val, force, + GICV5_HWIRQ_TYPE_LPI); +} + +enum ppi_reg { + PPI_PENDING, + PPI_ACTIVE, + PPI_HM +}; + +static __always_inline u64 read_ppi_sysreg_s(unsigned int irq, + const enum ppi_reg which) +{ + switch (which) { + case PPI_PENDING: + return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SPENDR0_EL1) : + read_sysreg_s(SYS_ICC_PPI_SPENDR1_EL1); + case PPI_ACTIVE: + return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SACTIVER0_EL1) : + read_sysreg_s(SYS_ICC_PPI_SACTIVER1_EL1); + case PPI_HM: + return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_HMR0_EL1) : + read_sysreg_s(SYS_ICC_PPI_HMR1_EL1); + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void write_ppi_sysreg_s(unsigned int irq, bool set, + const enum ppi_reg which) +{ + u64 bit = BIT_ULL(irq % 64); + + switch (which) { + case PPI_PENDING: + if (set) { + if (irq < 64) + write_sysreg_s(bit, SYS_ICC_PPI_SPENDR0_EL1); + else + write_sysreg_s(bit, SYS_ICC_PPI_SPENDR1_EL1); + } else { + if (irq < 64) + write_sysreg_s(bit, SYS_ICC_PPI_CPENDR0_EL1); + else + write_sysreg_s(bit, SYS_ICC_PPI_CPENDR1_EL1); + } + return; + case PPI_ACTIVE: + if (set) { + if (irq < 64) + write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER0_EL1); + else + write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER1_EL1); + } else { + if (irq < 64) + write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER0_EL1); + else + write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER1_EL1); + } + return; + default: + BUILD_BUG_ON(1); + } +} + +static int gicv5_ppi_irq_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool *state) +{ + u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64); + + switch (which) { + case IRQCHIP_STATE_PENDING: + *state = !!(read_ppi_sysreg_s(d->hwirq, PPI_PENDING) & hwirq_id_bit); + return 0; + case IRQCHIP_STATE_ACTIVE: + *state = !!(read_ppi_sysreg_s(d->hwirq, PPI_ACTIVE) & hwirq_id_bit); + return 0; + default: + pr_debug("Unexpected PPI irqchip state\n"); + return -EINVAL; + } +} + +static int gicv5_iri_irq_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool *state, u8 hwirq_type) +{ + u64 icsr, cdrcfg; + + cdrcfg = d->hwirq | FIELD_PREP(GICV5_GIC_CDRCFG_TYPE_MASK, hwirq_type); + + gic_insn(cdrcfg, CDRCFG); + isb(); + icsr = read_sysreg_s(SYS_ICC_ICSR_EL1); + + if (FIELD_GET(ICC_ICSR_EL1_F, icsr)) { + pr_err("ICSR_EL1 is invalid\n"); + return -EINVAL; + } + + switch (which) { + case IRQCHIP_STATE_PENDING: + *state = !!(FIELD_GET(ICC_ICSR_EL1_Pending, icsr)); + return 0; + + case IRQCHIP_STATE_ACTIVE: + *state = !!(FIELD_GET(ICC_ICSR_EL1_Active, icsr)); + return 0; + + default: + pr_debug("Unexpected irqchip_irq_state\n"); + return -EINVAL; + } +} + +static int gicv5_spi_irq_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool *state) +{ + return gicv5_iri_irq_get_irqchip_state(d, which, state, + GICV5_HWIRQ_TYPE_SPI); +} + +static int gicv5_lpi_irq_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool *state) +{ + return gicv5_iri_irq_get_irqchip_state(d, which, state, + GICV5_HWIRQ_TYPE_LPI); +} + +static int gicv5_ppi_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + switch (which) { + case IRQCHIP_STATE_PENDING: + write_ppi_sysreg_s(d->hwirq, state, PPI_PENDING); + return 0; + case IRQCHIP_STATE_ACTIVE: + write_ppi_sysreg_s(d->hwirq, state, PPI_ACTIVE); + return 0; + default: + pr_debug("Unexpected PPI irqchip state\n"); + return -EINVAL; + } +} + +static void gicv5_iri_irq_write_pending_state(struct irq_data *d, bool state, + u8 hwirq_type) +{ + u64 cdpend; + + cdpend = FIELD_PREP(GICV5_GIC_CDPEND_TYPE_MASK, hwirq_type) | + FIELD_PREP(GICV5_GIC_CDPEND_ID_MASK, d->hwirq) | + FIELD_PREP(GICV5_GIC_CDPEND_PENDING_MASK, state); + + gic_insn(cdpend, CDPEND); +} + +static void gicv5_spi_irq_write_pending_state(struct irq_data *d, bool state) +{ + gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_SPI); +} + +static void gicv5_lpi_irq_write_pending_state(struct irq_data *d, bool state) +{ + gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_LPI); +} + +static int gicv5_spi_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + switch (which) { + case IRQCHIP_STATE_PENDING: + gicv5_spi_irq_write_pending_state(d, state); + break; + default: + pr_debug("Unexpected irqchip_irq_state\n"); + return -EINVAL; + } + + return 0; +} + +static int gicv5_lpi_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + switch (which) { + case IRQCHIP_STATE_PENDING: + gicv5_lpi_irq_write_pending_state(d, state); + break; + + default: + pr_debug("Unexpected irqchip_irq_state\n"); + return -EINVAL; + } + + return 0; +} + +static int gicv5_spi_irq_retrigger(struct irq_data *data) +{ + return !gicv5_spi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, + true); +} + +static int gicv5_lpi_irq_retrigger(struct irq_data *data) +{ + return !gicv5_lpi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, + true); +} + +static void gicv5_ipi_send_single(struct irq_data *d, unsigned int cpu) +{ + /* Mark the LPI pending */ + irq_chip_retrigger_hierarchy(d); +} + +static bool gicv5_ppi_irq_is_level(irq_hw_number_t hwirq) +{ + u64 bit = BIT_ULL(hwirq % 64); + + return !!(read_ppi_sysreg_s(hwirq, PPI_HM) & bit); +} + +static int gicv5_ppi_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) +{ + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); + + return 0; +} + +static const struct irq_chip gicv5_ppi_irq_chip = { + .name = "GICv5-PPI", + .irq_mask = gicv5_ppi_irq_mask, + .irq_unmask = gicv5_ppi_irq_unmask, + .irq_eoi = gicv5_ppi_irq_eoi, + .irq_get_irqchip_state = gicv5_ppi_irq_get_irqchip_state, + .irq_set_irqchip_state = gicv5_ppi_irq_set_irqchip_state, + .irq_set_vcpu_affinity = gicv5_ppi_irq_set_vcpu_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static const struct irq_chip gicv5_spi_irq_chip = { + .name = "GICv5-SPI", + .irq_mask = gicv5_spi_irq_mask, + .irq_unmask = gicv5_spi_irq_unmask, + .irq_eoi = gicv5_spi_irq_eoi, + .irq_set_type = gicv5_spi_irq_set_type, + .irq_set_affinity = gicv5_spi_irq_set_affinity, + .irq_retrigger = gicv5_spi_irq_retrigger, + .irq_get_irqchip_state = gicv5_spi_irq_get_irqchip_state, + .irq_set_irqchip_state = gicv5_spi_irq_set_irqchip_state, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static const struct irq_chip gicv5_lpi_irq_chip = { + .name = "GICv5-LPI", + .irq_mask = gicv5_lpi_irq_mask, + .irq_unmask = gicv5_lpi_irq_unmask, + .irq_eoi = gicv5_lpi_irq_eoi, + .irq_set_affinity = gicv5_lpi_irq_set_affinity, + .irq_retrigger = gicv5_lpi_irq_retrigger, + .irq_get_irqchip_state = gicv5_lpi_irq_get_irqchip_state, + .irq_set_irqchip_state = gicv5_lpi_irq_set_irqchip_state, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static const struct irq_chip gicv5_ipi_irq_chip = { + .name = "GICv5-IPI", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_get_irqchip_state = irq_chip_get_parent_state, + .irq_set_irqchip_state = irq_chip_set_parent_state, + .ipi_send_single = gicv5_ipi_send_single, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static __always_inline int gicv5_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, + unsigned int *type, + const u8 hwirq_type) +{ + if (!is_of_node(fwspec->fwnode)) + return -EINVAL; + + if (fwspec->param_count < 3) + return -EINVAL; + + if (fwspec->param[0] != hwirq_type) + return -EINVAL; + + *hwirq = fwspec->param[1]; + + switch (hwirq_type) { + case GICV5_HWIRQ_TYPE_PPI: + /* + * Handling mode is hardcoded for PPIs, set the type using + * HW reported value. + */ + *type = gicv5_ppi_irq_is_level(*hwirq) ? IRQ_TYPE_LEVEL_LOW : + IRQ_TYPE_EDGE_RISING; + break; + case GICV5_HWIRQ_TYPE_SPI: + *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + break; + default: + BUILD_BUG_ON(1); + } + + return 0; +} + +static int gicv5_irq_ppi_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, + unsigned int *type) +{ + return gicv5_irq_domain_translate(d, fwspec, hwirq, type, + GICV5_HWIRQ_TYPE_PPI); +} + +static int gicv5_irq_ppi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + unsigned int type = IRQ_TYPE_NONE; + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + int ret; + + if (WARN_ON_ONCE(nr_irqs != 1)) + return -EINVAL; + + ret = gicv5_irq_ppi_domain_translate(domain, fwspec, &hwirq, &type); + if (ret) + return ret; + + if (type & IRQ_TYPE_LEVEL_MASK) + irq_set_status_flags(virq, IRQ_LEVEL); + + irq_set_percpu_devid(virq); + irq_domain_set_info(domain, virq, hwirq, &gicv5_ppi_irq_chip, NULL, + handle_percpu_devid_irq, NULL, NULL); + + return 0; +} + +static void gicv5_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d; + + if (WARN_ON_ONCE(nr_irqs != 1)) + return; + + d = irq_domain_get_irq_data(domain, virq); + + irq_set_handler(virq, NULL); + irq_domain_reset_irq_data(d); +} + +static int gicv5_irq_ppi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + if (fwspec->fwnode != d->fwnode) + return 0; + + if (fwspec->param[0] != GICV5_HWIRQ_TYPE_PPI) + return 0; + + return (d == gicv5_global_data.ppi_domain); +} + +static const struct irq_domain_ops gicv5_irq_ppi_domain_ops = { + .translate = gicv5_irq_ppi_domain_translate, + .alloc = gicv5_irq_ppi_domain_alloc, + .free = gicv5_irq_domain_free, + .select = gicv5_irq_ppi_domain_select +}; + +static int gicv5_irq_spi_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, + unsigned int *type) +{ + return gicv5_irq_domain_translate(d, fwspec, hwirq, type, + GICV5_HWIRQ_TYPE_SPI); +} + +static int gicv5_irq_spi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct gicv5_irs_chip_data *chip_data; + unsigned int type = IRQ_TYPE_NONE; + struct irq_fwspec *fwspec = arg; + struct irq_data *irqd; + irq_hw_number_t hwirq; + int ret; + + if (WARN_ON_ONCE(nr_irqs != 1)) + return -EINVAL; + + ret = gicv5_irq_spi_domain_translate(domain, fwspec, &hwirq, &type); + if (ret) + return ret; + + irqd = irq_desc_get_irq_data(irq_to_desc(virq)); + chip_data = gicv5_irs_lookup_by_spi_id(hwirq); + + irq_domain_set_info(domain, virq, hwirq, &gicv5_spi_irq_chip, chip_data, + handle_fasteoi_irq, NULL, NULL); + irq_set_probe(virq); + irqd_set_single_target(irqd); + + gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_SPI); + + return 0; +} + +static int gicv5_irq_spi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + if (fwspec->fwnode != d->fwnode) + return 0; + + if (fwspec->param[0] != GICV5_HWIRQ_TYPE_SPI) + return 0; + + return (d == gicv5_global_data.spi_domain); +} + +static const struct irq_domain_ops gicv5_irq_spi_domain_ops = { + .translate = gicv5_irq_spi_domain_translate, + .alloc = gicv5_irq_spi_domain_alloc, + .free = gicv5_irq_domain_free, + .select = gicv5_irq_spi_domain_select +}; + +static void gicv5_lpi_config_reset(struct irq_data *d) +{ + u64 cdhm; + + /* + * Reset LPIs handling mode to edge by default and clear pending + * state to make sure we start the LPI with a clean state from + * previous incarnations. + */ + cdhm = FIELD_PREP(GICV5_GIC_CDHM_HM_MASK, 0) | + FIELD_PREP(GICV5_GIC_CDHM_TYPE_MASK, GICV5_HWIRQ_TYPE_LPI) | + FIELD_PREP(GICV5_GIC_CDHM_ID_MASK, d->hwirq); + gic_insn(cdhm, CDHM); + + gicv5_lpi_irq_write_pending_state(d, false); +} + +static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + irq_hw_number_t hwirq; + struct irq_data *irqd; + u32 *lpi = arg; + int ret; + + if (WARN_ON_ONCE(nr_irqs != 1)) + return -EINVAL; + + hwirq = *lpi; + + irqd = irq_domain_get_irq_data(domain, virq); + + irq_domain_set_info(domain, virq, hwirq, &gicv5_lpi_irq_chip, NULL, + handle_fasteoi_irq, NULL, NULL); + irqd_set_single_target(irqd); + + ret = gicv5_irs_iste_alloc(hwirq); + if (ret < 0) + return ret; + + gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI); + gicv5_lpi_config_reset(irqd); + + return 0; +} + +static const struct irq_domain_ops gicv5_irq_lpi_domain_ops = { + .alloc = gicv5_irq_lpi_domain_alloc, + .free = gicv5_irq_domain_free, +}; + +void __init gicv5_init_lpi_domain(void) +{ + struct irq_domain *d; + + d = irq_domain_create_tree(NULL, &gicv5_irq_lpi_domain_ops, NULL); + gicv5_global_data.lpi_domain = d; +} + +void __init gicv5_free_lpi_domain(void) +{ + irq_domain_remove(gicv5_global_data.lpi_domain); + gicv5_global_data.lpi_domain = NULL; +} + +static int gicv5_irq_ipi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_data *irqd; + int ret, i; + u32 lpi; + + for (i = 0; i < nr_irqs; i++) { + ret = gicv5_alloc_lpi(); + if (ret < 0) + return ret; + + lpi = ret; + + ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi); + if (ret) { + gicv5_free_lpi(lpi); + return ret; + } + + irqd = irq_domain_get_irq_data(domain, virq + i); + + irq_domain_set_hwirq_and_chip(domain, virq + i, i, + &gicv5_ipi_irq_chip, NULL); + + irqd_set_single_target(irqd); + + irq_set_handler(virq + i, handle_percpu_irq); + } + + return 0; +} + +static void gicv5_irq_ipi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d; + unsigned int i; + + for (i = 0; i < nr_irqs; i++) { + d = irq_domain_get_irq_data(domain, virq + i); + + if (!d) + return; + + gicv5_free_lpi(d->parent_data->hwirq); + + irq_set_handler(virq + i, NULL); + irq_domain_reset_irq_data(d); + irq_domain_free_irqs_parent(domain, virq + i, 1); + } +} + +static const struct irq_domain_ops gicv5_irq_ipi_domain_ops = { + .alloc = gicv5_irq_ipi_domain_alloc, + .free = gicv5_irq_ipi_domain_free, +}; + +static void handle_irq_per_domain(u32 hwirq) +{ + u8 hwirq_type = FIELD_GET(GICV5_HWIRQ_TYPE, hwirq); + u32 hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, hwirq); + struct irq_domain *domain; + + switch (hwirq_type) { + case GICV5_HWIRQ_TYPE_PPI: + domain = gicv5_global_data.ppi_domain; + break; + case GICV5_HWIRQ_TYPE_SPI: + domain = gicv5_global_data.spi_domain; + break; + case GICV5_HWIRQ_TYPE_LPI: + domain = gicv5_global_data.lpi_domain; + break; + default: + pr_err_once("Unknown IRQ type, bail out\n"); + return; + } + + if (generic_handle_domain_irq(domain, hwirq_id)) { + pr_err_once("Could not handle, hwirq = 0x%x", hwirq_id); + gicv5_hwirq_eoi(hwirq_id, hwirq_type); + } +} + +static void __exception_irq_entry gicv5_handle_irq(struct pt_regs *regs) +{ + bool valid; + u32 hwirq; + u64 ia; + + ia = gicr_insn(CDIA); + valid = GICV5_GICR_CDIA_VALID(ia); + + if (!valid) + return; + + /* + * Ensure that the CDIA instruction effects (ie IRQ activation) are + * completed before handling the interrupt. + */ + gsb_ack(); + + /* + * Ensure instruction ordering between an acknowledgment and subsequent + * instructions in the IRQ handler using an ISB. + */ + isb(); + + hwirq = FIELD_GET(GICV5_HWIRQ_INTID, ia); + + handle_irq_per_domain(hwirq); +} + +static void gicv5_cpu_disable_interrupts(void) +{ + u64 cr0; + + cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0); + write_sysreg_s(cr0, SYS_ICC_CR0_EL1); +} + +static void gicv5_cpu_enable_interrupts(void) +{ + u64 cr0, pcr; + + write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1); + write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1); + + gicv5_ppi_priority_init(); + + pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_PRI_MI); + write_sysreg_s(pcr, SYS_ICC_PCR_EL1); + + cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1); + write_sysreg_s(cr0, SYS_ICC_CR0_EL1); +} + +static int base_ipi_virq; + +static int gicv5_starting_cpu(unsigned int cpu) +{ + if (WARN(!gicv5_cpuif_has_gcie(), + "GICv5 system components present but CPU does not have FEAT_GCIE")) + return -ENODEV; + + gicv5_cpu_enable_interrupts(); + + return gicv5_irs_register_cpu(cpu); +} + +static void __init gicv5_smp_init(void) +{ + unsigned int num_ipis = GICV5_IPIS_PER_CPU * nr_cpu_ids; + + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, + "irqchip/arm/gicv5:starting", + gicv5_starting_cpu, NULL); + + base_ipi_virq = irq_domain_alloc_irqs(gicv5_global_data.ipi_domain, + num_ipis, NUMA_NO_NODE, NULL); + if (WARN(base_ipi_virq <= 0, "IPI IRQ allocation was not successful")) + return; + + set_smp_ipi_range_percpu(base_ipi_virq, GICV5_IPIS_PER_CPU, nr_cpu_ids); +} + +static void __init gicv5_free_domains(void) +{ + if (gicv5_global_data.ppi_domain) + irq_domain_remove(gicv5_global_data.ppi_domain); + if (gicv5_global_data.spi_domain) + irq_domain_remove(gicv5_global_data.spi_domain); + if (gicv5_global_data.ipi_domain) + irq_domain_remove(gicv5_global_data.ipi_domain); + + gicv5_global_data.ppi_domain = NULL; + gicv5_global_data.spi_domain = NULL; + gicv5_global_data.ipi_domain = NULL; +} + +static int __init gicv5_init_domains(struct fwnode_handle *handle) +{ + u32 spi_count = gicv5_global_data.global_spi_count; + struct irq_domain *d; + + d = irq_domain_create_linear(handle, PPI_NR, &gicv5_irq_ppi_domain_ops, NULL); + if (!d) + return -ENOMEM; + + irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED); + gicv5_global_data.ppi_domain = d; + + if (spi_count) { + d = irq_domain_create_linear(handle, spi_count, + &gicv5_irq_spi_domain_ops, NULL); + + if (!d) { + gicv5_free_domains(); + return -ENOMEM; + } + + gicv5_global_data.spi_domain = d; + irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED); + } + + if (!WARN(!gicv5_global_data.lpi_domain, + "LPI domain uninitialized, can't set up IPIs")) { + d = irq_domain_create_hierarchy(gicv5_global_data.lpi_domain, + 0, GICV5_IPIS_PER_CPU * nr_cpu_ids, + NULL, &gicv5_irq_ipi_domain_ops, + NULL); + + if (!d) { + gicv5_free_domains(); + return -ENOMEM; + } + gicv5_global_data.ipi_domain = d; + } + gicv5_global_data.fwnode = handle; + + return 0; +} + +static void gicv5_set_cpuif_pribits(void) +{ + u64 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); + + switch (FIELD_GET(ICC_IDR0_EL1_PRI_BITS, icc_idr0)) { + case ICC_IDR0_EL1_PRI_BITS_4BITS: + gicv5_global_data.cpuif_pri_bits = 4; + break; + case ICC_IDR0_EL1_PRI_BITS_5BITS: + gicv5_global_data.cpuif_pri_bits = 5; + break; + default: + pr_err("Unexpected ICC_IDR0_EL1_PRI_BITS value, default to 4"); + gicv5_global_data.cpuif_pri_bits = 4; + break; + } +} + +static void gicv5_set_cpuif_idbits(void) +{ + u32 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); + + switch (FIELD_GET(ICC_IDR0_EL1_ID_BITS, icc_idr0)) { + case ICC_IDR0_EL1_ID_BITS_16BITS: + gicv5_global_data.cpuif_id_bits = 16; + break; + case ICC_IDR0_EL1_ID_BITS_24BITS: + gicv5_global_data.cpuif_id_bits = 24; + break; + default: + pr_err("Unexpected ICC_IDR0_EL1_ID_BITS value, default to 16"); + gicv5_global_data.cpuif_id_bits = 16; + break; + } +} + +#ifdef CONFIG_KVM +static struct gic_kvm_info gic_v5_kvm_info __initdata; + +static bool __init gicv5_cpuif_has_gcie_legacy(void) +{ + u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); + return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0); +} + +static void __init gic_of_setup_kvm_info(struct device_node *node) +{ + gic_v5_kvm_info.type = GIC_V5; + gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy(); + + /* GIC Virtual CPU interface maintenance interrupt */ + gic_v5_kvm_info.no_maint_irq_mask = false; + gic_v5_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); + if (!gic_v5_kvm_info.maint_irq) { + pr_warn("cannot find GICv5 virtual CPU interface maintenance interrupt\n"); + return; + } + + vgic_set_kvm_info(&gic_v5_kvm_info); +} +#else +static inline void __init gic_of_setup_kvm_info(struct device_node *node) +{ +} +#endif // CONFIG_KVM + +static int __init gicv5_of_init(struct device_node *node, struct device_node *parent) +{ + int ret = gicv5_irs_of_probe(node); + if (ret) + return ret; + + ret = gicv5_init_domains(of_fwnode_handle(node)); + if (ret) + goto out_irs; + + gicv5_set_cpuif_pribits(); + gicv5_set_cpuif_idbits(); + + pri_bits = min_not_zero(gicv5_global_data.cpuif_pri_bits, + gicv5_global_data.irs_pri_bits); + + ret = gicv5_starting_cpu(smp_processor_id()); + if (ret) + goto out_dom; + + ret = set_handle_irq(gicv5_handle_irq); + if (ret) + goto out_int; + + ret = gicv5_irs_enable(); + if (ret) + goto out_int; + + gicv5_smp_init(); + + gicv5_irs_its_probe(); + + gic_of_setup_kvm_info(node); + + return 0; + +out_int: + gicv5_cpu_disable_interrupts(); +out_dom: + gicv5_free_domains(); +out_irs: + gicv5_irs_remove(); + + return ret; +} +IRQCHIP_DECLARE(gic_v5, "arm,gic-v5", gicv5_of_init); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 6503573557fd..1269ab8eb726 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -54,7 +54,7 @@ static void gic_check_cpu_features(void) { - WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GIC_CPUIF_SYSREGS), + WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF), TAINT_CPU_OUT_OF_SPEC, "GICv3 system registers enabled, broken firmware!\n"); } diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index 246c30205af4..454c7f16dd4d 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -133,11 +133,14 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, { const struct msi_parent_ops *ops = d->msi_parent_ops; u32 busmask = BIT(bus_token); + struct fwnode_handle *fwh; if (!ops) return 0; - if (fwspec->fwnode != d->fwnode || fwspec->param_count != 0) + fwh = d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode) + : fwspec->fwnode; + if (fwh != d->fwnode || fwspec->param_count != 0) return 0; /* Handle pure domain searches */ diff --git a/drivers/of/irq.c b/drivers/of/irq.c index f8ad79b9b1c9..74aaea61de13 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -670,8 +670,20 @@ err: } } -static u32 __of_msi_map_id(struct device *dev, struct device_node **np, - u32 id_in) +/** + * of_msi_xlate - map a MSI ID and find relevant MSI controller node + * @dev: device for which the mapping is to be done. + * @msi_np: Pointer to store the MSI controller node + * @id_in: Device ID. + * + * Walk up the device hierarchy looking for devices with a "msi-map" + * property. If found, apply the mapping to @id_in. @msi_np pointed + * value must be NULL on entry, if an MSI controller is found @msi_np is + * initialized to the MSI controller node with a reference held. + * + * Returns: The mapped MSI id. + */ +u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) { struct device *parent_dev; u32 id_out = id_in; @@ -682,7 +694,7 @@ static u32 __of_msi_map_id(struct device *dev, struct device_node **np, */ for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) if (!of_map_id(parent_dev->of_node, id_in, "msi-map", - "msi-map-mask", np, &id_out)) + "msi-map-mask", msi_np, &id_out)) break; return id_out; } @@ -700,7 +712,7 @@ static u32 __of_msi_map_id(struct device *dev, struct device_node **np, */ u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in) { - return __of_msi_map_id(dev, &msi_np, id_in); + return of_msi_xlate(dev, &msi_np, id_in); } /** @@ -719,7 +731,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, { struct device_node *np = NULL; - __of_msi_map_id(dev, &np, id); + of_msi_xlate(dev, &np, id); return irq_find_matching_host(np, bus_token); } diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index c05152733993..8a6e80d3963a 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -428,6 +428,26 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) } /** + * pci_msi_map_rid_ctlr_node - Get the MSI controller node and MSI requester id (RID) + * @pdev: The PCI device + * @node: Pointer to store the MSI controller device node + * + * Use the firmware data to find the MSI controller node for @pdev. + * If found map the RID and initialize @node with it. @node value must + * be set to NULL on entry. + * + * Returns: The RID. + */ +u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node) +{ + u32 rid = pci_dev_id(pdev); + + pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); + + return of_msi_xlate(&pdev->dev, node, rid); +} + +/** * pci_msi_get_device_domain - Get the MSI domain for a given PCI device * @pdev: The PCI device * diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h index 124c734ca5d9..92cca4b23f13 100644 --- a/include/asm-generic/msi.h +++ b/include/asm-generic/msi.h @@ -33,6 +33,7 @@ typedef struct msi_alloc_info { /* Device generating MSIs is proxying for another device */ #define MSI_ALLOC_FLAGS_PROXY_DEVICE (1UL << 0) +#define MSI_ALLOC_FLAGS_FIXED_MSG_DATA (1UL << 1) #define GENERIC_MSI_DOMAIN_OPS 1 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4a34f7f0a864..5c293e0ff5c1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -38,6 +38,7 @@ enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ VGIC_V3, /* New fancy GICv3 */ + VGIC_V5, /* Newer, fancier GICv5 */ }; /* same for all guests, as depending only on the _host's_ GIC model */ @@ -77,9 +78,12 @@ struct vgic_global { /* Pseudo GICv3 from outer space */ bool no_hw_deactivation; - /* GIC system register CPU interface */ + /* GICv3 system register CPU interface */ struct static_key_false gicv3_cpuif; + /* GICv3 compat mode on a GICv5 host */ + bool has_gcie_v3_compat; + u32 ich_vtr_el2; }; diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h new file mode 100644 index 000000000000..68ddcdb1cec5 --- /dev/null +++ b/include/linux/irqchip/arm-gic-v5.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 ARM Limited, All Rights Reserved. + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_V5_H +#define __LINUX_IRQCHIP_ARM_GIC_V5_H + +#include <linux/iopoll.h> + +#include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/sysreg.h> + +#define GICV5_IPIS_PER_CPU MAX_IPI + +/* + * INTID handling + */ +#define GICV5_HWIRQ_ID GENMASK(23, 0) +#define GICV5_HWIRQ_TYPE GENMASK(31, 29) +#define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0) + +#define GICV5_HWIRQ_TYPE_PPI UL(0x1) +#define GICV5_HWIRQ_TYPE_LPI UL(0x2) +#define GICV5_HWIRQ_TYPE_SPI UL(0x3) + +/* + * Tables attributes + */ +#define GICV5_NO_READ_ALLOC 0b0 +#define GICV5_READ_ALLOC 0b1 +#define GICV5_NO_WRITE_ALLOC 0b0 +#define GICV5_WRITE_ALLOC 0b1 + +#define GICV5_NON_CACHE 0b00 +#define GICV5_WB_CACHE 0b01 +#define GICV5_WT_CACHE 0b10 + +#define GICV5_NON_SHARE 0b00 +#define GICV5_OUTER_SHARE 0b10 +#define GICV5_INNER_SHARE 0b11 + +/* + * IRS registers and tables structures + */ +#define GICV5_IRS_IDR1 0x0004 +#define GICV5_IRS_IDR2 0x0008 +#define GICV5_IRS_IDR5 0x0014 +#define GICV5_IRS_IDR6 0x0018 +#define GICV5_IRS_IDR7 0x001c +#define GICV5_IRS_CR0 0x0080 +#define GICV5_IRS_CR1 0x0084 +#define GICV5_IRS_SYNCR 0x00c0 +#define GICV5_IRS_SYNC_STATUSR 0x00c4 +#define GICV5_IRS_SPI_SELR 0x0108 +#define GICV5_IRS_SPI_CFGR 0x0114 +#define GICV5_IRS_SPI_STATUSR 0x0118 +#define GICV5_IRS_PE_SELR 0x0140 +#define GICV5_IRS_PE_STATUSR 0x0144 +#define GICV5_IRS_PE_CR0 0x0148 +#define GICV5_IRS_IST_BASER 0x0180 +#define GICV5_IRS_IST_CFGR 0x0190 +#define GICV5_IRS_IST_STATUSR 0x0194 +#define GICV5_IRS_MAP_L2_ISTR 0x01c0 + +#define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20) +#define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16) + +#define GICV5_IRS_IDR1_PRIORITY_BITS_1BITS 0b000 +#define GICV5_IRS_IDR1_PRIORITY_BITS_2BITS 0b001 +#define GICV5_IRS_IDR1_PRIORITY_BITS_3BITS 0b010 +#define GICV5_IRS_IDR1_PRIORITY_BITS_4BITS 0b011 +#define GICV5_IRS_IDR1_PRIORITY_BITS_5BITS 0b100 + +#define GICV5_IRS_IDR2_ISTMD_SZ GENMASK(19, 15) +#define GICV5_IRS_IDR2_ISTMD BIT(14) +#define GICV5_IRS_IDR2_IST_L2SZ GENMASK(13, 11) +#define GICV5_IRS_IDR2_IST_LEVELS BIT(10) +#define GICV5_IRS_IDR2_MIN_LPI_ID_BITS GENMASK(9, 6) +#define GICV5_IRS_IDR2_LPI BIT(5) +#define GICV5_IRS_IDR2_ID_BITS GENMASK(4, 0) + +#define GICV5_IRS_IDR5_SPI_RANGE GENMASK(24, 0) +#define GICV5_IRS_IDR6_SPI_IRS_RANGE GENMASK(24, 0) +#define GICV5_IRS_IDR7_SPI_BASE GENMASK(23, 0) + +#define GICV5_IRS_IST_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(11), (r)) +#define GICV5_IRS_IST_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(12), (r)) +#define GICV5_IRS_IST_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(13), (r)) + +#define GICV5_IRS_CR0_IDLE BIT(1) +#define GICV5_IRS_CR0_IRSEN BIT(0) + +#define GICV5_IRS_CR1_VPED_WA BIT(15) +#define GICV5_IRS_CR1_VPED_RA BIT(14) +#define GICV5_IRS_CR1_VMD_WA BIT(13) +#define GICV5_IRS_CR1_VMD_RA BIT(12) +#define GICV5_IRS_CR1_VPET_WA BIT(11) +#define GICV5_IRS_CR1_VPET_RA BIT(10) +#define GICV5_IRS_CR1_VMT_WA BIT(9) +#define GICV5_IRS_CR1_VMT_RA BIT(8) +#define GICV5_IRS_CR1_IST_WA BIT(7) +#define GICV5_IRS_CR1_IST_RA BIT(6) +#define GICV5_IRS_CR1_IC GENMASK(5, 4) +#define GICV5_IRS_CR1_OC GENMASK(3, 2) +#define GICV5_IRS_CR1_SH GENMASK(1, 0) + +#define GICV5_IRS_SYNCR_SYNC BIT(31) + +#define GICV5_IRS_SYNC_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_SPI_STATUSR_V BIT(1) +#define GICV5_IRS_SPI_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_SPI_SELR_ID GENMASK(23, 0) + +#define GICV5_IRS_SPI_CFGR_TM BIT(0) + +#define GICV5_IRS_PE_SELR_IAFFID GENMASK(15, 0) + +#define GICV5_IRS_PE_STATUSR_V BIT(1) +#define GICV5_IRS_PE_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_PE_CR0_DPS BIT(0) + +#define GICV5_IRS_IST_STATUSR_IDLE BIT(0) + +#define GICV5_IRS_IST_CFGR_STRUCTURE BIT(16) +#define GICV5_IRS_IST_CFGR_ISTSZ GENMASK(8, 7) +#define GICV5_IRS_IST_CFGR_L2SZ GENMASK(6, 5) +#define GICV5_IRS_IST_CFGR_LPI_ID_BITS GENMASK(4, 0) + +#define GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR 0b0 +#define GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL 0b1 + +#define GICV5_IRS_IST_CFGR_ISTSZ_4 0b00 +#define GICV5_IRS_IST_CFGR_ISTSZ_8 0b01 +#define GICV5_IRS_IST_CFGR_ISTSZ_16 0b10 + +#define GICV5_IRS_IST_CFGR_L2SZ_4K 0b00 +#define GICV5_IRS_IST_CFGR_L2SZ_16K 0b01 +#define GICV5_IRS_IST_CFGR_L2SZ_64K 0b10 + +#define GICV5_IRS_IST_BASER_ADDR_MASK GENMASK_ULL(55, 6) +#define GICV5_IRS_IST_BASER_VALID BIT_ULL(0) + +#define GICV5_IRS_MAP_L2_ISTR_ID GENMASK(23, 0) + +#define GICV5_ISTL1E_VALID BIT_ULL(0) + +#define GICV5_ISTL1E_L2_ADDR_MASK GENMASK_ULL(55, 12) + +/* + * ITS registers and tables structures + */ +#define GICV5_ITS_IDR1 0x0004 +#define GICV5_ITS_IDR2 0x0008 +#define GICV5_ITS_CR0 0x0080 +#define GICV5_ITS_CR1 0x0084 +#define GICV5_ITS_DT_BASER 0x00c0 +#define GICV5_ITS_DT_CFGR 0x00d0 +#define GICV5_ITS_DIDR 0x0100 +#define GICV5_ITS_EIDR 0x0108 +#define GICV5_ITS_INV_EVENTR 0x010c +#define GICV5_ITS_INV_DEVICER 0x0110 +#define GICV5_ITS_STATUSR 0x0120 +#define GICV5_ITS_SYNCR 0x0140 +#define GICV5_ITS_SYNC_STATUSR 0x0148 + +#define GICV5_ITS_IDR1_L2SZ GENMASK(10, 8) +#define GICV5_ITS_IDR1_ITT_LEVELS BIT(7) +#define GICV5_ITS_IDR1_DT_LEVELS BIT(6) +#define GICV5_ITS_IDR1_DEVICEID_BITS GENMASK(5, 0) + +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(8), (r)) +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(9), (r)) +#define GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(10), (r)) + +#define GICV5_ITS_IDR2_XDMN_EVENTs GENMASK(6, 5) +#define GICV5_ITS_IDR2_EVENTID_BITS GENMASK(4, 0) + +#define GICV5_ITS_CR0_IDLE BIT(1) +#define GICV5_ITS_CR0_ITSEN BIT(0) + +#define GICV5_ITS_CR1_ITT_RA BIT(7) +#define GICV5_ITS_CR1_DT_RA BIT(6) +#define GICV5_ITS_CR1_IC GENMASK(5, 4) +#define GICV5_ITS_CR1_OC GENMASK(3, 2) +#define GICV5_ITS_CR1_SH GENMASK(1, 0) + +#define GICV5_ITS_DT_CFGR_STRUCTURE BIT(16) +#define GICV5_ITS_DT_CFGR_L2SZ GENMASK(7, 6) +#define GICV5_ITS_DT_CFGR_DEVICEID_BITS GENMASK(5, 0) + +#define GICV5_ITS_DT_BASER_ADDR_MASK GENMASK_ULL(55, 3) + +#define GICV5_ITS_INV_DEVICER_I BIT(31) +#define GICV5_ITS_INV_DEVICER_EVENTID_BITS GENMASK(5, 1) +#define GICV5_ITS_INV_DEVICER_L1 BIT(0) + +#define GICV5_ITS_DIDR_DEVICEID GENMASK_ULL(31, 0) + +#define GICV5_ITS_EIDR_EVENTID GENMASK(15, 0) + +#define GICV5_ITS_INV_EVENTR_I BIT(31) +#define GICV5_ITS_INV_EVENTR_ITT_L2SZ GENMASK(2, 1) +#define GICV5_ITS_INV_EVENTR_L1 BIT(0) + +#define GICV5_ITS_STATUSR_IDLE BIT(0) + +#define GICV5_ITS_SYNCR_SYNC BIT_ULL(63) +#define GICV5_ITS_SYNCR_SYNCALL BIT_ULL(32) +#define GICV5_ITS_SYNCR_DEVICEID GENMASK_ULL(31, 0) + +#define GICV5_ITS_SYNC_STATUSR_IDLE BIT(0) + +#define GICV5_DTL1E_VALID BIT_ULL(0) +/* Note that there is no shift for the address by design */ +#define GICV5_DTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_DTL1E_SPAN GENMASK_ULL(63, 60) + +#define GICV5_DTL2E_VALID BIT_ULL(0) +#define GICV5_DTL2E_ITT_L2SZ GENMASK_ULL(2, 1) +/* Note that there is no shift for the address by design */ +#define GICV5_DTL2E_ITT_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_DTL2E_ITT_DSWE BIT_ULL(57) +#define GICV5_DTL2E_ITT_STRUCTURE BIT_ULL(58) +#define GICV5_DTL2E_EVENT_ID_BITS GENMASK_ULL(63, 59) + +#define GICV5_ITTL1E_VALID BIT_ULL(0) +/* Note that there is no shift for the address by design */ +#define GICV5_ITTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3) +#define GICV5_ITTL1E_SPAN GENMASK_ULL(63, 60) + +#define GICV5_ITTL2E_LPI_ID GENMASK_ULL(23, 0) +#define GICV5_ITTL2E_DAC GENMASK_ULL(29, 28) +#define GICV5_ITTL2E_VIRTUAL BIT_ULL(30) +#define GICV5_ITTL2E_VALID BIT_ULL(31) +#define GICV5_ITTL2E_VM_ID GENMASK_ULL(47, 32) + +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_4k 0b00 +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_16k 0b01 +#define GICV5_ITS_DT_ITT_CFGR_L2SZ_64k 0b10 + +#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR 0 +#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL 1 + +#define GICV5_ITS_HWIRQ_DEVICE_ID GENMASK_ULL(31, 0) +#define GICV5_ITS_HWIRQ_EVENT_ID GENMASK_ULL(63, 32) + +/* + * IWB registers + */ +#define GICV5_IWB_IDR0 0x0000 +#define GICV5_IWB_CR0 0x0080 +#define GICV5_IWB_WENABLE_STATUSR 0x00c0 +#define GICV5_IWB_WENABLER 0x2000 +#define GICV5_IWB_WTMR 0x4000 + +#define GICV5_IWB_IDR0_INT_DOMS GENMASK(14, 11) +#define GICV5_IWB_IDR0_IW_RANGE GENMASK(10, 0) + +#define GICV5_IWB_CR0_IDLE BIT(1) +#define GICV5_IWB_CR0_IWBEN BIT(0) + +#define GICV5_IWB_WENABLE_STATUSR_IDLE BIT(0) + +/* + * Global Data structures and functions + */ +struct gicv5_chip_data { + struct fwnode_handle *fwnode; + struct irq_domain *ppi_domain; + struct irq_domain *spi_domain; + struct irq_domain *lpi_domain; + struct irq_domain *ipi_domain; + u32 global_spi_count; + u8 cpuif_pri_bits; + u8 cpuif_id_bits; + u8 irs_pri_bits; + struct { + __le64 *l1ist_addr; + u32 l2_size; + u8 l2_bits; + bool l2; + } ist; +}; + +extern struct gicv5_chip_data gicv5_global_data __read_mostly; + +struct gicv5_irs_chip_data { + struct list_head entry; + struct fwnode_handle *fwnode; + void __iomem *irs_base; + u32 flags; + u32 spi_min; + u32 spi_range; + raw_spinlock_t spi_config_lock; +}; + +static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset, + const char *reg_s, u32 mask, + u32 *val) +{ + void __iomem *reg = addr + offset; + u32 tmp; + int ret; + + ret = readl_poll_timeout_atomic(reg, tmp, tmp & mask, 1, 10 * USEC_PER_MSEC); + if (unlikely(ret == -ETIMEDOUT)) { + pr_err_ratelimited("%s timeout...\n", reg_s); + return ret; + } + + if (val) + *val = tmp; + + return 0; +} + +static inline int gicv5_wait_for_op_s(void __iomem *addr, u32 offset, + const char *reg_s, u32 mask) +{ + void __iomem *reg = addr + offset; + u32 val; + int ret; + + ret = readl_poll_timeout(reg, val, val & mask, 1, 10 * USEC_PER_MSEC); + if (unlikely(ret == -ETIMEDOUT)) { + pr_err_ratelimited("%s timeout...\n", reg_s); + return ret; + } + + return 0; +} + +#define gicv5_wait_for_op_atomic(base, reg, mask, val) \ + gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val) + +#define gicv5_wait_for_op(base, reg, mask) \ + gicv5_wait_for_op_s(base, reg, #reg, mask) + +void __init gicv5_init_lpi_domain(void); +void __init gicv5_free_lpi_domain(void); + +int gicv5_irs_of_probe(struct device_node *parent); +void gicv5_irs_remove(void); +int gicv5_irs_enable(void); +void gicv5_irs_its_probe(void); +int gicv5_irs_register_cpu(int cpuid); +int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid); +struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id); +int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type); +int gicv5_irs_iste_alloc(u32 lpi); +void gicv5_irs_syncr(void); + +struct gicv5_its_devtab_cfg { + union { + struct { + __le64 *devtab; + } linear; + struct { + __le64 *l1devtab; + __le64 **l2ptrs; + } l2; + }; + u32 cfgr; +}; + +struct gicv5_its_itt_cfg { + union { + struct { + __le64 *itt; + unsigned int num_ents; + } linear; + struct { + __le64 *l1itt; + __le64 **l2ptrs; + unsigned int num_l1_ents; + u8 l2sz; + } l2; + }; + u8 event_id_bits; + bool l2itt; +}; + +void gicv5_init_lpis(u32 max); +void gicv5_deinit_lpis(void); + +int gicv5_alloc_lpi(void); +void gicv5_free_lpi(u32 lpi); + +void __init gicv5_its_of_probe(struct device_node *parent); +#endif diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index a75b2c7de69d..ca1713fac6e3 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -15,6 +15,8 @@ enum gic_type { GIC_V2, /* Full GICv3, optionally with v2 compat */ GIC_V3, + /* Full GICv5, optionally with v3 compat */ + GIC_V5, }; struct gic_kvm_info { @@ -34,6 +36,8 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; + /* v3 compat support (GICv5 hosts, only) */ + bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7387d183029b..25c7cbeed393 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -212,6 +212,9 @@ enum { /* Address and data pair is mutable when irq_set_affinity() */ IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11), + /* IRQ domain requires parent fwnode matching */ + IRQ_DOMAIN_FLAG_FWNODE_PARENT = (1 << 12), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the diff --git a/include/linux/msi.h b/include/linux/msi.h index 6863540f4b71..a418e2695b05 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -705,6 +705,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); +u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); #else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 6337ad4e5fe8..a480063c9cb1 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -54,6 +54,7 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); extern void of_msi_configure(struct device *dev, const struct device_node *np); +extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) @@ -100,6 +101,10 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } +static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) +{ + return id_in; +} static inline u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..e4e566ff348b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -956,6 +956,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..ce817a975e50 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -156,7 +156,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test TEST_GEN_PROGS_arm64 += arm64/set_id_regs diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..062bf84cced1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). + */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..684c52b1b881 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -52,6 +52,12 @@ static struct feature_id_reg feat_id_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 16, 1 + }, + { + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR3_EL1), + ID_AA64MMFR3_EL1_SCTLRX_SHIFT, + ID_AA64MMFR3_EL1_SCTLRX_IMP } }; @@ -469,6 +475,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. - */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. - */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8f422bfdfcb9..d3bf9204409c 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -774,8 +784,8 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST + MTE_IDREG_TEST; + ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index b0fc0f945766..255fed769a8a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -254,6 +254,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 |