summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/arm64/booting.rst41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml267
-rw-r--r--Documentation/virt/kvm/api.rst13
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/barrier.h3
-rw-r--r--arch/arm64/include/asm/el2_setup.h45
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h51
-rw-r--r--arch/arm64/include/asm/kvm_host.h36
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h18
-rw-r--r--arch/arm64/include/asm/kvm_nested.h2
-rw-r--r--arch/arm64/include/asm/smp.h24
-rw-r--r--arch/arm64/include/asm/sysreg.h71
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h2
-rw-r--r--arch/arm64/kernel/cpufeature.c26
-rw-r--r--arch/arm64/kernel/smp.c142
-rw-r--r--arch/arm64/kvm/Makefile3
-rw-r--r--arch/arm64/kvm/arch_timer.c2
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/at.c80
-rw-r--r--arch/arm64/kvm/config.c28
-rw-r--r--arch/arm64/kvm/emulate-nested.c49
-rw-r--r--arch/arm64/kvm/guest.c62
-rw-r--r--arch/arm64/kvm/handle_exit.c24
-rw-r--r--arch/arm64/kvm/hyp/exception.c16
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h53
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h49
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c53
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c14
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c6
-rw-r--r--arch/arm64/kvm/inject_fault.c235
-rw-r--r--arch/arm64/kvm/mmio.c12
-rw-r--r--arch/arm64/kvm/mmu.c105
-rw-r--r--arch/arm64/kvm/nested.c49
-rw-r--r--arch/arm64/kvm/sys_regs.c76
-rw-r--r--arch/arm64/kvm/sys_regs.h2
-rw-r--r--arch/arm64/kvm/trace_handle_exit.h2
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3-nested.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v5.c52
-rw-r--r--arch/arm64/kvm/vgic/vgic.h13
-rw-r--r--arch/arm64/tools/cpucaps4
-rw-r--r--arch/arm64/tools/sysreg501
-rw-r--r--drivers/irqchip/Kconfig12
-rw-r--r--drivers/irqchip/Makefile5
-rw-r--r--drivers/irqchip/irq-gic-common.h2
-rw-r--r--drivers/irqchip/irq-gic-its-msi-parent.c (renamed from drivers/irqchip/irq-gic-v3-its-msi-parent.c)168
-rw-r--r--drivers/irqchip/irq-gic-its-msi-parent.h12
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c1
-rw-r--r--drivers/irqchip/irq-gic-v5-irs.c822
-rw-r--r--drivers/irqchip/irq-gic-v5-its.c1228
-rw-r--r--drivers/irqchip/irq-gic-v5-iwb.c284
-rw-r--r--drivers/irqchip/irq-gic-v5.c1137
-rw-r--r--drivers/irqchip/irq-gic.c2
-rw-r--r--drivers/irqchip/irq-msi-lib.c5
-rw-r--r--drivers/of/irq.c22
-rw-r--r--drivers/pci/msi/irqdomain.c20
-rw-r--r--include/asm-generic/msi.h1
-rw-r--r--include/kvm/arm_vgic.h6
-rw-r--r--include/linux/irqchip/arm-gic-v5.h394
-rw-r--r--include/linux/irqchip/arm-vgic-info.h4
-rw-r--r--include/linux/irqdomain.h3
-rw-r--r--include/linux/msi.h1
-rw-r--r--include/linux/of_irq.h5
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm2
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c330
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c7
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c159
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c14
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h10
73 files changed, 6526 insertions, 481 deletions
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index ee9b790c0d72..83e037351f6d 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -223,6 +223,47 @@ Before jumping into the kernel, the following conditions must be met:
- SCR_EL3.HCE (bit 8) must be initialised to 0b1.
+ For systems with a GICv5 interrupt controller to be used in v5 mode:
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - ICH_HFGRTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_HMRn_EL1 (bit 16) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IAFFIDR_EL1 (bit 7) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HPPIR_EL1 (bit 4) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HAPR_EL1 (bit 3) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IDRn_EL1 (bit 1) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGWTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGITR_EL2.GICRCDNMIA (bit 10) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICRCDIA (bit 9) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDI (bit 8) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEOI (bit 7) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDHM (bit 6) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDRCFG (bit 5) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPEND (bit 4) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDAFF (bit 3) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPRI (bit 2) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDIS (bit 1) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEN (bit 0) must be initialised to 0b1.
+
+ - The DT or ACPI tables must describe a GICv5 interrupt controller.
+
For systems with a GICv3 interrupt controller to be used in v3 mode:
- If EL3 is present:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml
new file mode 100644
index 000000000000..99a266a62385
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/arm,gic-v5-iwb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Generic Interrupt Controller, version 5 Interrupt Wire Bridge (IWB)
+
+maintainers:
+ - Lorenzo Pieralisi <lpieralisi@kernel.org>
+ - Marc Zyngier <maz@kernel.org>
+
+description: |
+ The GICv5 architecture defines the guidelines to implement GICv5
+ compliant interrupt controllers for AArch64 systems.
+
+ The GICv5 specification can be found at
+ https://developer.arm.com/documentation/aes0070
+
+ GICv5 has zero or more Interrupt Wire Bridges (IWB) that are responsible
+ for translating wire signals into interrupt messages to the GICv5 ITS.
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ const: arm,gic-v5-iwb
+
+ reg:
+ items:
+ - description: IWB control frame
+
+ "#address-cells":
+ const: 0
+
+ "#interrupt-cells":
+ description: |
+ The 1st cell corresponds to the IWB wire.
+
+ The 2nd cell is the flags, encoded as follows:
+ bits[3:0] trigger type and level flags.
+
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+
+ const: 2
+
+ interrupt-controller: true
+
+ msi-parent:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#interrupt-cells"
+ - interrupt-controller
+ - msi-parent
+
+additionalProperties: false
+
+examples:
+ - |
+ interrupt-controller@2f000000 {
+ compatible = "arm,gic-v5-iwb";
+ reg = <0x2f000000 0x10000>;
+
+ #address-cells = <0>;
+
+ #interrupt-cells = <2>;
+ interrupt-controller;
+
+ msi-parent = <&its0 64>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml
new file mode 100644
index 000000000000..86ca7f3ac281
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml
@@ -0,0 +1,267 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/arm,gic-v5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Generic Interrupt Controller, version 5
+
+maintainers:
+ - Lorenzo Pieralisi <lpieralisi@kernel.org>
+ - Marc Zyngier <maz@kernel.org>
+
+description: |
+ The GICv5 architecture defines the guidelines to implement GICv5
+ compliant interrupt controllers for AArch64 systems.
+
+ The GICv5 specification can be found at
+ https://developer.arm.com/documentation/aes0070
+
+ The GICv5 architecture is composed of multiple components:
+ - one or more IRS (Interrupt Routing Service)
+ - zero or more ITS (Interrupt Translation Service)
+
+ The architecture defines:
+ - PE-Private Peripheral Interrupts (PPI)
+ - Shared Peripheral Interrupts (SPI)
+ - Logical Peripheral Interrupts (LPI)
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ const: arm,gic-v5
+
+ "#address-cells":
+ enum: [ 1, 2 ]
+
+ "#size-cells":
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ "#interrupt-cells":
+ description: |
+ The 1st cell corresponds to the INTID.Type field in the INTID; 1 for PPI,
+ 3 for SPI. LPI interrupts must not be described in the bindings since
+ they are allocated dynamically by the software component managing them.
+
+ The 2nd cell contains the interrupt INTID.ID field.
+
+ The 3rd cell is the flags, encoded as follows:
+ bits[3:0] trigger type and level flags.
+
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+
+ const: 3
+
+ interrupt-controller: true
+
+ interrupts:
+ description:
+ The VGIC maintenance interrupt.
+ maxItems: 1
+
+required:
+ - compatible
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+ - "#interrupt-cells"
+ - interrupt-controller
+
+patternProperties:
+ "^irs@[0-9a-f]+$":
+ type: object
+ description:
+ GICv5 has one or more Interrupt Routing Services (IRS) that are
+ responsible for handling IRQ state and routing.
+
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: arm,gic-v5-irs
+
+ reg:
+ minItems: 1
+ items:
+ - description: IRS config frames
+ - description: IRS setlpi frames
+
+ reg-names:
+ description:
+ Describe config and setlpi frames that are present.
+ "ns-" stands for non-secure, "s-" for secure, "realm-" for realm
+ and "el3-" for EL3.
+ minItems: 1
+ maxItems: 8
+ items:
+ enum: [ ns-config, s-config, realm-config, el3-config, ns-setlpi,
+ s-setlpi, realm-setlpi, el3-setlpi ]
+
+ "#address-cells":
+ enum: [ 1, 2 ]
+
+ "#size-cells":
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ dma-noncoherent:
+ description:
+ Present if the GIC IRS permits programming shareability and
+ cacheability attributes but is connected to a non-coherent
+ downstream interconnect.
+
+ cpus:
+ description:
+ CPUs managed by the IRS.
+
+ arm,iaffids:
+ $ref: /schemas/types.yaml#/definitions/uint16-array
+ description:
+ Interrupt AFFinity ID (IAFFID) associated with the CPU whose
+ CPU node phandle is at the same index in the cpus array.
+
+ patternProperties:
+ "^its@[0-9a-f]+$":
+ type: object
+ description:
+ GICv5 has zero or more Interrupt Translation Services (ITS) that are
+ used to route Message Signalled Interrupts (MSI) to the CPUs. Each
+ ITS is connected to an IRS.
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: arm,gic-v5-its
+
+ reg:
+ items:
+ - description: ITS config frames
+
+ reg-names:
+ description:
+ Describe config frames that are present.
+ "ns-" stands for non-secure, "s-" for secure, "realm-" for realm
+ and "el3-" for EL3.
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [ ns-config, s-config, realm-config, el3-config ]
+
+ "#address-cells":
+ enum: [ 1, 2 ]
+
+ "#size-cells":
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ dma-noncoherent:
+ description:
+ Present if the GIC ITS permits programming shareability and
+ cacheability attributes but is connected to a non-coherent
+ downstream interconnect.
+
+ patternProperties:
+ "^msi-controller@[0-9a-f]+$":
+ type: object
+ description:
+ GICv5 ITS has one or more translate register frames.
+ additionalProperties: false
+
+ properties:
+ reg:
+ items:
+ - description: ITS translate frames
+
+ reg-names:
+ description:
+ Describe translate frames that are present.
+ "ns-" stands for non-secure, "s-" for secure, "realm-" for realm
+ and "el3-" for EL3.
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [ ns-translate, s-translate, realm-translate, el3-translate ]
+
+ "#msi-cells":
+ description:
+ The single msi-cell is the DeviceID of the device which will
+ generate the MSI.
+ const: 1
+
+ msi-controller: true
+
+ required:
+ - reg
+ - reg-names
+ - "#msi-cells"
+ - msi-controller
+
+ required:
+ - compatible
+ - reg
+ - reg-names
+
+ required:
+ - compatible
+ - reg
+ - reg-names
+ - cpus
+ - arm,iaffids
+
+additionalProperties: false
+
+examples:
+ - |
+ interrupt-controller {
+ compatible = "arm,gic-v5";
+
+ #interrupt-cells = <3>;
+ interrupt-controller;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ interrupts = <1 25 4>;
+
+ irs@2f1a0000 {
+ compatible = "arm,gic-v5-irs";
+ reg = <0x2f1a0000 0x10000>; // IRS_CONFIG_FRAME
+ reg-names = "ns-config";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>;
+ arm,iaffids = /bits/ 16 <0 1 2 3 4 5 6 7>;
+
+ its@2f120000 {
+ compatible = "arm,gic-v5-its";
+ reg = <0x2f120000 0x10000>; // ITS_CONFIG_FRAME
+ reg-names = "ns-config";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ msi-controller@2f130000 {
+ reg = <0x2f130000 0x10000>; // ITS_TRANSLATE_FRAME
+ reg-names = "ns-translate";
+
+ #msi-cells = <1>;
+ msi-controller;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9abf93ee5f65..53e0179d5294 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8585,7 +8585,7 @@ ENOSYS for the others.
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
-7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
+7.42 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
-------------------------------------
:Architectures: arm64
@@ -8614,6 +8614,17 @@ given VM.
When this capability is enabled, KVM resets the VCPU when setting
MP_STATE_INIT_RECEIVED through IOCTL. The original MP_STATE is preserved.
+7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED
+-------------------------------------------
+
+:Architectures: arm64
+:Target: VM
+:Parameters: None
+
+This capability indicate to the userspace whether a PFNMAP memory region
+can be safely mapped as cacheable. This relies on the presence of
+force write back (FWB) feature support on the hardware.
+
8. Other capabilities.
======================
diff --git a/MAINTAINERS b/MAINTAINERS
index c3f7fbd0d67a..b035802ea41d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1964,6 +1964,16 @@ F: drivers/irqchip/irq-gic*.[ch]
F: include/linux/irqchip/arm-gic*.h
F: include/linux/irqchip/arm-vgic-info.h
+ARM GENERIC INTERRUPT CONTROLLER V5 DRIVERS
+M: Lorenzo Pieralisi <lpieralisi@kernel.org>
+M: Marc Zyngier <maz@kernel.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Maintained
+F: Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5*.yaml
+F: drivers/irqchip/irq-gic-its-msi-parent.[ch]
+F: drivers/irqchip/irq-gic-v5*.[ch]
+F: include/linux/irqchip/arm-gic-v5.h
+
ARM HDLCD DRM DRIVER
M: Liviu Dudau <liviu.dudau@arm.com>
S: Supported
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 55fc331af337..5ff757cb7cd2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -129,6 +129,7 @@ config ARM64
select ARM_GIC_V2M if PCI
select ARM_GIC_V3
select ARM_GIC_V3_ITS if PCI
+ select ARM_GIC_V5
select ARM_PSCI_FW
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..f5801b0ba9e9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,6 +44,9 @@
SB_BARRIER_INSN"nop\n", \
ARM64_HAS_SB))
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index ba5df0df02a4..54abcb13e51f 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -165,6 +165,50 @@
.Lskip_gicv3_\@:
.endm
+/* GICv5 system register access */
+.macro __init_el2_gicv5
+ mrs_s x0, SYS_ID_AA64PFR2_EL1
+ ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4
+ cbz x0, .Lskip_gicv5_\@
+
+ mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \
+ ICH_HFGITR_EL2_GICRCDIA | \
+ ICH_HFGITR_EL2_GICCDDI | \
+ ICH_HFGITR_EL2_GICCDEOI | \
+ ICH_HFGITR_EL2_GICCDHM | \
+ ICH_HFGITR_EL2_GICCDRCFG | \
+ ICH_HFGITR_EL2_GICCDPEND | \
+ ICH_HFGITR_EL2_GICCDAFF | \
+ ICH_HFGITR_EL2_GICCDPRI | \
+ ICH_HFGITR_EL2_GICCDDIS | \
+ ICH_HFGITR_EL2_GICCDEN)
+ msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps
+ mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps
+ mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGWTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+.Lskip_gicv5_\@:
+.endm
+
.macro __init_el2_hstr
msr hstr_el2, xzr // Disable CP15 traps to EL2
.endm
@@ -314,6 +358,7 @@
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
+ __init_el2_gicv5
__init_el2_hstr
__init_el2_nvhe_idregs
__init_el2_cptr
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0720898f563e..fa8a08a1ccd5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,16 +45,39 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
{
@@ -195,6 +218,11 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
}
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
bool e2h, tge;
@@ -224,6 +252,20 @@ static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
}
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -627,6 +669,9 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
if (kvm_has_fpmr(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
}
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d27079968341..d373d555a69b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -523,6 +523,7 @@ enum vcpu_sysreg {
/* Anything from this can be RES0/RES1 sanitised */
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
+ SCTLR2_EL2, /* System Control Register 2 (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
@@ -537,6 +538,7 @@ enum vcpu_sysreg {
VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
VNCR(TCR_EL1), /* Translation Control Register */
VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(SCTLR2_EL1), /* System Control Register 2 */
VNCR(ESR_EL1), /* Exception Syndrome Register */
VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
@@ -565,6 +567,10 @@ enum vcpu_sysreg {
VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+ /* FEAT_RAS registers */
+ VNCR(VDISR_EL2),
+ VNCR(VSESR_EL2),
+
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
@@ -817,7 +823,7 @@ struct kvm_vcpu_arch {
u8 iflags;
/* State flags for kernel bookkeeping, unused by the hypervisor code */
- u8 sflags;
+ u16 sflags;
/*
* Don't run the guest (internal implementation need).
@@ -953,9 +959,21 @@ struct kvm_vcpu_arch {
__vcpu_flags_preempt_enable(); \
} while (0)
+#define __vcpu_test_and_clear_flag(v, flagset, f, m) \
+ ({ \
+ typeof(v->arch.flagset) set; \
+ \
+ set = __vcpu_get_flag(v, flagset, f, m); \
+ __vcpu_clear_flag(v, flagset, f, m); \
+ \
+ set; \
+ })
+
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...) \
+ __vcpu_test_and_clear_flag((v), __VA_ARGS__)
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
@@ -1015,6 +1033,8 @@ struct kvm_vcpu_arch {
#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
/* KVM is currently emulating a nested ERET */
#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1149,6 +1169,8 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
* System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the guest cannot modify its
* own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
@@ -1186,6 +1208,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1200,6 +1223,8 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
* System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the MPIDR should only be set
* once, before running the VCPU, and never changed later.
@@ -1236,6 +1261,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1387,8 +1413,6 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
return (vcpu_arch->steal.base != INVALID_GPA);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
-
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -1666,6 +1690,12 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+#define kvm_has_ras(k) \
+ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
+
+#define kvm_has_sctlr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP))
+
static inline bool kvm_arch_has_irq_bypass(void)
{
return true;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b98ac6aa631f..ae563ebd6aee 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm)
read_unlock(&kvm->mmu_lock);
}
+/*
+ * ARM64 KVM relies on a simple conversion from physaddr to a kernel
+ * virtual address (KVA) when it does cache maintenance as the CMO
+ * instructions work on virtual addresses. This is incompatible with
+ * VM_PFNMAP VMAs which may not have a kernel direct mapping to a
+ * virtual address.
+ *
+ * With S2FWB and CACHE DIC features, KVM need not do cache flushing
+ * and CMOs are NOP'd. This has the effect of no longer requiring a
+ * KVA for addresses mapped into the S2. The presence of these features
+ * are thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */
+static inline bool kvm_supports_cacheable_pfnmap(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
+ cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
+}
+
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 0bd07ea068a1..7fd76f41c296 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -80,6 +80,8 @@ extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
struct kvm_s2_trans {
phys_addr_t output;
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2510eec026f7..d48ef6d5abcc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -50,10 +50,32 @@ struct seq_file;
*/
extern void smp_init_cpus(void);
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNC,
+ IPI_CPU_STOP,
+ IPI_CPU_STOP_NMI,
+ IPI_TIMER,
+ IPI_IRQ_WORK,
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
+};
+
/*
* Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus);
+
+static inline void set_smp_ipi_range(int ipi_base, int n)
+{
+ set_smp_ipi_range_percpu(ipi_base, n, 0);
+}
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f1bb0d10c39a..948007cd3684 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -113,10 +113,14 @@
/* Register-based PAN access, for save/restore purposes */
#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
-#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
- __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
-#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31)
+#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31)
/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
@@ -1078,6 +1082,67 @@
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
GCS_CAP_VALID_TOKEN)
+/*
+ * Definitions for GICv5 instructions
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r)
+#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
#define ARM64_FEATURE_FIELD_BITS 4
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index 6f556e993644..f6ec500ad3fa 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -51,6 +51,7 @@
#define VNCR_SP_EL1 0x240
#define VNCR_VBAR_EL1 0x250
#define VNCR_TCR2_EL1 0x270
+#define VNCR_SCTLR2_EL1 0x278
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
@@ -84,6 +85,7 @@
#define VNCR_ICH_HCR_EL2 0x4C0
#define VNCR_ICH_VMCR_EL2 0x4C8
#define VNCR_VDISR_EL2 0x500
+#define VNCR_VSESR_EL2 0x508
#define VNCR_PMBLIMITR_EL1 0x800
#define VNCR_PMBPTR_EL1 0x810
#define VNCR_PMBSR_EL1 0x820
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b34044e20128..4dece9ca68bc 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -303,6 +303,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
@@ -500,6 +501,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -2296,11 +2298,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2496,8 +2498,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -3061,6 +3063,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_pmuv3,
},
#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
{},
};
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b3f6b56e733..a900835a3adf 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -64,26 +64,18 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_STOP_NMI,
- IPI_TIMER,
- IPI_IRQ_WORK,
- NR_IPI,
- /*
- * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
- * with trace_ipi_*
- */
- IPI_CPU_BACKTRACE = NR_IPI,
- IPI_KGDB_ROUNDUP,
- MAX_IPI
-};
-
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
-static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
static bool crash_stop;
@@ -844,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -917,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
static void arm64_backtrace_ipi(cpumask_t *mask)
{
- __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
@@ -944,7 +947,7 @@ void kgdb_roundup_cpus(void)
if (cpu == this_cpu)
continue;
- __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
}
}
#endif
@@ -1013,14 +1016,16 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
@@ -1046,11 +1051,15 @@ static void ipi_setup(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- prepare_percpu_nmi(ipi_irq_base + i);
- enable_percpu_nmi(ipi_irq_base + i, 0);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
} else {
- enable_percpu_irq(ipi_irq_base + i, 0);
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
@@ -1064,44 +1073,77 @@ static void ipi_teardown(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- disable_percpu_nmi(ipi_irq_base + i);
- teardown_percpu_nmi(ipi_irq_base + i);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
} else {
- disable_percpu_irq(ipi_irq_base + i);
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < MAX_IPI);
- nr_ipi = min(n, MAX_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
- if (ipi_should_be_nmi(i)) {
- err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as NMI, err=%d\n",
- i, err);
- } else {
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
- i, err);
- }
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
+
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
+
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
+
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7c329e01c557..3ebc0570345c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,7 +23,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
- vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o
+ vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \
+ vgic/vgic-v5.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 701ea10a63f1..dbd74e4885e2 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -830,7 +830,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
* by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
* not both). This simplifies the handling of the EL1NV* bits.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
/* Use the VHE format for mental sanity */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 38a91bb5d4c7..7a1a8210ff91 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
r = BIT(0);
break;
+ case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
+ if (!kvm)
+ r = -EINVAL;
+ else
+ r = kvm_supports_cacheable_pfnmap();
+ break;
+
default:
r = 0;
}
@@ -521,7 +528,7 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
* Either we're running an L2 guest, and the API/APK bits come
* from L1's HCR_EL2, or API/APK are both set.
*/
- if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ if (unlikely(is_nested_ctxt(vcpu))) {
u64 val;
val = __vcpu_sys_reg(vcpu, HCR_EL2);
@@ -740,7 +747,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+ bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE);
+
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
@@ -1187,6 +1195,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
+ kvm_nested_flush_hwstate(vcpu);
+
if (kvm_vcpu_has_pmu(vcpu))
kvm_pmu_flush_hwstate(vcpu);
@@ -1286,6 +1296,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, ret);
+ kvm_nested_sync_hwstate(vcpu);
+
preempt_enable();
/*
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index a25be111cd8f..0e5610533949 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1047,34 +1047,51 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
- switch (wi->regime) {
- case TR_EL10:
- pov_perms = perm_idx(vcpu, POR_EL1, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL20:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL2:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = 0;
- break;
- }
+ if (wr->pov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ pov_perms = perm_idx(vcpu, POR_EL1, idx);
+ break;
+ case TR_EL20:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ case TR_EL2:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ }
+
+ if (pov_perms & ~POE_RWX)
+ pov_perms = POE_NONE;
- if (pov_perms & ~POE_RWX)
- pov_perms = POE_NONE;
+ /* R_QXXPC, S1PrivOverflow enabled */
+ if (wr->pwxn && (pov_perms & POE_X))
+ pov_perms &= ~POE_W;
- if (wi->poe && wr->pov) {
wr->pr &= pov_perms & POE_R;
wr->pw &= pov_perms & POE_W;
wr->px &= pov_perms & POE_X;
}
- if (uov_perms & ~POE_RWX)
- uov_perms = POE_NONE;
+ if (wr->uov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL20:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL2:
+ uov_perms = 0;
+ break;
+ }
+
+ if (uov_perms & ~POE_RWX)
+ uov_perms = POE_NONE;
+
+ /* R_NPBXC, S1UnprivOverlay enabled */
+ if (wr->uwxn && (uov_perms & POE_X))
+ uov_perms &= ~POE_W;
- if (wi->e0poe && wr->uov) {
wr->ur &= uov_perms & POE_R;
wr->uw &= uov_perms & POE_W;
wr->ux &= uov_perms & POE_X;
@@ -1095,24 +1112,15 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
if (!wi->hpd)
compute_s1_hierarchical_permissions(vcpu, wi, wr);
- if (wi->poe || wi->e0poe)
- compute_s1_overlay_permissions(vcpu, wi, wr);
+ compute_s1_overlay_permissions(vcpu, wi, wr);
- /* R_QXXPC */
- if (wr->pwxn) {
- if (!wr->pov && wr->pw)
- wr->px = false;
- if (wr->pov && wr->px)
- wr->pw = false;
- }
+ /* R_QXXPC, S1PrivOverlay disabled */
+ if (!wr->pov)
+ wr->px &= !(wr->pwxn && wr->pw);
- /* R_NPBXC */
- if (wr->uwxn) {
- if (!wr->uov && wr->uw)
- wr->ux = false;
- if (wr->uov && wr->ux)
- wr->uw = false;
- }
+ /* R_NPBXC, S1UnprivOverlay disabled */
+ if (!wr->uov)
+ wr->ux &= !(wr->uwxn && wr->uw);
pan = wi->pan && (wr->ur || wr->uw ||
(pan3_enabled(vcpu, wi->regime) && wr->ux));
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index c829f9a385cc..da66c4a14775 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -131,6 +131,8 @@ struct reg_bits_to_feat_map {
#define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP
#define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP
#define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP
+#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP
+#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2
#define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP
#define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP
#define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT
@@ -902,6 +904,23 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
};
+static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
+ NEEDS_FEAT(SCTLR2_EL1_NMEA |
+ SCTLR2_EL1_EASE,
+ FEAT_DoubleFault2),
+ NEEDS_FEAT(SCTLR2_EL1_EnADERR, feat_aderr),
+ NEEDS_FEAT(SCTLR2_EL1_EnANERR, feat_anerr),
+ NEEDS_FEAT(SCTLR2_EL1_EnIDCP128, FEAT_SYSREG128),
+ NEEDS_FEAT(SCTLR2_EL1_EnPACM |
+ SCTLR2_EL1_EnPACM0,
+ feat_pauth_lr),
+ NEEDS_FEAT(SCTLR2_EL1_CPTA |
+ SCTLR2_EL1_CPTA0 |
+ SCTLR2_EL1_CPTM |
+ SCTLR2_EL1_CPTM0,
+ FEAT_CPA2),
+};
+
static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
NEEDS_FEAT(TCR2_EL2_FNG1 |
TCR2_EL2_FNG0 |
@@ -1060,6 +1079,8 @@ void __init check_feature_map(void)
__HCRX_EL2_RES0, "HCRX_EL2");
check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map),
HCR_EL2_RES0, "HCR_EL2");
+ check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map),
+ SCTLR2_EL1_RES0, "SCTLR2_EL1");
check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map),
TCR2_EL2_RES0, "TCR2_EL2");
check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map),
@@ -1280,6 +1301,13 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r
*res0 |= HCR_EL2_RES0 | (mask & ~fixed);
*res1 = HCR_EL2_RES1 | (mask & fixed);
break;
+ case SCTLR2_EL1:
+ case SCTLR2_EL2:
+ *res0 = compute_res0_bits(kvm, sctlr2_feat_map,
+ ARRAY_SIZE(sctlr2_feat_map), 0, 0);
+ *res0 |= SCTLR2_EL1_RES0;
+ *res1 = SCTLR2_EL1_RES1;
+ break;
case TCR2_EL2:
*res0 = compute_res0_bits(kvm, tcr2_el2_feat_map,
ARRAY_SIZE(tcr2_el2_feat_map), 0, 0);
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 3a384e9660b8..90cb4b7ae0ff 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -88,6 +88,7 @@ enum cgt_group_id {
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_HCRX_SCTLR2En,
CGT_CNTHCTL_EL1TVT,
CGT_CNTHCTL_EL1TVCT,
@@ -108,6 +109,7 @@ enum cgt_group_id {
CGT_HCR_TTLB_TTLBOS,
CGT_HCR_TVM_TRVM,
CGT_HCR_TVM_TRVM_HCRX_TCR2En,
+ CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
CGT_HCR_TPU_TICAB,
CGT_HCR_TPU_TOCU,
CGT_HCR_NV1_nNV2_ENSCXT,
@@ -398,6 +400,12 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_RW,
},
+ [CGT_HCRX_SCTLR2En] = {
+ .index = HCRX_EL2,
+ .value = 0,
+ .mask = HCRX_EL2_SCTLR2En,
+ .behaviour = BEHAVE_FORWARD_RW,
+ },
[CGT_CNTHCTL_EL1TVT] = {
.index = CNTHCTL_EL2,
.value = CNTHCTL_EL1TVT,
@@ -449,6 +457,8 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM),
MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En,
CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En),
+ MCB(CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
+ CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_SCTLR2En),
MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB),
MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU),
MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
@@ -782,6 +792,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_SCTLR2_EL1, CGT_HCR_TVM_TRVM_HCRX_SCTLR2En),
SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM),
@@ -1354,6 +1365,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1),
SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1),
SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1),
+ SR_FGT(SYS_SCTLR2_EL1, HFGRTR, SCTLR_EL1, 1),
SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1),
SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1),
SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1),
@@ -2592,13 +2604,8 @@ inject:
static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit)
{
- bool control_bit_set;
-
- if (!vcpu_has_nv(vcpu))
- return false;
-
- control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit;
- if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ if (is_nested_ctxt(vcpu) &&
+ (__vcpu_sys_reg(vcpu, reg) & control_bit)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
}
@@ -2719,6 +2726,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
case except_type_irq:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
break;
+ case except_type_serror:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+ break;
default:
WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
}
@@ -2816,3 +2826,28 @@ int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
/* esr_el2 value doesn't matter for exits due to irqs. */
return kvm_inject_nested(vcpu, 0, except_type_irq);
}
+
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK,
+ iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW);
+ esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL;
+
+ vcpu_write_sys_reg(vcpu, FAR_EL2, addr);
+
+ if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE)
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+
+ return kvm_inject_nested_sync(vcpu, esr);
+}
+
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+ /*
+ * Hardware sets up the EC field when propagating ESR as a result of
+ * vSError injection. Manually populate EC for an emulated SError
+ * exception.
+ */
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2196979a24a3..16ba5e9ac86c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
+ events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) ||
+ vcpu_get_flag(vcpu, NESTED_SERROR_PENDING);
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@@ -833,29 +834,62 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
return 0;
}
+static void commit_pending_events(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
+ return;
+
+ /*
+ * Reset the MMIO emulation state to avoid stepping PC after emulating
+ * the exception entry.
+ */
+ vcpu->mmio_needed = false;
+ kvm_call_hyp(__kvm_adjust_pc, vcpu);
+}
+
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
bool ext_dabt_pending = events->exception.ext_dabt_pending;
+ u64 esr = events->exception.serror_esr;
+ int ret = 0;
- if (serror_pending && has_esr) {
- if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- return -EINVAL;
-
- if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
- kvm_set_sei_esr(vcpu, events->exception.serror_esr);
- else
- return -EINVAL;
- } else if (serror_pending) {
- kvm_inject_vabt(vcpu);
+ /*
+ * Immediately commit the pending SEA to the vCPU's architectural
+ * state which is necessary since we do not return a pending SEA
+ * to userspace via KVM_GET_VCPU_EVENTS.
+ */
+ if (ext_dabt_pending) {
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ commit_pending_events(vcpu);
}
- if (ext_dabt_pending)
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ if (ret < 0)
+ return ret;
- return 0;
+ if (!serror_pending)
+ return 0;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr)
+ return -EINVAL;
+
+ if (has_esr && (esr & ~ESR_ELx_ISS_MASK))
+ return -EINVAL;
+
+ if (has_esr)
+ ret = kvm_inject_serror_esr(vcpu, esr);
+ else
+ ret = kvm_inject_serror(vcpu);
+
+ /*
+ * We could've decided that the SError is due for immediate software
+ * injection; commit the exception in case userspace decides it wants
+ * to inject more exceptions for some strange reason.
+ */
+ commit_pending_events(vcpu);
+ return (ret < 0) ? ret : 0;
}
u32 __attribute_const__ kvm_target_cpu(void)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 453266c96481..a598072f36d2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
static int handle_hvc(struct kvm_vcpu *vcpu)
@@ -252,7 +252,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
return 1;
}
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
@@ -311,12 +311,11 @@ static int kvm_handle_gcs(struct kvm_vcpu *vcpu)
static int handle_other(struct kvm_vcpu *vcpu)
{
- bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+ bool allowed, fwd = is_nested_ctxt(vcpu);
u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2);
u64 esr = kvm_vcpu_get_esr(vcpu);
u64 iss = ESR_ELx_ISS(esr);
struct kvm *kvm = vcpu->kvm;
- bool allowed, fwd = false;
/*
* We only trap for two reasons:
@@ -335,28 +334,23 @@ static int handle_other(struct kvm_vcpu *vcpu)
switch (iss) {
case ESR_ELx_ISS_OTHER_ST64BV:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnASR);
+ fwd &= !(hcrx & HCRX_EL2_EnASR);
break;
case ESR_ELx_ISS_OTHER_ST64BV0:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnAS0);
+ fwd &= !(hcrx & HCRX_EL2_EnAS0);
break;
case ESR_ELx_ISS_OTHER_LDST64B:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnALS);
+ fwd &= !(hcrx & HCRX_EL2_EnALS);
break;
case ESR_ELx_ISS_OTHER_TSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
break;
case ESR_ELx_ISS_OTHER_PSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
break;
default:
/* Clearly, we're missing something. */
@@ -496,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
} else {
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
return;
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 6a2a899a344e..95d186e0bf54 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -26,7 +26,8 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
if (unlikely(vcpu_has_nv(vcpu)))
return vcpu_read_sys_reg(vcpu, reg);
- else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
+ __vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
@@ -36,7 +37,8 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
- else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
+ else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) ||
+ !__vcpu_write_sys_reg_to_cpu(val, reg))
__vcpu_assign_sys_reg(vcpu, reg, val);
}
@@ -339,6 +341,10 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_serror);
+ break;
+
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
break;
@@ -347,9 +353,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror);
+ break;
+
default:
/*
- * Only EL1_SYNC and EL2_{SYNC,IRQ} makes
+ * Only EL1_{SYNC,SERR} and EL2_{SYNC,IRQ,SERR} makes
* sense so far. Everything else gets silently
* ignored.
*/
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 2ad57b117385..84ec4e100fbb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -298,7 +298,7 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
u64 val; \
\
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \
+ if (is_nested_ctxt(vcpu)) \
compute_clr_set(vcpu, reg, c, s); \
\
compute_undef_clr_set(vcpu, kvm, reg, c, s); \
@@ -436,7 +436,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = vcpu->arch.hcrx_el2;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2);
hcrx |= val & __HCRX_EL2_MASK;
hcrx &= ~(~val & __HCRX_EL2_nMASK);
@@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
write_sysreg_hcr(hcr);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) {
+ u64 vsesr;
+
+ /*
+ * When HCR_EL2.AMO is set, physical SErrors are taken to EL2
+ * and vSError injection is enabled for EL1. Conveniently, for
+ * NV this means that it is never the case where a 'physical'
+ * SError (injected by KVM or userspace) and vSError are
+ * deliverable to the same context.
+ *
+ * As such, we can trivially select between the host or guest's
+ * VSESR_EL2. Except for the case that FEAT_RAS hasn't been
+ * exposed to the guest, where ESR propagation in hardware
+ * occurs unconditionally.
+ *
+ * Paper over the architectural wart and use an IMPLEMENTATION
+ * DEFINED ESR value in case FEAT_RAS is hidden from the guest.
+ */
+ if (!vserror_state_is_nested(vcpu))
+ vsesr = vcpu->arch.vsesr_el2;
+ else if (kvm_has_ras(kern_hyp_va(vcpu->kvm)))
+ vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2);
+ else
+ vsesr = ESR_ELx_ISV;
+
+ write_sysreg_s(vsesr, SYS_VSESR_EL2);
+ }
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
+ u64 *hcr;
+
+ if (vserror_state_is_nested(vcpu))
+ hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2);
+ else
+ hcr = &vcpu->arch.hcr_el2;
+
/*
* If we pended a virtual abort, preserve it until it gets
* cleared. See D1.14.3 (Virtual Interrupts) for details, but
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
+ *
+ * Additionally, when in a nested context we need to propagate the
+ * updated state to the guest hypervisor's HCR_EL2.
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ if (*hcr & HCR_VSE) {
+ *hcr &= ~HCR_VSE;
+ *hcr |= read_sysreg(hcr_el2) & HCR_VSE;
}
}
@@ -531,7 +566,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
* nested guest, as the guest hypervisor could select a smaller VL. Slap
* that into hardware before wrapping up.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2);
write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR);
@@ -557,7 +592,7 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
if (vcpu_has_sve(vcpu)) {
/* A guest hypervisor may restrict the effective max VL. */
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
else
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4d0dbea4c56f..a17cbe7582de 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -109,6 +109,28 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}
+static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_ras(kern_hyp_va(vcpu->kvm));
+}
+
+static inline bool ctxt_has_sctlr2(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_SCTLR2))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_sctlr2(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -147,6 +169,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ ctxt_sys_reg(ctxt, SCTLR2_EL1) = read_sysreg_el1(SYS_SCTLR2);
}
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
@@ -159,8 +184,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
if (!has_vhe() && ctxt->__hyp_running_vcpu)
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+ else if (ctxt_has_ras(ctxt))
+ ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2);
}
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
@@ -252,6 +282,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR2_EL1), SYS_SCTLR2);
}
/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
@@ -275,6 +308,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
{
u64 pstate = to_hw_pstate(ctxt);
u64 mode = pstate & PSR_AA32_MODE_MASK;
+ u64 vdisr;
/*
* Safety check to ensure we're setting the CPU up to enter the guest
@@ -293,8 +327,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
write_sysreg_el2(pstate, SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
+ vdisr = ctxt_sys_reg(ctxt, DISR_EL1);
+ else if (ctxt_has_ras(ctxt))
+ vdisr = ctxt_sys_reg(ctxt, VDISR_EL2);
+ else
+ vdisr = 0;
+
+ write_sysreg_s(vdisr, SYS_VDISR_EL2);
}
static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index f162b0df5cae..d81275790e69 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -296,12 +296,19 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the ICC_SRE_EL1 system
- * register. Note that this may not have any effect, as
- * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due
+ * to be relaxed in a future spec release, at which point this in
+ * condition can be dropped.
*/
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ /*
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
+ }
/*
* If we need to trap system registers, we must write
@@ -322,8 +329,14 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ /*
+ * Can be dropped in the future when GICv5 spec is relaxed. See comment
+ * above.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ }
if (!cpu_if->vgic_sre) {
/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
@@ -423,10 +436,20 @@ void __vgic_v3_init_lrs(void)
*/
u64 __vgic_v3_get_gic_config(void)
{
- u64 val, sre = read_gicreg(ICC_SRE_EL1);
+ u64 val, sre;
unsigned long flags = 0;
/*
+ * In compat mode, we cannot access ICC_SRE_EL1 at any EL
+ * other than EL1 itself; just return the
+ * ICH_VTR_EL2. ICC_IDR0_EL1 is only implemented on a GICv5
+ * system, so we first check if we have GICv5 support.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return read_gicreg(ICH_VTR_EL2);
+
+ sre = read_gicreg(ICC_SRE_EL1);
+ /*
* To check whether we have a MMIO-based (GICv2 compatible)
* CPU interface, we need to disable the system register
* view.
@@ -471,6 +494,16 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
+static void __vgic_v3_compat_mode_enable(void)
+{
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return;
+
+ sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, 0, ICH_VCTLR_EL2_V3);
+ /* Wait for V3 to become enabled */
+ isb();
+}
+
static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
@@ -490,6 +523,8 @@ void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
+ __vgic_v3_compat_mode_enable();
+
/*
* If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
* is dependent on ICC_SRE_EL1.SRE, and we have to perform the
@@ -1050,7 +1085,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
{
u64 ich_hcr;
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ if (!is_nested_ctxt(vcpu))
return false;
ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 477f1580ffea..e482181c6632 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -48,8 +48,7 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static u64 __compute_hcr(struct kvm_vcpu *vcpu)
{
- u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
- u64 hcr = vcpu->arch.hcr_el2;
+ u64 guest_hcr, hcr = vcpu->arch.hcr_el2;
if (!vcpu_has_nv(vcpu))
return hcr;
@@ -68,10 +67,21 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
if (!vcpu_el2_e2h_is_set(vcpu))
hcr |= HCR_NV1;
+ /*
+ * Nothing in HCR_EL2 should impact running in hypervisor
+ * context, apart from bits we have defined as RESx (E2H,
+ * HCD and co), or that cannot be set directly (the EXCLUDE
+ * bits). Given that we OR the guest's view with the host's,
+ * we can use the 0 value as the starting point, and only
+ * use the config-driven RES1 bits.
+ */
+ guest_hcr = kvm_vcpu_apply_reg_masks(vcpu, HCR_EL2, 0);
+
write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
} else {
host_data_clear_flag(VCPU_IN_HYP_CONTEXT);
+ guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
if (guest_hcr & HCR_NV) {
u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id()));
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 73e4bc7fde9e..f28c6cf4fe1b 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -77,6 +77,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
__vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1));
__vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR));
__vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR));
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ __vcpu_assign_sys_reg(vcpu, SCTLR2_EL2, read_sysreg_el1(SYS_SCTLR2));
}
static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
@@ -139,6 +142,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR2_EL2), SYS_SCTLR2);
}
/*
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a640e839848e..6745f38b64f9 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -15,13 +15,11 @@
#include <asm/kvm_nested.h>
#include <asm/esr.h>
-static void pend_sync_exception(struct kvm_vcpu *vcpu)
+static unsigned int exception_target_el(struct kvm_vcpu *vcpu)
{
/* If not nesting, EL1 is the only possible exception target */
- if (likely(!vcpu_has_nv(vcpu))) {
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- return;
- }
+ if (likely(!vcpu_has_nv(vcpu)))
+ return PSR_MODE_EL1h;
/*
* With NV, we need to pick between EL1 and EL2. Note that we
@@ -32,26 +30,76 @@ static void pend_sync_exception(struct kvm_vcpu *vcpu)
switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- break;
+ return PSR_MODE_EL2h;
case PSR_MODE_EL1h:
case PSR_MODE_EL1t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return PSR_MODE_EL1h;
case PSR_MODE_EL0t:
- if (vcpu_el2_tge_is_set(vcpu))
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- else
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return vcpu_el2_tge_is_set(vcpu) ? PSR_MODE_EL2h : PSR_MODE_EL1h;
default:
BUG();
}
}
-static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
+static enum vcpu_sysreg exception_esr_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return ESR_EL2;
+
+ return ESR_EL1;
+}
+
+static enum vcpu_sysreg exception_far_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return FAR_EL2;
+
+ return FAR_EL1;
+}
+
+static void pend_sync_exception(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+}
+
+static void pend_serror_exception(struct kvm_vcpu *vcpu)
{
- return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SERR);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+}
+
+static bool __effective_sctlr2_bit(struct kvm_vcpu *vcpu, unsigned int idx)
+{
+ u64 sctlr2;
+
+ if (!kvm_has_sctlr2(vcpu->kvm))
+ return false;
+
+ if (is_nested_ctxt(vcpu) &&
+ !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_SCTLR2En))
+ return false;
+
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL1);
+ else
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL2);
+
+ return sctlr2 & BIT(idx);
+}
+
+static bool effective_sctlr2_ease(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_EASE_SHIFT);
+}
+
+static bool effective_sctlr2_nmea(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_NMEA_SHIFT);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -60,7 +108,11 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
- pend_sync_exception(vcpu);
+ /* This delight is brought to you by FEAT_DoubleFault2. */
+ if (effective_sctlr2_ease(vcpu))
+ pend_serror_exception(vcpu);
+ else
+ pend_sync_exception(vcpu);
/*
* Build an {i,d}abort, depending on the level and the
@@ -83,13 +135,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
esr |= ESR_ELx_FSC_EXTABT;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- } else {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
- }
+ vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -105,10 +152,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- else
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
#define DFSR_FSC_EXTABT_LPAE 0x10
@@ -155,36 +199,35 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
vcpu_write_sys_reg(vcpu, far, FAR_EL1);
}
-/**
- * kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the data abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static void __kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
{
if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, false, addr);
+ inject_abt32(vcpu, iabt, addr);
else
- inject_abt64(vcpu, false, addr);
+ inject_abt64(vcpu, iabt, addr);
}
-/**
- * kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the prefetch abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu)
{
- if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, true, addr);
- else
- inject_abt64(vcpu, true, addr);
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_TGE | HCR_TEA))
+ return true;
+
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) &&
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu))
+ return kvm_inject_nested_sea(vcpu, iabt, addr);
+
+ __kvm_inject_sea(vcpu, iabt, addr);
+ return 1;
}
void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
@@ -194,10 +237,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
addr = kvm_vcpu_get_fault_ipa(vcpu);
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (kvm_vcpu_trap_is_iabt(vcpu))
- kvm_inject_pabt(vcpu, addr);
- else
- kvm_inject_dabt(vcpu, addr);
+ __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr);
/*
* If AArch64 or LPAE, set FSC to 0 to indicate an Address
@@ -210,9 +250,9 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
return;
- esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+ esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu));
esr &= ~GENMASK_ULL(5, 0);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
/**
@@ -230,25 +270,70 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
+static bool serror_is_masked(struct kvm_vcpu *vcpu)
{
- vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
- *vcpu_hcr(vcpu) |= HCR_VSE;
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && !effective_sctlr2_nmea(vcpu);
}
-/**
- * kvm_inject_vabt - inject an async abort / SError into the guest
- * @vcpu: The VCPU to receive the exception
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- *
- * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
- * the remaining ISS all-zeros so that this error is not interpreted as an
- * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
- * value, so the CPU generates an imp-def value.
- */
-void kvm_inject_vabt(struct kvm_vcpu *vcpu)
+static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu)
+{
+ if (is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu))
+ return true;
+
+ if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA))
+ return false;
+
+ /*
+ * In another example where FEAT_DoubleFault2 is entirely backwards,
+ * "masked" as it relates to the routing effects of HCRX_EL2.TMEA
+ * doesn't consider SCTLR2_EL1.NMEA. That is to say, even if EL1 asked
+ * for non-maskable SErrors, the EL2 bit takes priority if A is set.
+ */
+ if (vcpu_mode_priv(vcpu))
+ return *vcpu_cpsr(vcpu) & PSR_A_BIT;
+
+ /*
+ * Otherwise SErrors are considered unmasked when taken from EL0 and
+ * NMEA is set.
+ */
+ return serror_is_masked(vcpu);
+}
+
+static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu));
+}
+
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr)
{
- kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu))
+ return kvm_inject_nested_serror(vcpu, esr);
+
+ if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) {
+ vcpu_set_vsesr(vcpu, esr);
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+ return 1;
+ }
+
+ /*
+ * Emulate the exception entry if SErrors are unmasked. This is useful if
+ * the vCPU is in a nested context w/ vSErrors enabled then we've already
+ * delegated he hardware vSError context (i.e. HCR_EL2.VSE, VSESR_EL2,
+ * VDISR_EL2) to the guest hypervisor.
+ *
+ * As we're emulating the SError injection we need to explicitly populate
+ * ESR_ELx.EC because hardware will not do it on our behalf.
+ */
+ if (!serror_is_masked(vcpu)) {
+ pend_serror_exception(vcpu);
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+ return 1;
+ }
+
+ vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
+ return 1;
}
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index ab365e839874..54f9358c9e0e 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -72,7 +72,7 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
return data;
}
-static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
+static bool kvm_pending_external_abort(struct kvm_vcpu *vcpu)
{
if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
return false;
@@ -90,6 +90,8 @@ static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
return true;
default:
return false;
@@ -113,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
* Detect if the MMIO return was already handled or if userspace aborted
* the MMIO access.
*/
- if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu)))
+ if (unlikely(!vcpu->mmio_needed || kvm_pending_external_abort(vcpu)))
return 1;
vcpu->mmio_needed = 0;
@@ -169,10 +171,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
- if (vcpu_is_protected(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
+ if (vcpu_is_protected(vcpu))
+ return kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2942ec92c5a4..1c78864767c5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
return 0;
}
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
- return !pfn_is_map_memory(pfn);
-}
-
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
@@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}
+static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
+{
+ switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
+ case MT_NORMAL_NC:
+ case MT_DEVICE_nGnRnE:
+ case MT_DEVICE_nGnRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
int ret = 0;
bool write_fault, writable, force_pte = false;
- bool exec_fault, mte_allowed;
- bool device = false, vfio_allow_any_uc = false;
+ bool exec_fault, mte_allowed, is_vma_cacheable;
+ bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
phys_addr_t ipa = fault_ipa;
struct kvm *kvm = vcpu->kvm;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
+ vm_flags_t vm_flags;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
if (fault_is_perm)
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+ vm_flags = vma->vm_flags;
+
+ is_vma_cacheable = kvm_vma_is_cacheable(vma);
+
/* Don't use the VMA after the unlock -- it may have vanished */
vma = NULL;
@@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (is_error_noslot_pfn(pfn))
return -EFAULT;
- if (kvm_is_device_pfn(pfn)) {
- /*
- * If the page was identified as device early by looking at
- * the VMA flags, vma_pagesize is already representing the
- * largest quantity we can map. If instead it was mapped
- * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
- * and must not be upgraded.
- *
- * In both cases, we don't let transparent_hugepage_adjust()
- * change things at the last minute.
- */
- device = true;
+ /*
+ * Check if this is non-struct page memory PFN, and cannot support
+ * CMOs. It could potentially be unsafe to access as cachable.
+ */
+ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
+ if (is_vma_cacheable) {
+ /*
+ * Whilst the VMA owner expects cacheable mapping to this
+ * PFN, hardware also has to support the FWB and CACHE DIC
+ * features.
+ *
+ * ARM64 KVM relies on kernel VA mapping to the PFN to
+ * perform cache maintenance as the CMO instructions work on
+ * virtual addresses. VM_PFNMAP region are not necessarily
+ * mapped to a KVA and hence the presence of hardware features
+ * S2FWB and CACHE DIC are mandatory to avoid the need for
+ * cache maintenance.
+ */
+ if (!kvm_supports_cacheable_pfnmap())
+ return -EFAULT;
+ } else {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
+ s2_force_noncacheable = true;
+ }
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
- if (exec_fault && device)
+ if (exec_fault && s2_force_noncacheable)
return -ENOEXEC;
/*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
if (fault_is_perm && fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
}
- if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
+ if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- if (device) {
+ if (s2_force_noncacheable) {
if (vfio_allow_any_uc)
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
@@ -1808,7 +1841,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* There is no need to pass the error into the guest.
*/
if (kvm_handle_guest_sea())
- kvm_inject_vabt(vcpu);
+ return kvm_inject_serror(vcpu);
return 1;
}
@@ -1836,11 +1869,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (is_iabt)
- kvm_inject_pabt(vcpu, fault_ipa);
- else
- kvm_inject_dabt(vcpu, fault_ipa);
- return 1;
+ return kvm_inject_sea(vcpu, is_iabt, fault_ipa);
}
}
@@ -1912,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
if (kvm_vcpu_abt_iss1tw(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
goto out_unlock;
}
@@ -1958,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (ret == 0)
ret = 1;
out:
- if (ret == -ENOEXEC) {
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- }
+ if (ret == -ENOEXEC)
+ ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu));
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2221,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
ret = -EINVAL;
break;
}
+
+ /*
+ * Cacheable PFNMAP is allowed only if the hardware
+ * supports it.
+ */
+ if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) {
+ ret = -EINVAL;
+ break;
+ }
}
hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 77f92d79d442..efb37aad11ec 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1424,12 +1424,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
break;
case SYS_ID_AA64PFR0_EL1:
- /* No RME, AMU, MPAM, S-EL2, or RAS */
+ /* No RME, AMU, MPAM, or S-EL2 */
val &= ~(ID_AA64PFR0_EL1_RME |
ID_AA64PFR0_EL1_AMU |
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SEL2 |
- ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_EL3 |
ID_AA64PFR0_EL1_EL2 |
ID_AA64PFR0_EL1_EL1 |
@@ -1670,6 +1669,12 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1);
set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+ /* SCTLR2_ELx */
+ get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1);
+ get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1);
+
/* MDCR_EL2 */
get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1);
set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
@@ -1728,3 +1733,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
kvm_inject_nested_irq(vcpu);
}
+
+/*
+ * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor
+ * can write to HCR_EL2 behind our back, potentially changing the exception
+ * routing / masking for even the host context.
+ *
+ * What follows is some slop to (1) react to exception routing / masking and (2)
+ * preserve the pending SError state across translation regimes.
+ */
+void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
+
+void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long *hcr = vcpu_hcr(vcpu);
+
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ /*
+ * We previously decided that an SError was deliverable to the guest.
+ * Reap the pending state from HCR_EL2 and...
+ */
+ if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr)))
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+
+ /*
+ * Re-attempt SError injection in case the deliverability has changed,
+ * which is necessary to faithfully emulate WFI the case of a pending
+ * SError being a wakeup condition.
+ */
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 76c2f0da821f..93a6f87748ef 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -144,6 +144,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
default:
return false;
}
@@ -1612,7 +1613,6 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
break;
@@ -1643,8 +1643,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64MMFR2_EL1_NV;
break;
case SYS_ID_AA64MMFR3_EL1:
- val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
- ID_AA64MMFR3_EL1_S1PIE;
+ val &= ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
+ ID_AA64MMFR3_EL1_S1POE |
+ ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -1811,7 +1813,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
}
- if (kvm_vgic_global_state.type == VGIC_V3) {
+ if (vgic_is_v3(vcpu->kvm)) {
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
}
@@ -1953,6 +1955,14 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
(vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
return -EINVAL;
+ /*
+ * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then
+ * we support GICv3. Fail attempts to do anything but set that to IMP.
+ */
+ if (vgic_is_v3_compat(vcpu->kvm) &&
+ FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP)
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, user_val);
}
@@ -2483,6 +2493,21 @@ static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_sctlr2(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, sctlr2_visibility);
+}
+
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -2639,6 +2664,23 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_ras(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ switch(reg_to_encoding(r)) {
+ default:
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+ }
+
+ return trap_raz_wi(vcpu, p, r);
+}
+
/*
* For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and
* AIDR_EL1 as "invariant" registers, meaning userspace cannot change them.
@@ -2866,7 +2908,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_FP)),
ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1,
~(ID_AA64PFR1_EL1_PFAR |
- ID_AA64PFR1_EL1_DF2 |
ID_AA64PFR1_EL1_MTEX |
ID_AA64PFR1_EL1_THE |
ID_AA64PFR1_EL1_GCS |
@@ -2945,6 +2986,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac),
@@ -2955,6 +2997,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+ { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0,
+ .visibility = sctlr2_visibility },
MTE_REG(RGSR_EL1),
MTE_REG(GCR_EL1),
@@ -2984,14 +3028,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
- { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERRIDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERRSELR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXFR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXCTLR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras },
+ { SYS_DESC(SYS_ERXADDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC0_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC1_EL1), access_ras },
MTE_REG(TFSR_EL1),
MTE_REG(TFSRE0_EL1),
@@ -3302,6 +3346,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
+ EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0,
+ sctlr2_el2_visibility),
EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
@@ -3344,6 +3390,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
+ EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
@@ -3372,6 +3419,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_RMR_EL2), undef_access },
+ EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0),
EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0),
EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0),
@@ -4475,7 +4523,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
return true;
kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
- params->is_write ? "write" : "read", reg_id);
+ str_write_read(params->is_write), reg_id);
return false;
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index ef97d9fc67cc..317abc490368 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -108,7 +108,7 @@ inline void print_sys_reg_msg(const struct sys_reg_params *p,
/* Look, we even formatted it for you to paste into the table! */
kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
&(struct va_format){ fmt, &va },
- p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, str_write_read(p->is_write));
va_end(va);
}
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index f85415db7713..a7ab9a3bbed0 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -113,7 +113,7 @@ TRACE_EVENT(kvm_sys_access,
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
- __entry->is_write ? "write" : "read")
+ str_write_read(__entry->is_write))
);
TRACE_EVENT(kvm_set_guest_debug,
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index eb1205654ac8..72442c825d19 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -674,10 +674,12 @@ void kvm_vgic_init_cpu_hardware(void)
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (kvm_vgic_global_state.type == VGIC_V2) {
vgic_v2_init_lrs();
- else
+ } else if (kvm_vgic_global_state.type == VGIC_V3 ||
+ kvm_vgic_global_state.has_gcie_v3_compat) {
kvm_call_hyp(__vgic_v3_init_lrs);
+ }
}
/**
@@ -722,6 +724,9 @@ int kvm_vgic_hyp_init(void)
kvm_info("GIC system register CPU interface enabled\n");
}
break;
+ case GIC_V5:
+ ret = vgic_v5_probe(gic_kvm_info);
+ break;
default:
ret = -ENODEV;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 534049c7c94b..b34f8976c9cc 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2694,6 +2694,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
break;
+ default:
+ ret = -ENXIO;
+ break;
}
mutex_unlock(&its->its_lock);
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index a50fb7e6841f..9c3997776f50 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -116,7 +116,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
{
u64 xmo;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
"Separate virtual IRQ/FIQ settings not supported\n");
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
new file mode 100644
index 000000000000..6bdbb221bcde
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <kvm/arm_vgic.h>
+#include <linux/irqchip/arm-vgic-info.h>
+
+#include "vgic.h"
+
+/*
+ * Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
+ * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
+ * registers a VGIC_V3 device.
+ */
+int vgic_v5_probe(const struct gic_kvm_info *info)
+{
+ u64 ich_vtr_el2;
+ int ret;
+
+ if (!info->has_gcie_v3_compat)
+ return -ENODEV;
+
+ kvm_vgic_global_state.type = VGIC_V5;
+ kvm_vgic_global_state.has_gcie_v3_compat = true;
+
+ /* We only support v3 compat mode - use vGICv3 limits */
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+ kvm_vgic_global_state.vcpu_base = 0;
+ kvm_vgic_global_state.vctrl_base = NULL;
+ kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.has_gicv4 = false;
+ kvm_vgic_global_state.has_gicv4_1 = false;
+
+ ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+ kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (ret) {
+ kvm_err("Cannot register GICv3-legacy KVM device.\n");
+ return ret;
+ }
+
+ static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
+ kvm_info("GCIE legacy system register CPU interface\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 4349084cb9a6..4f1e123b063e 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -308,6 +308,8 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
+int vgic_v5_probe(const struct gic_kvm_info *info);
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
@@ -389,6 +391,17 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
+static inline bool vgic_is_v3_compat(struct kvm *kvm)
+{
+ return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
+ kvm_vgic_global_state.has_gcie_v3_compat;
+}
+
+static inline bool vgic_is_v3(struct kvm *kvm)
+{
+ return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
+}
+
int vgic_its_debug_init(struct kvm_device *dev);
void vgic_its_debug_destroy(struct kvm_device *dev);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 10effd4cff6b..115161dd9a24 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,7 +35,8 @@ HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
-HAS_GIC_CPUIF_SYSREGS
+HAS_GICV3_CPUIF
+HAS_GICV5_CPUIF
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCR_NV1
@@ -49,6 +50,7 @@ HAS_PAN
HAS_PMUV3
HAS_S1PIE
HAS_S1POE
+HAS_SCTLR2
HAS_RAS_EXTN
HAS_RNG
HAS_SB
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index c9dc6645a552..89857cb2725b 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1314,7 +1314,10 @@ UnsignedEnum 19:16 UINJ
0b0000 NI
0b0001 IMP
EndEnum
-Res0 15:12
+UnsignedEnum 15:12 GCIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 11:8 MTEFAR
0b0000 NI
0b0001 IMP
@@ -3021,6 +3024,435 @@ Sysreg PMIAR_EL1 3 0 9 14 7
Field 63:0 ADDRESS
EndSysreg
+SysregFields ICC_PPI_HMRx_EL1
+Field 63 HM63
+Field 62 HM62
+Field 61 HM61
+Field 60 HM60
+Field 59 HM59
+Field 58 HM58
+Field 57 HM57
+Field 56 HM56
+Field 55 HM55
+Field 54 HM54
+Field 53 HM53
+Field 52 HM52
+Field 51 HM51
+Field 50 HM50
+Field 49 HM49
+Field 48 HM48
+Field 47 HM47
+Field 46 HM46
+Field 45 HM45
+Field 44 HM44
+Field 43 HM43
+Field 42 HM42
+Field 41 HM41
+Field 40 HM40
+Field 39 HM39
+Field 38 HM38
+Field 37 HM37
+Field 36 HM36
+Field 35 HM35
+Field 34 HM34
+Field 33 HM33
+Field 32 HM32
+Field 31 HM31
+Field 30 HM30
+Field 29 HM29
+Field 28 HM28
+Field 27 HM27
+Field 26 HM26
+Field 25 HM25
+Field 24 HM24
+Field 23 HM23
+Field 22 HM22
+Field 21 HM21
+Field 20 HM20
+Field 19 HM19
+Field 18 HM18
+Field 17 HM17
+Field 16 HM16
+Field 15 HM15
+Field 14 HM14
+Field 13 HM13
+Field 12 HM12
+Field 11 HM11
+Field 10 HM10
+Field 9 HM9
+Field 8 HM8
+Field 7 HM7
+Field 6 HM6
+Field 5 HM5
+Field 4 HM4
+Field 3 HM3
+Field 2 HM2
+Field 1 HM1
+Field 0 HM0
+EndSysregFields
+
+Sysreg ICC_PPI_HMR0_EL1 3 0 12 10 0
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_HMR1_EL1 3 0 12 10 1
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_IDR0_EL1 3 0 12 10 2
+Res0 63:12
+UnsignedEnum 11:8 GCIE_LEGACY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 PRI_BITS
+ 0b0011 4BITS
+ 0b0100 5BITS
+EndEnum
+UnsignedEnum 3:0 ID_BITS
+ 0b0000 16BITS
+ 0b0001 24BITS
+EndEnum
+EndSysreg
+
+Sysreg ICC_ICSR_EL1 3 0 12 10 4
+Res0 63:48
+Field 47:32 IAFFID
+Res0 31:16
+Field 15:11 Priority
+Res0 10:6
+Field 5 HM
+Field 4 Active
+Field 3 IRM
+Field 2 Pending
+Field 1 Enabled
+Field 0 F
+EndSysreg
+
+SysregFields ICC_PPI_ENABLERx_EL1
+Field 63 EN63
+Field 62 EN62
+Field 61 EN61
+Field 60 EN60
+Field 59 EN59
+Field 58 EN58
+Field 57 EN57
+Field 56 EN56
+Field 55 EN55
+Field 54 EN54
+Field 53 EN53
+Field 52 EN52
+Field 51 EN51
+Field 50 EN50
+Field 49 EN49
+Field 48 EN48
+Field 47 EN47
+Field 46 EN46
+Field 45 EN45
+Field 44 EN44
+Field 43 EN43
+Field 42 EN42
+Field 41 EN41
+Field 40 EN40
+Field 39 EN39
+Field 38 EN38
+Field 37 EN37
+Field 36 EN36
+Field 35 EN35
+Field 34 EN34
+Field 33 EN33
+Field 32 EN32
+Field 31 EN31
+Field 30 EN30
+Field 29 EN29
+Field 28 EN28
+Field 27 EN27
+Field 26 EN26
+Field 25 EN25
+Field 24 EN24
+Field 23 EN23
+Field 22 EN22
+Field 21 EN21
+Field 20 EN20
+Field 19 EN19
+Field 18 EN18
+Field 17 EN17
+Field 16 EN16
+Field 15 EN15
+Field 14 EN14
+Field 13 EN13
+Field 12 EN12
+Field 11 EN11
+Field 10 EN10
+Field 9 EN9
+Field 8 EN8
+Field 7 EN7
+Field 6 EN6
+Field 5 EN5
+Field 4 EN4
+Field 3 EN3
+Field 2 EN2
+Field 1 EN1
+Field 0 EN0
+EndSysregFields
+
+Sysreg ICC_PPI_ENABLER0_EL1 3 0 12 10 6
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_ENABLER1_EL1 3 0 12 10 7
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_ACTIVERx_EL1
+Field 63 Active63
+Field 62 Active62
+Field 61 Active61
+Field 60 Active60
+Field 59 Active59
+Field 58 Active58
+Field 57 Active57
+Field 56 Active56
+Field 55 Active55
+Field 54 Active54
+Field 53 Active53
+Field 52 Active52
+Field 51 Active51
+Field 50 Active50
+Field 49 Active49
+Field 48 Active48
+Field 47 Active47
+Field 46 Active46
+Field 45 Active45
+Field 44 Active44
+Field 43 Active43
+Field 42 Active42
+Field 41 Active41
+Field 40 Active40
+Field 39 Active39
+Field 38 Active38
+Field 37 Active37
+Field 36 Active36
+Field 35 Active35
+Field 34 Active34
+Field 33 Active33
+Field 32 Active32
+Field 31 Active31
+Field 30 Active30
+Field 29 Active29
+Field 28 Active28
+Field 27 Active27
+Field 26 Active26
+Field 25 Active25
+Field 24 Active24
+Field 23 Active23
+Field 22 Active22
+Field 21 Active21
+Field 20 Active20
+Field 19 Active19
+Field 18 Active18
+Field 17 Active17
+Field 16 Active16
+Field 15 Active15
+Field 14 Active14
+Field 13 Active13
+Field 12 Active12
+Field 11 Active11
+Field 10 Active10
+Field 9 Active9
+Field 8 Active8
+Field 7 Active7
+Field 6 Active6
+Field 5 Active5
+Field 4 Active4
+Field 3 Active3
+Field 2 Active2
+Field 1 Active1
+Field 0 Active0
+EndSysregFields
+
+Sysreg ICC_PPI_CACTIVER0_EL1 3 0 12 13 0
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CACTIVER1_EL1 3 0 12 13 1
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER0_EL1 3 0 12 13 2
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER1_EL1 3 0 12 13 3
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PENDRx_EL1
+Field 63 Pend63
+Field 62 Pend62
+Field 61 Pend61
+Field 60 Pend60
+Field 59 Pend59
+Field 58 Pend58
+Field 57 Pend57
+Field 56 Pend56
+Field 55 Pend55
+Field 54 Pend54
+Field 53 Pend53
+Field 52 Pend52
+Field 51 Pend51
+Field 50 Pend50
+Field 49 Pend49
+Field 48 Pend48
+Field 47 Pend47
+Field 46 Pend46
+Field 45 Pend45
+Field 44 Pend44
+Field 43 Pend43
+Field 42 Pend42
+Field 41 Pend41
+Field 40 Pend40
+Field 39 Pend39
+Field 38 Pend38
+Field 37 Pend37
+Field 36 Pend36
+Field 35 Pend35
+Field 34 Pend34
+Field 33 Pend33
+Field 32 Pend32
+Field 31 Pend31
+Field 30 Pend30
+Field 29 Pend29
+Field 28 Pend28
+Field 27 Pend27
+Field 26 Pend26
+Field 25 Pend25
+Field 24 Pend24
+Field 23 Pend23
+Field 22 Pend22
+Field 21 Pend21
+Field 20 Pend20
+Field 19 Pend19
+Field 18 Pend18
+Field 17 Pend17
+Field 16 Pend16
+Field 15 Pend15
+Field 14 Pend14
+Field 13 Pend13
+Field 12 Pend12
+Field 11 Pend11
+Field 10 Pend10
+Field 9 Pend9
+Field 8 Pend8
+Field 7 Pend7
+Field 6 Pend6
+Field 5 Pend5
+Field 4 Pend4
+Field 3 Pend3
+Field 2 Pend2
+Field 1 Pend1
+Field 0 Pend0
+EndSysregFields
+
+Sysreg ICC_PPI_CPENDR0_EL1 3 0 12 13 4
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CPENDR1_EL1 3 0 12 13 5
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR0_EL1 3 0 12 13 6
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR1_EL1 3 0 12 13 7
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PRIORITYRx_EL1
+Res0 63:61
+Field 60:56 Priority7
+Res0 55:53
+Field 52:48 Priority6
+Res0 47:45
+Field 44:40 Priority5
+Res0 39:37
+Field 36:32 Priority4
+Res0 31:29
+Field 28:24 Priority3
+Res0 23:21
+Field 20:16 Priority2
+Res0 15:13
+Field 12:8 Priority1
+Res0 7:5
+Field 4:0 Priority0
+EndSysregFields
+
+Sysreg ICC_PPI_PRIORITYR0_EL1 3 0 12 14 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR1_EL1 3 0 12 14 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR2_EL1 3 0 12 14 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR3_EL1 3 0 12 14 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR4_EL1 3 0 12 14 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR5_EL1 3 0 12 14 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR6_EL1 3 0 12 14 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR7_EL1 3 0 12 14 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR8_EL1 3 0 12 15 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR9_EL1 3 0 12 15 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR10_EL1 3 0 12 15 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR11_EL1 3 0 12 15 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR12_EL1 3 0 12 15 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR13_EL1 3 0 12 15 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR14_EL1 3 0 12 15 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR15_EL1 3 0 12 15 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
Sysreg PMSELR_EL0 3 3 9 12 5
Res0 63:5
Field 4:0 SEL
@@ -3103,6 +3535,19 @@ Res0 14:12
Field 11:0 AFFINITY
EndSysreg
+Sysreg ICC_CR0_EL1 3 1 12 0 1
+Res0 63:39
+Field 38 PID
+Field 37:32 IPPT
+Res0 31:1
+Field 0 EN
+EndSysreg
+
+Sysreg ICC_PCR_EL1 3 1 12 0 2
+Res0 63:5
+Field 4:0 PRIORITY
+EndSysreg
+
Sysreg CSSELR_EL1 3 2 0 0 0
Res0 63:5
Field 4 TnD
@@ -3989,6 +4434,54 @@ Field 31:16 PhyPARTID29
Field 15:0 PhyPARTID28
EndSysreg
+Sysreg ICH_HFGRTR_EL2 3 4 12 9 4
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Field 16 ICC_PPI_HMRn_EL1
+Res0 15:8
+Field 7 ICC_IAFFIDR_EL1
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Field 4 ICC_HPPIR_EL1
+Field 3 ICC_HAPR_EL1
+Field 2 ICC_CR0_EL1
+Field 1 ICC_IDRn_EL1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGWTR_EL2 3 4 12 9 6
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Res0 16:7
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Res0 4:3
+Field 2 ICC_CR0_EL1
+Res0 1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGITR_EL2 3 4 12 9 7
+Res0 63:11
+Field 10 GICRCDNMIA
+Field 9 GICRCDIA
+Field 8 GICCDDI
+Field 7 GICCDEOI
+Field 6 GICCDHM
+Field 5 GICCDRCFG
+Field 4 GICCDPEND
+Field 3 GICCDAFF
+Field 2 GICCDPRI
+Field 1 GICCDDIS
+Field 0 GICCDEN
+EndSysreg
+
Sysreg ICH_HCR_EL2 3 4 12 11 0
Res0 63:32
Field 31:27 EOIcount
@@ -4037,6 +4530,12 @@ Field 1 U
Field 0 EOI
EndSysreg
+Sysreg ICH_VCTLR_EL2 3 4 12 11 4
+Res0 63:2
+Field 1 V3
+Field 0 En
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 0d196e447142..67d79d33407a 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -41,10 +41,14 @@ config ARM_GIC_V3
select HAVE_ARM_SMCCC_DISCOVERY
select IRQ_MSI_IOMMU
+config ARM_GIC_ITS_PARENT
+ bool
+
config ARM_GIC_V3_ITS
bool
select GENERIC_MSI_IRQ
select IRQ_MSI_LIB
+ select ARM_GIC_ITS_PARENT
default ARM_GIC_V3
select IRQ_MSI_IOMMU
@@ -54,6 +58,14 @@ config ARM_GIC_V3_ITS_FSL_MC
depends on FSL_MC_BUS
default ARM_GIC_V3_ITS
+config ARM_GIC_V5
+ bool
+ select IRQ_DOMAIN_HIERARCHY
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_MSI_IRQ
+ select IRQ_MSI_LIB
+ select ARM_GIC_ITS_PARENT
+
config ARM_NVIC
bool
select IRQ_DOMAIN_HIERARCHY
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 23ca4959e6ce..e83dad932ac0 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -33,9 +33,12 @@ obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o
obj-$(CONFIG_IRQ_MSI_LIB) += irq-msi-lib.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o
-obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v4.o irq-gic-v3-its-msi-parent.o
+obj-$(CONFIG_ARM_GIC_ITS_PARENT) += irq-gic-its-msi-parent.o
+obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v4.o
obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC) += irq-gic-v3-its-fsl-mc-msi.o
obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o
+obj-$(CONFIG_ARM_GIC_V5) += irq-gic-v5.o irq-gic-v5-irs.o irq-gic-v5-its.o \
+ irq-gic-v5-iwb.o
obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o
obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
obj-$(CONFIG_ARM_VIC) += irq-vic.o
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 020ecdf16901..710cab61d919 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -29,8 +29,6 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void gic_enable_of_quirks(const struct device_node *np,
const struct gic_quirk *quirks, void *data);
-extern const struct msi_parent_ops gic_v3_its_msi_parent_ops;
-
#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0)
#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1)
#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2)
diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-its-msi-parent.c
index a5e110ffdd88..eb1473f1448a 100644
--- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c
+++ b/drivers/irqchip/irq-gic-its-msi-parent.c
@@ -5,9 +5,10 @@
// Copyright (C) 2022 Intel
#include <linux/acpi_iort.h>
+#include <linux/of_address.h>
#include <linux/pci.h>
-#include "irq-gic-common.h"
+#include "irq-gic-its-msi-parent.h"
#include <linux/irqchip/irq-msi-lib.h>
#define ITS_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
@@ -18,6 +19,23 @@
MSI_FLAG_PCI_MSIX | \
MSI_FLAG_MULTI_PCI_MSI)
+static int its_translate_frame_address(struct device_node *msi_node, phys_addr_t *pa)
+{
+ struct resource res;
+ int ret;
+
+ ret = of_property_match_string(msi_node, "reg-names", "ns-translate");
+ if (ret < 0)
+ return ret;
+
+ ret = of_address_to_resource(msi_node, ret, &res);
+ if (ret)
+ return ret;
+
+ *pa = res.start;
+ return 0;
+}
+
#ifdef CONFIG_PCI_MSI
static int its_pci_msi_vec_count(struct pci_dev *pdev, void *data)
{
@@ -82,8 +100,46 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
msi_info = msi_get_domain_info(domain->parent);
return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info);
}
+
+static int its_v5_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *info)
+{
+ struct device_node *msi_node = NULL;
+ struct msi_domain_info *msi_info;
+ struct pci_dev *pdev;
+ phys_addr_t pa;
+ u32 rid;
+ int ret;
+
+ if (!dev_is_pci(dev))
+ return -EINVAL;
+
+ pdev = to_pci_dev(dev);
+
+ rid = pci_msi_map_rid_ctlr_node(pdev, &msi_node);
+ if (!msi_node)
+ return -ENODEV;
+
+ ret = its_translate_frame_address(msi_node, &pa);
+ if (ret)
+ return -ENODEV;
+
+ of_node_put(msi_node);
+
+ /* ITS specific DeviceID */
+ info->scratchpad[0].ul = rid;
+ /* ITS translate frame physical address */
+ info->scratchpad[1].ul = pa;
+
+ /* Always allocate power of two vectors */
+ nvec = roundup_pow_of_two(nvec);
+
+ msi_info = msi_get_domain_info(domain->parent);
+ return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info);
+}
#else /* CONFIG_PCI_MSI */
#define its_pci_msi_prepare NULL
+#define its_v5_pci_msi_prepare NULL
#endif /* !CONFIG_PCI_MSI */
static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev,
@@ -118,6 +174,53 @@ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev,
return ret;
}
+static int of_v5_pmsi_get_msi_info(struct irq_domain *domain, struct device *dev,
+ u32 *dev_id, phys_addr_t *pa)
+{
+ int ret, index = 0;
+ /*
+ * Retrieve the DeviceID and the ITS translate frame node pointer
+ * out of the msi-parent property.
+ */
+ do {
+ struct of_phandle_args args;
+
+ ret = of_parse_phandle_with_args(dev->of_node,
+ "msi-parent", "#msi-cells",
+ index, &args);
+ if (ret)
+ break;
+ /*
+ * The IRQ domain fwnode is the msi controller parent
+ * in GICv5 (where the msi controller nodes are the
+ * ITS translate frames).
+ */
+ if (args.np->parent == irq_domain_get_of_node(domain)) {
+ if (WARN_ON(args.args_count != 1))
+ return -EINVAL;
+ *dev_id = args.args[0];
+
+ ret = its_translate_frame_address(args.np, pa);
+ if (ret)
+ return -ENODEV;
+ break;
+ }
+ index++;
+ } while (!ret);
+
+ if (ret) {
+ struct device_node *np = NULL;
+
+ ret = of_map_id(dev->of_node, dev->id, "msi-map", "msi-map-mask", &np, dev_id);
+ if (np) {
+ ret = its_translate_frame_address(np, pa);
+ of_node_put(np);
+ }
+ }
+
+ return ret;
+}
+
int __weak iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
{
return -1;
@@ -148,6 +251,33 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev,
dev, nvec, info);
}
+static int its_v5_pmsi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *info)
+{
+ struct msi_domain_info *msi_info;
+ phys_addr_t pa;
+ u32 dev_id;
+ int ret;
+
+ if (!dev->of_node)
+ return -ENODEV;
+
+ ret = of_v5_pmsi_get_msi_info(domain->parent, dev, &dev_id, &pa);
+ if (ret)
+ return ret;
+
+ /* ITS specific DeviceID */
+ info->scratchpad[0].ul = dev_id;
+ /* ITS translate frame physical address */
+ info->scratchpad[1].ul = pa;
+
+ /* Allocate always as a power of 2 */
+ nvec = roundup_pow_of_two(nvec);
+
+ msi_info = msi_get_domain_info(domain->parent);
+ return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info);
+}
+
static void its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info)
{
struct msi_domain_info *msi_info;
@@ -199,6 +329,32 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
return true;
}
+static bool its_v5_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
+
+ switch (info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ info->ops->msi_prepare = its_v5_pci_msi_prepare;
+ info->ops->msi_teardown = its_msi_teardown;
+ break;
+ case DOMAIN_BUS_DEVICE_MSI:
+ case DOMAIN_BUS_WIRED_TO_MSI:
+ info->ops->msi_prepare = its_v5_pmsi_prepare;
+ info->ops->msi_teardown = its_msi_teardown;
+ break;
+ default:
+ /* Confused. How did the lib return true? */
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ return true;
+}
+
const struct msi_parent_ops gic_v3_its_msi_parent_ops = {
.supported_flags = ITS_MSI_FLAGS_SUPPORTED,
.required_flags = ITS_MSI_FLAGS_REQUIRED,
@@ -208,3 +364,13 @@ const struct msi_parent_ops gic_v3_its_msi_parent_ops = {
.prefix = "ITS-",
.init_dev_msi_info = its_init_dev_msi_info,
};
+
+const struct msi_parent_ops gic_v5_its_msi_parent_ops = {
+ .supported_flags = ITS_MSI_FLAGS_SUPPORTED,
+ .required_flags = ITS_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
+ .prefix = "ITS-v5-",
+ .init_dev_msi_info = its_v5_init_dev_msi_info,
+};
diff --git a/drivers/irqchip/irq-gic-its-msi-parent.h b/drivers/irqchip/irq-gic-its-msi-parent.h
new file mode 100644
index 000000000000..df016f347337
--- /dev/null
+++ b/drivers/irqchip/irq-gic-its-msi-parent.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 ARM Limited, All Rights Reserved.
+ */
+
+#ifndef _IRQ_GIC_ITS_MSI_PARENT_H
+#define _IRQ_GIC_ITS_MSI_PARENT_H
+
+extern const struct msi_parent_ops gic_v3_its_msi_parent_ops;
+extern const struct msi_parent_ops gic_v5_its_msi_parent_ops;
+
+#endif /* _IRQ_GIC_ITS_MSI_PARENT_H */
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d54fa0638dc4..467cb78435a9 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -41,6 +41,7 @@
#include <asm/exception.h>
#include "irq-gic-common.h"
+#include "irq-gic-its-msi-parent.h"
#include <linux/irqchip/irq-msi-lib.h>
#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0)
diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c
new file mode 100644
index 000000000000..f845415f9143
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v5-irs.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved.
+ */
+
+#define pr_fmt(fmt) "GICv5 IRS: " fmt
+
+#include <linux/log2.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-v5.h>
+
+/*
+ * Hardcoded ID_BITS limit for systems supporting only a 1-level IST
+ * table. Systems supporting only a 1-level IST table aren't expected
+ * to require more than 2^12 LPIs. Tweak as required.
+ */
+#define LPI_ID_BITS_LINEAR 12
+
+#define IRS_FLAGS_NON_COHERENT BIT(0)
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct gicv5_irs_chip_data *, per_cpu_irs_data);
+static LIST_HEAD(irs_nodes);
+
+static u32 irs_readl_relaxed(struct gicv5_irs_chip_data *irs_data,
+ const u32 reg_offset)
+{
+ return readl_relaxed(irs_data->irs_base + reg_offset);
+}
+
+static void irs_writel_relaxed(struct gicv5_irs_chip_data *irs_data,
+ const u32 val, const u32 reg_offset)
+{
+ writel_relaxed(val, irs_data->irs_base + reg_offset);
+}
+
+static u64 irs_readq_relaxed(struct gicv5_irs_chip_data *irs_data,
+ const u32 reg_offset)
+{
+ return readq_relaxed(irs_data->irs_base + reg_offset);
+}
+
+static void irs_writeq_relaxed(struct gicv5_irs_chip_data *irs_data,
+ const u64 val, const u32 reg_offset)
+{
+ writeq_relaxed(val, irs_data->irs_base + reg_offset);
+}
+
+/*
+ * The polling wait (in gicv5_wait_for_op_s_atomic()) on a GIC register
+ * provides the memory barriers (through MMIO accessors)
+ * required to synchronize CPU and GIC access to IST memory.
+ */
+static int gicv5_irs_ist_synchronise(struct gicv5_irs_chip_data *irs_data)
+{
+ return gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_IST_STATUSR,
+ GICV5_IRS_IST_STATUSR_IDLE, NULL);
+}
+
+static int __init gicv5_irs_init_ist_linear(struct gicv5_irs_chip_data *irs_data,
+ unsigned int lpi_id_bits,
+ unsigned int istsz)
+{
+ size_t l2istsz;
+ u32 n, cfgr;
+ void *ist;
+ u64 baser;
+ int ret;
+
+ /* Taken from GICv5 specifications 10.2.1.13 IRS_IST_BASER */
+ n = max(5, lpi_id_bits + 1 + istsz);
+
+ l2istsz = BIT(n + 1);
+ /*
+ * Check memory requirements. For a linear IST we cap the
+ * number of ID bits to a value that should never exceed
+ * kmalloc interface memory allocation limits, so this
+ * check is really belt and braces.
+ */
+ if (l2istsz > KMALLOC_MAX_SIZE) {
+ u8 lpi_id_cap = ilog2(KMALLOC_MAX_SIZE) - 2 + istsz;
+
+ pr_warn("Limiting LPI ID bits from %u to %u\n",
+ lpi_id_bits, lpi_id_cap);
+ lpi_id_bits = lpi_id_cap;
+ l2istsz = KMALLOC_MAX_SIZE;
+ }
+
+ ist = kzalloc(l2istsz, GFP_KERNEL);
+ if (!ist)
+ return -ENOMEM;
+
+ if (irs_data->flags & IRS_FLAGS_NON_COHERENT)
+ dcache_clean_inval_poc((unsigned long)ist,
+ (unsigned long)ist + l2istsz);
+ else
+ dsb(ishst);
+
+ cfgr = FIELD_PREP(GICV5_IRS_IST_CFGR_STRUCTURE,
+ GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_ISTSZ, istsz) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_L2SZ,
+ GICV5_IRS_IST_CFGR_L2SZ_4K) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_LPI_ID_BITS, lpi_id_bits);
+ irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_IST_CFGR);
+
+ gicv5_global_data.ist.l2 = false;
+
+ baser = (virt_to_phys(ist) & GICV5_IRS_IST_BASER_ADDR_MASK) |
+ FIELD_PREP(GICV5_IRS_IST_BASER_VALID, 0x1);
+ irs_writeq_relaxed(irs_data, baser, GICV5_IRS_IST_BASER);
+
+ ret = gicv5_irs_ist_synchronise(irs_data);
+ if (ret) {
+ kfree(ist);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __init gicv5_irs_init_ist_two_level(struct gicv5_irs_chip_data *irs_data,
+ unsigned int lpi_id_bits,
+ unsigned int istsz,
+ unsigned int l2sz)
+{
+ __le64 *l1ist;
+ u32 cfgr, n;
+ size_t l1sz;
+ u64 baser;
+ int ret;
+
+ /* Taken from GICv5 specifications 10.2.1.13 IRS_IST_BASER */
+ n = max(5, lpi_id_bits - ((10 - istsz) + (2 * l2sz)) + 2);
+
+ l1sz = BIT(n + 1);
+
+ l1ist = kzalloc(l1sz, GFP_KERNEL);
+ if (!l1ist)
+ return -ENOMEM;
+
+ if (irs_data->flags & IRS_FLAGS_NON_COHERENT)
+ dcache_clean_inval_poc((unsigned long)l1ist,
+ (unsigned long)l1ist + l1sz);
+ else
+ dsb(ishst);
+
+ cfgr = FIELD_PREP(GICV5_IRS_IST_CFGR_STRUCTURE,
+ GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_ISTSZ, istsz) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_L2SZ, l2sz) |
+ FIELD_PREP(GICV5_IRS_IST_CFGR_LPI_ID_BITS, lpi_id_bits);
+ irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_IST_CFGR);
+
+ /*
+ * The L2SZ determine bits required at L2 level. Number of bytes
+ * required by metadata is reported through istsz - the number of bits
+ * covered by L2 entries scales accordingly.
+ */
+ gicv5_global_data.ist.l2_size = BIT(11 + (2 * l2sz) + 1);
+ gicv5_global_data.ist.l2_bits = (10 - istsz) + (2 * l2sz);
+ gicv5_global_data.ist.l1ist_addr = l1ist;
+ gicv5_global_data.ist.l2 = true;
+
+ baser = (virt_to_phys(l1ist) & GICV5_IRS_IST_BASER_ADDR_MASK) |
+ FIELD_PREP(GICV5_IRS_IST_BASER_VALID, 0x1);
+ irs_writeq_relaxed(irs_data, baser, GICV5_IRS_IST_BASER);
+
+ ret = gicv5_irs_ist_synchronise(irs_data);
+ if (ret) {
+ kfree(l1ist);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Alloc L2 IST entries on demand.
+ *
+ * Locking/serialization is guaranteed by irqdomain core code by
+ * taking the hierarchical domain struct irq_domain.root->mutex.
+ */
+int gicv5_irs_iste_alloc(const u32 lpi)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ unsigned int index;
+ u32 l2istr, l2bits;
+ __le64 *l1ist;
+ size_t l2size;
+ void *l2ist;
+ int ret;
+
+ if (!gicv5_global_data.ist.l2)
+ return 0;
+
+ irs_data = per_cpu(per_cpu_irs_data, smp_processor_id());
+ if (!irs_data)
+ return -ENOENT;
+
+ l2size = gicv5_global_data.ist.l2_size;
+ l2bits = gicv5_global_data.ist.l2_bits;
+ l1ist = gicv5_global_data.ist.l1ist_addr;
+ index = lpi >> l2bits;
+
+ if (FIELD_GET(GICV5_ISTL1E_VALID, le64_to_cpu(l1ist[index])))
+ return 0;
+
+ l2ist = kzalloc(l2size, GFP_KERNEL);
+ if (!l2ist)
+ return -ENOMEM;
+
+ l1ist[index] = cpu_to_le64(virt_to_phys(l2ist) & GICV5_ISTL1E_L2_ADDR_MASK);
+
+ if (irs_data->flags & IRS_FLAGS_NON_COHERENT) {
+ dcache_clean_inval_poc((unsigned long)l2ist,
+ (unsigned long)l2ist + l2size);
+ dcache_clean_poc((unsigned long)(l1ist + index),
+ (unsigned long)(l1ist + index) + sizeof(*l1ist));
+ } else {
+ dsb(ishst);
+ }
+
+ l2istr = FIELD_PREP(GICV5_IRS_MAP_L2_ISTR_ID, lpi);
+ irs_writel_relaxed(irs_data, l2istr, GICV5_IRS_MAP_L2_ISTR);
+
+ ret = gicv5_irs_ist_synchronise(irs_data);
+ if (ret) {
+ l1ist[index] = 0;
+ kfree(l2ist);
+ return ret;
+ }
+
+ /*
+ * Make sure we invalidate the cache line pulled before the IRS
+ * had a chance to update the L1 entry and mark it valid.
+ */
+ if (irs_data->flags & IRS_FLAGS_NON_COHERENT) {
+ /*
+ * gicv5_irs_ist_synchronise() includes memory
+ * barriers (MMIO accessors) required to guarantee that the
+ * following dcache invalidation is not executed before the
+ * IST mapping operation has completed.
+ */
+ dcache_inval_poc((unsigned long)(l1ist + index),
+ (unsigned long)(l1ist + index) + sizeof(*l1ist));
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match the L2 IST size to the pagesize, and if this is not possible
+ * pick the smallest supported L2 size in order to minimise the requirement for
+ * physically contiguous blocks of memory as page-sized allocations are
+ * guaranteed to be physically contiguous, and are by definition the easiest to
+ * find.
+ *
+ * Fall back to the smallest supported size (in the event that the pagesize
+ * itself is not supported) again serves to make it easier to find physically
+ * contiguous blocks of memory.
+ */
+static unsigned int gicv5_irs_l2_sz(u32 idr2)
+{
+ switch (PAGE_SIZE) {
+ case SZ_64K:
+ if (GICV5_IRS_IST_L2SZ_SUPPORT_64KB(idr2))
+ return GICV5_IRS_IST_CFGR_L2SZ_64K;
+ fallthrough;
+ case SZ_4K:
+ if (GICV5_IRS_IST_L2SZ_SUPPORT_4KB(idr2))
+ return GICV5_IRS_IST_CFGR_L2SZ_4K;
+ fallthrough;
+ case SZ_16K:
+ if (GICV5_IRS_IST_L2SZ_SUPPORT_16KB(idr2))
+ return GICV5_IRS_IST_CFGR_L2SZ_16K;
+ break;
+ }
+
+ if (GICV5_IRS_IST_L2SZ_SUPPORT_4KB(idr2))
+ return GICV5_IRS_IST_CFGR_L2SZ_4K;
+
+ return GICV5_IRS_IST_CFGR_L2SZ_64K;
+}
+
+static int __init gicv5_irs_init_ist(struct gicv5_irs_chip_data *irs_data)
+{
+ u32 lpi_id_bits, idr2_id_bits, idr2_min_lpi_id_bits, l2_iste_sz, l2sz;
+ u32 l2_iste_sz_split, idr2;
+ bool two_levels, istmd;
+ u64 baser;
+ int ret;
+
+ baser = irs_readq_relaxed(irs_data, GICV5_IRS_IST_BASER);
+ if (FIELD_GET(GICV5_IRS_IST_BASER_VALID, baser)) {
+ pr_err("IST is marked as valid already; cannot allocate\n");
+ return -EPERM;
+ }
+
+ idr2 = irs_readl_relaxed(irs_data, GICV5_IRS_IDR2);
+ two_levels = !!FIELD_GET(GICV5_IRS_IDR2_IST_LEVELS, idr2);
+
+ idr2_id_bits = FIELD_GET(GICV5_IRS_IDR2_ID_BITS, idr2);
+ idr2_min_lpi_id_bits = FIELD_GET(GICV5_IRS_IDR2_MIN_LPI_ID_BITS, idr2);
+
+ /*
+ * For two level tables we are always supporting the maximum allowed
+ * number of IDs.
+ *
+ * For 1-level tables, we should support a number of bits that
+ * is >= min_lpi_id_bits but cap it to LPI_ID_BITS_LINEAR lest
+ * the level 1-table gets too large and its memory allocation
+ * may fail.
+ */
+ if (two_levels) {
+ lpi_id_bits = idr2_id_bits;
+ } else {
+ lpi_id_bits = max(LPI_ID_BITS_LINEAR, idr2_min_lpi_id_bits);
+ lpi_id_bits = min(lpi_id_bits, idr2_id_bits);
+ }
+
+ /*
+ * Cap the ID bits according to the CPUIF supported ID bits
+ */
+ lpi_id_bits = min(lpi_id_bits, gicv5_global_data.cpuif_id_bits);
+
+ if (two_levels)
+ l2sz = gicv5_irs_l2_sz(idr2);
+
+ istmd = !!FIELD_GET(GICV5_IRS_IDR2_ISTMD, idr2);
+
+ l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_4;
+
+ if (istmd) {
+ l2_iste_sz_split = FIELD_GET(GICV5_IRS_IDR2_ISTMD_SZ, idr2);
+
+ if (lpi_id_bits < l2_iste_sz_split)
+ l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_8;
+ else
+ l2_iste_sz = GICV5_IRS_IST_CFGR_ISTSZ_16;
+ }
+
+ /*
+ * Follow GICv5 specification recommendation to opt in for two
+ * level tables (ref: 10.2.1.14 IRS_IST_CFGR).
+ */
+ if (two_levels && (lpi_id_bits > ((10 - l2_iste_sz) + (2 * l2sz)))) {
+ ret = gicv5_irs_init_ist_two_level(irs_data, lpi_id_bits,
+ l2_iste_sz, l2sz);
+ } else {
+ ret = gicv5_irs_init_ist_linear(irs_data, lpi_id_bits,
+ l2_iste_sz);
+ }
+ if (ret)
+ return ret;
+
+ gicv5_init_lpis(BIT(lpi_id_bits));
+
+ return 0;
+}
+
+struct iaffid_entry {
+ u16 iaffid;
+ bool valid;
+};
+
+static DEFINE_PER_CPU(struct iaffid_entry, cpu_iaffid);
+
+int gicv5_irs_cpu_to_iaffid(int cpuid, u16 *iaffid)
+{
+ if (!per_cpu(cpu_iaffid, cpuid).valid) {
+ pr_err("IAFFID for CPU %d has not been initialised\n", cpuid);
+ return -ENODEV;
+ }
+
+ *iaffid = per_cpu(cpu_iaffid, cpuid).iaffid;
+
+ return 0;
+}
+
+struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ u32 min, max;
+
+ list_for_each_entry(irs_data, &irs_nodes, entry) {
+ if (!irs_data->spi_range)
+ continue;
+
+ min = irs_data->spi_min;
+ max = irs_data->spi_min + irs_data->spi_range - 1;
+ if (spi_id >= min && spi_id <= max)
+ return irs_data;
+ }
+
+ return NULL;
+}
+
+static int gicv5_irs_wait_for_spi_op(struct gicv5_irs_chip_data *irs_data)
+{
+ u32 statusr;
+ int ret;
+
+ ret = gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_SPI_STATUSR,
+ GICV5_IRS_SPI_STATUSR_IDLE, &statusr);
+ if (ret)
+ return ret;
+
+ return !!FIELD_GET(GICV5_IRS_SPI_STATUSR_V, statusr) ? 0 : -EIO;
+}
+
+static int gicv5_irs_wait_for_irs_pe(struct gicv5_irs_chip_data *irs_data,
+ bool selr)
+{
+ bool valid = true;
+ u32 statusr;
+ int ret;
+
+ ret = gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_PE_STATUSR,
+ GICV5_IRS_PE_STATUSR_IDLE, &statusr);
+ if (ret)
+ return ret;
+
+ if (selr)
+ valid = !!FIELD_GET(GICV5_IRS_PE_STATUSR_V, statusr);
+
+ return valid ? 0 : -EIO;
+}
+
+static int gicv5_irs_wait_for_pe_selr(struct gicv5_irs_chip_data *irs_data)
+{
+ return gicv5_irs_wait_for_irs_pe(irs_data, true);
+}
+
+static int gicv5_irs_wait_for_pe_cr0(struct gicv5_irs_chip_data *irs_data)
+{
+ return gicv5_irs_wait_for_irs_pe(irs_data, false);
+}
+
+int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ struct gicv5_irs_chip_data *irs_data = d->chip_data;
+ u32 selr, cfgr;
+ bool level;
+ int ret;
+
+ /*
+ * There is no distinction between HIGH/LOW for level IRQs
+ * and RISING/FALLING for edge IRQs in the architecture,
+ * hence consider them equivalent.
+ */
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ case IRQ_TYPE_EDGE_FALLING:
+ level = false;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_LEVEL_LOW:
+ level = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ guard(raw_spinlock)(&irs_data->spi_config_lock);
+
+ selr = FIELD_PREP(GICV5_IRS_SPI_SELR_ID, d->hwirq);
+ irs_writel_relaxed(irs_data, selr, GICV5_IRS_SPI_SELR);
+ ret = gicv5_irs_wait_for_spi_op(irs_data);
+ if (ret)
+ return ret;
+
+ cfgr = FIELD_PREP(GICV5_IRS_SPI_CFGR_TM, level);
+ irs_writel_relaxed(irs_data, cfgr, GICV5_IRS_SPI_CFGR);
+
+ return gicv5_irs_wait_for_spi_op(irs_data);
+}
+
+static int gicv5_irs_wait_for_idle(struct gicv5_irs_chip_data *irs_data)
+{
+ return gicv5_wait_for_op_atomic(irs_data->irs_base, GICV5_IRS_CR0,
+ GICV5_IRS_CR0_IDLE, NULL);
+}
+
+void gicv5_irs_syncr(void)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ u32 syncr;
+
+ irs_data = list_first_entry_or_null(&irs_nodes, struct gicv5_irs_chip_data, entry);
+ if (WARN_ON_ONCE(!irs_data))
+ return;
+
+ syncr = FIELD_PREP(GICV5_IRS_SYNCR_SYNC, 1);
+ irs_writel_relaxed(irs_data, syncr, GICV5_IRS_SYNCR);
+
+ gicv5_wait_for_op(irs_data->irs_base, GICV5_IRS_SYNC_STATUSR,
+ GICV5_IRS_SYNC_STATUSR_IDLE);
+}
+
+int gicv5_irs_register_cpu(int cpuid)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ u32 selr, cr0;
+ u16 iaffid;
+ int ret;
+
+ ret = gicv5_irs_cpu_to_iaffid(cpuid, &iaffid);
+ if (ret) {
+ pr_err("IAFFID for CPU %d has not been initialised\n", cpuid);
+ return ret;
+ }
+
+ irs_data = per_cpu(per_cpu_irs_data, cpuid);
+ if (!irs_data) {
+ pr_err("No IRS associated with CPU %u\n", cpuid);
+ return -ENXIO;
+ }
+
+ selr = FIELD_PREP(GICV5_IRS_PE_SELR_IAFFID, iaffid);
+ irs_writel_relaxed(irs_data, selr, GICV5_IRS_PE_SELR);
+
+ ret = gicv5_irs_wait_for_pe_selr(irs_data);
+ if (ret) {
+ pr_err("IAFFID 0x%x used in IRS_PE_SELR is invalid\n", iaffid);
+ return -ENXIO;
+ }
+
+ cr0 = FIELD_PREP(GICV5_IRS_PE_CR0_DPS, 0x1);
+ irs_writel_relaxed(irs_data, cr0, GICV5_IRS_PE_CR0);
+
+ ret = gicv5_irs_wait_for_pe_cr0(irs_data);
+ if (ret)
+ return ret;
+
+ pr_debug("CPU %d enabled PE IAFFID 0x%x\n", cpuid, iaffid);
+
+ return 0;
+}
+
+static void __init gicv5_irs_init_bases(struct gicv5_irs_chip_data *irs_data,
+ void __iomem *irs_base,
+ struct fwnode_handle *handle)
+{
+ struct device_node *np = to_of_node(handle);
+ u32 cr0, cr1;
+
+ irs_data->fwnode = handle;
+ irs_data->irs_base = irs_base;
+
+ if (of_property_read_bool(np, "dma-noncoherent")) {
+ /*
+ * A non-coherent IRS implies that some cache levels cannot be
+ * used coherently by the cores and GIC. Our only option is to mark
+ * memory attributes for the GIC as non-cacheable; by default,
+ * non-cacheable memory attributes imply outer-shareable
+ * shareability, the value written into IRS_CR1_SH is ignored.
+ */
+ cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_NO_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMD_WA, GICV5_NO_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMD_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VPET_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMT_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IST_WA, GICV5_NO_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_NON_CACHE) |
+ FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_NON_CACHE);
+ irs_data->flags |= IRS_FLAGS_NON_COHERENT;
+ } else {
+ cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMD_WA, GICV5_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMD_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VPET_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_VMT_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IST_WA, GICV5_WRITE_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_WB_CACHE) |
+ FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_WB_CACHE) |
+ FIELD_PREP(GICV5_IRS_CR1_SH, GICV5_INNER_SHARE);
+ }
+
+ irs_writel_relaxed(irs_data, cr1, GICV5_IRS_CR1);
+
+ cr0 = FIELD_PREP(GICV5_IRS_CR0_IRSEN, 0x1);
+ irs_writel_relaxed(irs_data, cr0, GICV5_IRS_CR0);
+ gicv5_irs_wait_for_idle(irs_data);
+}
+
+static int __init gicv5_irs_of_init_affinity(struct device_node *node,
+ struct gicv5_irs_chip_data *irs_data,
+ u8 iaffid_bits)
+{
+ /*
+ * Detect IAFFID<->CPU mappings from the device tree and
+ * record IRS<->CPU topology information.
+ */
+ u16 iaffid_mask = GENMASK(iaffid_bits - 1, 0);
+ int ret, i, ncpus, niaffids;
+
+ ncpus = of_count_phandle_with_args(node, "cpus", NULL);
+ if (ncpus < 0)
+ return -EINVAL;
+
+ niaffids = of_property_count_elems_of_size(node, "arm,iaffids",
+ sizeof(u16));
+ if (niaffids != ncpus)
+ return -EINVAL;
+
+ u16 *iaffids __free(kfree) = kcalloc(niaffids, sizeof(*iaffids), GFP_KERNEL);
+ if (!iaffids)
+ return -ENOMEM;
+
+ ret = of_property_read_u16_array(node, "arm,iaffids", iaffids, niaffids);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ncpus; i++) {
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpus", i);
+ if (WARN_ON(!cpu_node))
+ continue;
+
+ cpu = of_cpu_node_to_id(cpu_node);
+ of_node_put(cpu_node);
+ if (WARN_ON(cpu < 0))
+ continue;
+
+ if (iaffids[i] & ~iaffid_mask) {
+ pr_warn("CPU %d iaffid 0x%x exceeds IRS iaffid bits\n",
+ cpu, iaffids[i]);
+ continue;
+ }
+
+ per_cpu(cpu_iaffid, cpu).iaffid = iaffids[i];
+ per_cpu(cpu_iaffid, cpu).valid = true;
+
+ /* We also know that the CPU is connected to this IRS */
+ per_cpu(per_cpu_irs_data, cpu) = irs_data;
+ }
+
+ return ret;
+}
+
+static void irs_setup_pri_bits(u32 idr1)
+{
+ switch (FIELD_GET(GICV5_IRS_IDR1_PRIORITY_BITS, idr1)) {
+ case GICV5_IRS_IDR1_PRIORITY_BITS_1BITS:
+ gicv5_global_data.irs_pri_bits = 1;
+ break;
+ case GICV5_IRS_IDR1_PRIORITY_BITS_2BITS:
+ gicv5_global_data.irs_pri_bits = 2;
+ break;
+ case GICV5_IRS_IDR1_PRIORITY_BITS_3BITS:
+ gicv5_global_data.irs_pri_bits = 3;
+ break;
+ case GICV5_IRS_IDR1_PRIORITY_BITS_4BITS:
+ gicv5_global_data.irs_pri_bits = 4;
+ break;
+ case GICV5_IRS_IDR1_PRIORITY_BITS_5BITS:
+ gicv5_global_data.irs_pri_bits = 5;
+ break;
+ default:
+ pr_warn("Detected wrong IDR priority bits value 0x%lx\n",
+ FIELD_GET(GICV5_IRS_IDR1_PRIORITY_BITS, idr1));
+ gicv5_global_data.irs_pri_bits = 1;
+ break;
+ }
+}
+
+static int __init gicv5_irs_init(struct device_node *node)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ void __iomem *irs_base;
+ u32 idr, spi_count;
+ u8 iaffid_bits;
+ int ret;
+
+ irs_data = kzalloc(sizeof(*irs_data), GFP_KERNEL);
+ if (!irs_data)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&irs_data->spi_config_lock);
+
+ ret = of_property_match_string(node, "reg-names", "ns-config");
+ if (ret < 0) {
+ pr_err("%pOF: ns-config reg-name not present\n", node);
+ goto out_err;
+ }
+
+ irs_base = of_io_request_and_map(node, ret, of_node_full_name(node));
+ if (IS_ERR(irs_base)) {
+ pr_err("%pOF: unable to map GICv5 IRS registers\n", node);
+ ret = PTR_ERR(irs_base);
+ goto out_err;
+ }
+
+ gicv5_irs_init_bases(irs_data, irs_base, &node->fwnode);
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1);
+ iaffid_bits = FIELD_GET(GICV5_IRS_IDR1_IAFFID_BITS, idr) + 1;
+
+ ret = gicv5_irs_of_init_affinity(node, irs_data, iaffid_bits);
+ if (ret) {
+ pr_err("Failed to parse CPU IAFFIDs from the device tree!\n");
+ goto out_iomem;
+ }
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR2);
+ if (WARN(!FIELD_GET(GICV5_IRS_IDR2_LPI, idr),
+ "LPI support not available - no IPIs, can't proceed\n")) {
+ ret = -ENODEV;
+ goto out_iomem;
+ }
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR7);
+ irs_data->spi_min = FIELD_GET(GICV5_IRS_IDR7_SPI_BASE, idr);
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR6);
+ irs_data->spi_range = FIELD_GET(GICV5_IRS_IDR6_SPI_IRS_RANGE, idr);
+
+ if (irs_data->spi_range) {
+ pr_info("%s detected SPI range [%u-%u]\n",
+ of_node_full_name(node),
+ irs_data->spi_min,
+ irs_data->spi_min +
+ irs_data->spi_range - 1);
+ }
+
+ /*
+ * Do the global setting only on the first IRS.
+ * Global properties (iaffid_bits, global spi count) are guaranteed to
+ * be consistent across IRSes by the architecture.
+ */
+ if (list_empty(&irs_nodes)) {
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1);
+ irs_setup_pri_bits(idr);
+
+ idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR5);
+
+ spi_count = FIELD_GET(GICV5_IRS_IDR5_SPI_RANGE, idr);
+ gicv5_global_data.global_spi_count = spi_count;
+
+ gicv5_init_lpi_domain();
+
+ pr_debug("Detected %u SPIs globally\n", spi_count);
+ }
+
+ list_add_tail(&irs_data->entry, &irs_nodes);
+
+ return 0;
+
+out_iomem:
+ iounmap(irs_base);
+out_err:
+ kfree(irs_data);
+ return ret;
+}
+
+void __init gicv5_irs_remove(void)
+{
+ struct gicv5_irs_chip_data *irs_data, *tmp_data;
+
+ gicv5_free_lpi_domain();
+ gicv5_deinit_lpis();
+
+ list_for_each_entry_safe(irs_data, tmp_data, &irs_nodes, entry) {
+ iounmap(irs_data->irs_base);
+ list_del(&irs_data->entry);
+ kfree(irs_data);
+ }
+}
+
+int __init gicv5_irs_enable(void)
+{
+ struct gicv5_irs_chip_data *irs_data;
+ int ret;
+
+ irs_data = list_first_entry_or_null(&irs_nodes,
+ struct gicv5_irs_chip_data, entry);
+ if (!irs_data)
+ return -ENODEV;
+
+ ret = gicv5_irs_init_ist(irs_data);
+ if (ret) {
+ pr_err("Failed to init IST\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+void __init gicv5_irs_its_probe(void)
+{
+ struct gicv5_irs_chip_data *irs_data;
+
+ list_for_each_entry(irs_data, &irs_nodes, entry)
+ gicv5_its_of_probe(to_of_node(irs_data->fwnode));
+}
+
+int __init gicv5_irs_of_probe(struct device_node *parent)
+{
+ struct device_node *np;
+ int ret;
+
+ for_each_available_child_of_node(parent, np) {
+ if (!of_device_is_compatible(np, "arm,gic-v5-irs"))
+ continue;
+
+ ret = gicv5_irs_init(np);
+ if (ret)
+ pr_err("Failed to init IRS %s\n", np->full_name);
+ }
+
+ return list_empty(&irs_nodes) ? -ENODEV : 0;
+}
diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c
new file mode 100644
index 000000000000..340640fdbdf6
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v5-its.c
@@ -0,0 +1,1228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved.
+ */
+
+#define pr_fmt(fmt) "GICv5 ITS: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/iommu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-v5.h>
+#include <linux/irqchip/irq-msi-lib.h>
+
+#include "irq-gic-its-msi-parent.h"
+
+#define ITS_FLAGS_NON_COHERENT BIT(0)
+
+struct gicv5_its_chip_data {
+ struct xarray its_devices;
+ struct mutex dev_alloc_lock;
+ struct fwnode_handle *fwnode;
+ struct gicv5_its_devtab_cfg devtab_cfgr;
+ void __iomem *its_base;
+ u32 flags;
+ unsigned int msi_domain_flags;
+};
+
+struct gicv5_its_dev {
+ struct gicv5_its_chip_data *its_node;
+ struct gicv5_its_itt_cfg itt_cfg;
+ unsigned long *event_map;
+ u32 device_id;
+ u32 num_events;
+ phys_addr_t its_trans_phys_base;
+};
+
+static u32 its_readl_relaxed(struct gicv5_its_chip_data *its_node, const u32 reg_offset)
+{
+ return readl_relaxed(its_node->its_base + reg_offset);
+}
+
+static void its_writel_relaxed(struct gicv5_its_chip_data *its_node, const u32 val,
+ const u32 reg_offset)
+{
+ writel_relaxed(val, its_node->its_base + reg_offset);
+}
+
+static void its_writeq_relaxed(struct gicv5_its_chip_data *its_node, const u64 val,
+ const u32 reg_offset)
+{
+ writeq_relaxed(val, its_node->its_base + reg_offset);
+}
+
+static void gicv5_its_dcache_clean(struct gicv5_its_chip_data *its, void *start,
+ size_t sz)
+{
+ void *end = start + sz;
+
+ if (its->flags & ITS_FLAGS_NON_COHERENT)
+ dcache_clean_inval_poc((unsigned long)start, (unsigned long)end);
+ else
+ dsb(ishst);
+}
+
+static void its_write_table_entry(struct gicv5_its_chip_data *its, __le64 *entry,
+ u64 val)
+{
+ WRITE_ONCE(*entry, cpu_to_le64(val));
+ gicv5_its_dcache_clean(its, entry, sizeof(*entry));
+}
+
+#define devtab_cfgr_field(its, f) \
+ FIELD_GET(GICV5_ITS_DT_CFGR_##f, (its)->devtab_cfgr.cfgr)
+
+static int gicv5_its_cache_sync(struct gicv5_its_chip_data *its)
+{
+ return gicv5_wait_for_op_atomic(its->its_base, GICV5_ITS_STATUSR,
+ GICV5_ITS_STATUSR_IDLE, NULL);
+}
+
+static void gicv5_its_syncr(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev)
+{
+ u64 syncr;
+
+ syncr = FIELD_PREP(GICV5_ITS_SYNCR_SYNC, 1) |
+ FIELD_PREP(GICV5_ITS_SYNCR_DEVICEID, its_dev->device_id);
+
+ its_writeq_relaxed(its, syncr, GICV5_ITS_SYNCR);
+
+ gicv5_wait_for_op(its->its_base, GICV5_ITS_SYNC_STATUSR, GICV5_ITS_SYNC_STATUSR_IDLE);
+}
+
+/* Number of bits required for each L2 {device/interrupt translation} table size */
+#define ITS_L2SZ_64K_L2_BITS 13
+#define ITS_L2SZ_16K_L2_BITS 11
+#define ITS_L2SZ_4K_L2_BITS 9
+
+static unsigned int gicv5_its_l2sz_to_l2_bits(unsigned int sz)
+{
+ switch (sz) {
+ case GICV5_ITS_DT_ITT_CFGR_L2SZ_64k:
+ return ITS_L2SZ_64K_L2_BITS;
+ case GICV5_ITS_DT_ITT_CFGR_L2SZ_16k:
+ return ITS_L2SZ_16K_L2_BITS;
+ case GICV5_ITS_DT_ITT_CFGR_L2SZ_4k:
+ default:
+ return ITS_L2SZ_4K_L2_BITS;
+ }
+}
+
+static int gicv5_its_itt_cache_inv(struct gicv5_its_chip_data *its, u32 device_id,
+ u16 event_id)
+{
+ u32 eventr, eidr;
+ u64 didr;
+
+ didr = FIELD_PREP(GICV5_ITS_DIDR_DEVICEID, device_id);
+ eidr = FIELD_PREP(GICV5_ITS_EIDR_EVENTID, event_id);
+ eventr = FIELD_PREP(GICV5_ITS_INV_EVENTR_I, 0x1);
+
+ its_writeq_relaxed(its, didr, GICV5_ITS_DIDR);
+ its_writel_relaxed(its, eidr, GICV5_ITS_EIDR);
+ its_writel_relaxed(its, eventr, GICV5_ITS_INV_EVENTR);
+
+ return gicv5_its_cache_sync(its);
+}
+
+static void gicv5_its_free_itt_linear(struct gicv5_its_dev *its_dev)
+{
+ kfree(its_dev->itt_cfg.linear.itt);
+}
+
+static void gicv5_its_free_itt_two_level(struct gicv5_its_dev *its_dev)
+{
+ unsigned int i, num_ents = its_dev->itt_cfg.l2.num_l1_ents;
+
+ for (i = 0; i < num_ents; i++)
+ kfree(its_dev->itt_cfg.l2.l2ptrs[i]);
+
+ kfree(its_dev->itt_cfg.l2.l2ptrs);
+ kfree(its_dev->itt_cfg.l2.l1itt);
+}
+
+static void gicv5_its_free_itt(struct gicv5_its_dev *its_dev)
+{
+ if (!its_dev->itt_cfg.l2itt)
+ gicv5_its_free_itt_linear(its_dev);
+ else
+ gicv5_its_free_itt_two_level(its_dev);
+}
+
+static int gicv5_its_create_itt_linear(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev,
+ unsigned int event_id_bits)
+{
+ unsigned int num_ents = BIT(event_id_bits);
+ __le64 *itt;
+
+ itt = kcalloc(num_ents, sizeof(*itt), GFP_KERNEL);
+ if (!itt)
+ return -ENOMEM;
+
+ its_dev->itt_cfg.linear.itt = itt;
+ its_dev->itt_cfg.linear.num_ents = num_ents;
+ its_dev->itt_cfg.l2itt = false;
+ its_dev->itt_cfg.event_id_bits = event_id_bits;
+
+ gicv5_its_dcache_clean(its, itt, num_ents * sizeof(*itt));
+
+ return 0;
+}
+
+/*
+ * Allocate a two-level ITT. All ITT entries are allocated in one go, unlike
+ * with the device table. Span may be used to limit the second level table
+ * size, where possible.
+ */
+static int gicv5_its_create_itt_two_level(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev,
+ unsigned int event_id_bits,
+ unsigned int itt_l2sz,
+ unsigned int num_events)
+{
+ unsigned int l1_bits, l2_bits, span, events_per_l2_table;
+ unsigned int i, complete_tables, final_span, num_ents;
+ __le64 *itt_l1, *itt_l2, **l2ptrs;
+ int ret;
+ u64 val;
+
+ ret = gicv5_its_l2sz_to_l2_bits(itt_l2sz);
+ if (ret >= event_id_bits) {
+ pr_debug("Incorrect l2sz (0x%x) for %u EventID bits. Cannot allocate ITT\n",
+ itt_l2sz, event_id_bits);
+ return -EINVAL;
+ }
+
+ l2_bits = ret;
+
+ l1_bits = event_id_bits - l2_bits;
+
+ num_ents = BIT(l1_bits);
+
+ itt_l1 = kcalloc(num_ents, sizeof(*itt_l1), GFP_KERNEL);
+ if (!itt_l1)
+ return -ENOMEM;
+
+ l2ptrs = kcalloc(num_ents, sizeof(*l2ptrs), GFP_KERNEL);
+ if (!l2ptrs) {
+ kfree(itt_l1);
+ return -ENOMEM;
+ }
+
+ its_dev->itt_cfg.l2.l2ptrs = l2ptrs;
+
+ its_dev->itt_cfg.l2.l2sz = itt_l2sz;
+ its_dev->itt_cfg.l2.l1itt = itt_l1;
+ its_dev->itt_cfg.l2.num_l1_ents = num_ents;
+ its_dev->itt_cfg.l2itt = true;
+ its_dev->itt_cfg.event_id_bits = event_id_bits;
+
+ /*
+ * Need to determine how many entries there are per L2 - this is based
+ * on the number of bits in the table.
+ */
+ events_per_l2_table = BIT(l2_bits);
+ complete_tables = num_events / events_per_l2_table;
+ final_span = order_base_2(num_events % events_per_l2_table);
+
+ for (i = 0; i < num_ents; i++) {
+ size_t l2sz;
+
+ span = i == complete_tables ? final_span : l2_bits;
+
+ itt_l2 = kcalloc(BIT(span), sizeof(*itt_l2), GFP_KERNEL);
+ if (!itt_l2) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ its_dev->itt_cfg.l2.l2ptrs[i] = itt_l2;
+
+ l2sz = BIT(span) * sizeof(*itt_l2);
+
+ gicv5_its_dcache_clean(its, itt_l2, l2sz);
+
+ val = (virt_to_phys(itt_l2) & GICV5_ITTL1E_L2_ADDR_MASK) |
+ FIELD_PREP(GICV5_ITTL1E_SPAN, span) |
+ FIELD_PREP(GICV5_ITTL1E_VALID, 0x1);
+
+ WRITE_ONCE(itt_l1[i], cpu_to_le64(val));
+ }
+
+ gicv5_its_dcache_clean(its, itt_l1, num_ents * sizeof(*itt_l1));
+
+ return 0;
+
+out_free:
+ for (i = i - 1; i >= 0; i--)
+ kfree(its_dev->itt_cfg.l2.l2ptrs[i]);
+
+ kfree(its_dev->itt_cfg.l2.l2ptrs);
+ kfree(itt_l1);
+ return ret;
+}
+
+/*
+ * Function to check whether the device table or ITT table support
+ * a two-level table and if so depending on the number of id_bits
+ * requested, determine whether a two-level table is required.
+ *
+ * Return the 2-level size value if a two level table is deemed
+ * necessary.
+ */
+static bool gicv5_its_l2sz_two_level(bool devtab, u32 its_idr1, u8 id_bits, u8 *sz)
+{
+ unsigned int l2_bits, l2_sz;
+
+ if (devtab && !FIELD_GET(GICV5_ITS_IDR1_DT_LEVELS, its_idr1))
+ return false;
+
+ if (!devtab && !FIELD_GET(GICV5_ITS_IDR1_ITT_LEVELS, its_idr1))
+ return false;
+
+ /*
+ * Pick an L2 size that matches the pagesize; if a match
+ * is not found, go for the smallest supported l2 size granule.
+ *
+ * This ensures that we will always be able to allocate
+ * contiguous memory at L2.
+ */
+ switch (PAGE_SIZE) {
+ case SZ_64K:
+ if (GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(its_idr1)) {
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_64k;
+ break;
+ }
+ fallthrough;
+ case SZ_4K:
+ if (GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(its_idr1)) {
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k;
+ break;
+ }
+ fallthrough;
+ case SZ_16K:
+ if (GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(its_idr1)) {
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_16k;
+ break;
+ }
+ if (GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(its_idr1)) {
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k;
+ break;
+ }
+ if (GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(its_idr1)) {
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_64k;
+ break;
+ }
+
+ l2_sz = GICV5_ITS_DT_ITT_CFGR_L2SZ_4k;
+ break;
+ }
+
+ l2_bits = gicv5_its_l2sz_to_l2_bits(l2_sz);
+
+ if (l2_bits > id_bits)
+ return false;
+
+ *sz = l2_sz;
+
+ return true;
+}
+
+static __le64 *gicv5_its_device_get_itte_ref(struct gicv5_its_dev *its_dev,
+ u16 event_id)
+{
+ unsigned int l1_idx, l2_idx, l2_bits;
+ __le64 *l2_itt;
+
+ if (!its_dev->itt_cfg.l2itt) {
+ __le64 *itt = its_dev->itt_cfg.linear.itt;
+
+ return &itt[event_id];
+ }
+
+ l2_bits = gicv5_its_l2sz_to_l2_bits(its_dev->itt_cfg.l2.l2sz);
+ l1_idx = event_id >> l2_bits;
+ l2_idx = event_id & GENMASK(l2_bits - 1, 0);
+ l2_itt = its_dev->itt_cfg.l2.l2ptrs[l1_idx];
+
+ return &l2_itt[l2_idx];
+}
+
+static int gicv5_its_device_cache_inv(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev)
+{
+ u32 devicer;
+ u64 didr;
+
+ didr = FIELD_PREP(GICV5_ITS_DIDR_DEVICEID, its_dev->device_id);
+ devicer = FIELD_PREP(GICV5_ITS_INV_DEVICER_I, 0x1) |
+ FIELD_PREP(GICV5_ITS_INV_DEVICER_EVENTID_BITS,
+ its_dev->itt_cfg.event_id_bits) |
+ FIELD_PREP(GICV5_ITS_INV_DEVICER_L1, 0x0);
+ its_writeq_relaxed(its, didr, GICV5_ITS_DIDR);
+ its_writel_relaxed(its, devicer, GICV5_ITS_INV_DEVICER);
+
+ return gicv5_its_cache_sync(its);
+}
+
+/*
+ * Allocate a level 2 device table entry, update L1 parent to reference it.
+ * Only used for 2-level device tables, and it is called on demand.
+ */
+static int gicv5_its_alloc_l2_devtab(struct gicv5_its_chip_data *its,
+ unsigned int l1_index)
+{
+ __le64 *l2devtab, *l1devtab = its->devtab_cfgr.l2.l1devtab;
+ u8 span, l2sz, l2_bits;
+ u64 l1dte;
+
+ if (FIELD_GET(GICV5_DTL1E_VALID, le64_to_cpu(l1devtab[l1_index])))
+ return 0;
+
+ span = FIELD_GET(GICV5_DTL1E_SPAN, le64_to_cpu(l1devtab[l1_index]));
+ l2sz = devtab_cfgr_field(its, L2SZ);
+
+ l2_bits = gicv5_its_l2sz_to_l2_bits(l2sz);
+
+ /*
+ * Span allows us to create a smaller L2 device table.
+ * If it is too large, use the number of allowed L2 bits.
+ */
+ if (span > l2_bits)
+ span = l2_bits;
+
+ l2devtab = kcalloc(BIT(span), sizeof(*l2devtab), GFP_KERNEL);
+ if (!l2devtab)
+ return -ENOMEM;
+
+ its->devtab_cfgr.l2.l2ptrs[l1_index] = l2devtab;
+
+ l1dte = FIELD_PREP(GICV5_DTL1E_SPAN, span) |
+ (virt_to_phys(l2devtab) & GICV5_DTL1E_L2_ADDR_MASK) |
+ FIELD_PREP(GICV5_DTL1E_VALID, 0x1);
+ its_write_table_entry(its, &l1devtab[l1_index], l1dte);
+
+ return 0;
+}
+
+static __le64 *gicv5_its_devtab_get_dte_ref(struct gicv5_its_chip_data *its,
+ u32 device_id, bool alloc)
+{
+ u8 str = devtab_cfgr_field(its, STRUCTURE);
+ unsigned int l2sz, l2_bits, l1_idx, l2_idx;
+ __le64 *l2devtab;
+ int ret;
+
+ if (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) {
+ l2devtab = its->devtab_cfgr.linear.devtab;
+ return &l2devtab[device_id];
+ }
+
+ l2sz = devtab_cfgr_field(its, L2SZ);
+ l2_bits = gicv5_its_l2sz_to_l2_bits(l2sz);
+ l1_idx = device_id >> l2_bits;
+ l2_idx = device_id & GENMASK(l2_bits - 1, 0);
+
+ if (alloc) {
+ /*
+ * Allocate a new L2 device table here before
+ * continuing. We make the assumption that the span in
+ * the L1 table has been set correctly, and blindly use
+ * that value.
+ */
+ ret = gicv5_its_alloc_l2_devtab(its, l1_idx);
+ if (ret)
+ return NULL;
+ }
+
+ l2devtab = its->devtab_cfgr.l2.l2ptrs[l1_idx];
+ return &l2devtab[l2_idx];
+}
+
+/*
+ * Register a new device in the device table. Allocate an ITT and
+ * program the L2DTE entry according to the ITT structure that
+ * was chosen.
+ */
+static int gicv5_its_device_register(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev)
+{
+ u8 event_id_bits, device_id_bits, itt_struct, itt_l2sz;
+ phys_addr_t itt_phys_base;
+ bool two_level_itt;
+ u32 idr1, idr2;
+ __le64 *dte;
+ u64 val;
+ int ret;
+
+ device_id_bits = devtab_cfgr_field(its, DEVICEID_BITS);
+
+ if (its_dev->device_id >= BIT(device_id_bits)) {
+ pr_err("Supplied DeviceID (%u) outside of Device Table range (%u)!",
+ its_dev->device_id, (u32)GENMASK(device_id_bits - 1, 0));
+ return -EINVAL;
+ }
+
+ dte = gicv5_its_devtab_get_dte_ref(its, its_dev->device_id, true);
+ if (!dte)
+ return -ENOMEM;
+
+ if (FIELD_GET(GICV5_DTL2E_VALID, le64_to_cpu(*dte)))
+ return -EBUSY;
+
+ /*
+ * Determine how many bits we need, validate those against the max.
+ * Based on these, determine if we should go for a 1- or 2-level ITT.
+ */
+ event_id_bits = order_base_2(its_dev->num_events);
+
+ idr2 = its_readl_relaxed(its, GICV5_ITS_IDR2);
+
+ if (event_id_bits > FIELD_GET(GICV5_ITS_IDR2_EVENTID_BITS, idr2)) {
+ pr_err("Required EventID bits (%u) larger than supported bits (%u)!",
+ event_id_bits,
+ (u8)FIELD_GET(GICV5_ITS_IDR2_EVENTID_BITS, idr2));
+ return -EINVAL;
+ }
+
+ idr1 = its_readl_relaxed(its, GICV5_ITS_IDR1);
+
+ /*
+ * L2 ITT size is programmed into the L2DTE regardless of
+ * whether a two-level or linear ITT is built, init it.
+ */
+ itt_l2sz = 0;
+
+ two_level_itt = gicv5_its_l2sz_two_level(false, idr1, event_id_bits,
+ &itt_l2sz);
+ if (two_level_itt)
+ ret = gicv5_its_create_itt_two_level(its, its_dev, event_id_bits,
+ itt_l2sz,
+ its_dev->num_events);
+ else
+ ret = gicv5_its_create_itt_linear(its, its_dev, event_id_bits);
+ if (ret)
+ return ret;
+
+ itt_phys_base = two_level_itt ? virt_to_phys(its_dev->itt_cfg.l2.l1itt) :
+ virt_to_phys(its_dev->itt_cfg.linear.itt);
+
+ itt_struct = two_level_itt ? GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL :
+ GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR;
+
+ val = FIELD_PREP(GICV5_DTL2E_EVENT_ID_BITS, event_id_bits) |
+ FIELD_PREP(GICV5_DTL2E_ITT_STRUCTURE, itt_struct) |
+ (itt_phys_base & GICV5_DTL2E_ITT_ADDR_MASK) |
+ FIELD_PREP(GICV5_DTL2E_ITT_L2SZ, itt_l2sz) |
+ FIELD_PREP(GICV5_DTL2E_VALID, 0x1);
+
+ its_write_table_entry(its, dte, val);
+
+ ret = gicv5_its_device_cache_inv(its, its_dev);
+ if (ret) {
+ its_write_table_entry(its, dte, 0);
+ gicv5_its_free_itt(its_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Unregister a device in the device table. Lookup the device by ID, free the
+ * corresponding ITT, mark the device as invalid in the device table.
+ */
+static int gicv5_its_device_unregister(struct gicv5_its_chip_data *its,
+ struct gicv5_its_dev *its_dev)
+{
+ __le64 *dte;
+
+ dte = gicv5_its_devtab_get_dte_ref(its, its_dev->device_id, false);
+
+ if (!FIELD_GET(GICV5_DTL2E_VALID, le64_to_cpu(*dte))) {
+ pr_debug("Device table entry for DeviceID 0x%x is not valid. Nothing to clean up!",
+ its_dev->device_id);
+ return -EINVAL;
+ }
+
+ /* Zero everything - make it clear that this is an invalid entry */
+ its_write_table_entry(its, dte, 0);
+
+ gicv5_its_free_itt(its_dev);
+
+ return gicv5_its_device_cache_inv(its, its_dev);
+}
+
+/*
+ * Allocate a 1-level device table. All entries are allocated, but marked
+ * invalid.
+ */
+static int gicv5_its_alloc_devtab_linear(struct gicv5_its_chip_data *its,
+ u8 device_id_bits)
+{
+ __le64 *devtab;
+ size_t sz;
+ u64 baser;
+ u32 cfgr;
+
+ /*
+ * We expect a GICv5 implementation requiring a large number of
+ * deviceID bits to support a 2-level device table. If that's not
+ * the case, cap the number of deviceIDs supported according to the
+ * kmalloc limits so that the system can chug along with a linear
+ * device table.
+ */
+ sz = BIT_ULL(device_id_bits) * sizeof(*devtab);
+ if (sz > KMALLOC_MAX_SIZE) {
+ u8 device_id_cap = ilog2(KMALLOC_MAX_SIZE/sizeof(*devtab));
+
+ pr_warn("Limiting device ID bits from %u to %u\n",
+ device_id_bits, device_id_cap);
+ device_id_bits = device_id_cap;
+ }
+
+ devtab = kcalloc(BIT(device_id_bits), sizeof(*devtab), GFP_KERNEL);
+ if (!devtab)
+ return -ENOMEM;
+
+ gicv5_its_dcache_clean(its, devtab, sz);
+
+ cfgr = FIELD_PREP(GICV5_ITS_DT_CFGR_STRUCTURE,
+ GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) |
+ FIELD_PREP(GICV5_ITS_DT_CFGR_L2SZ, 0) |
+ FIELD_PREP(GICV5_ITS_DT_CFGR_DEVICEID_BITS, device_id_bits);
+ its_writel_relaxed(its, cfgr, GICV5_ITS_DT_CFGR);
+
+ baser = virt_to_phys(devtab) & GICV5_ITS_DT_BASER_ADDR_MASK;
+ its_writeq_relaxed(its, baser, GICV5_ITS_DT_BASER);
+
+ its->devtab_cfgr.cfgr = cfgr;
+ its->devtab_cfgr.linear.devtab = devtab;
+
+ return 0;
+}
+
+/*
+ * Allocate a 2-level device table. L2 entries are not allocated,
+ * they are allocated on-demand.
+ */
+static int gicv5_its_alloc_devtab_two_level(struct gicv5_its_chip_data *its,
+ u8 device_id_bits,
+ u8 devtab_l2sz)
+{
+ unsigned int l1_bits, l2_bits, i;
+ __le64 *l1devtab, **l2ptrs;
+ size_t l1_sz;
+ u64 baser;
+ u32 cfgr;
+
+ l2_bits = gicv5_its_l2sz_to_l2_bits(devtab_l2sz);
+
+ l1_bits = device_id_bits - l2_bits;
+ l1_sz = BIT(l1_bits) * sizeof(*l1devtab);
+ /*
+ * With 2-level device table support it is highly unlikely
+ * that we are not able to allocate the required amount of
+ * device table memory to cover deviceID space; cap the
+ * deviceID space if we encounter such set-up.
+ * If this ever becomes a problem we could revisit the policy
+ * behind level 2 size selection to reduce level-1 deviceID bits.
+ */
+ if (l1_sz > KMALLOC_MAX_SIZE) {
+ l1_bits = ilog2(KMALLOC_MAX_SIZE/sizeof(*l1devtab));
+
+ pr_warn("Limiting device ID bits from %u to %u\n",
+ device_id_bits, l1_bits + l2_bits);
+ device_id_bits = l1_bits + l2_bits;
+ l1_sz = KMALLOC_MAX_SIZE;
+ }
+
+ l1devtab = kcalloc(BIT(l1_bits), sizeof(*l1devtab), GFP_KERNEL);
+ if (!l1devtab)
+ return -ENOMEM;
+
+ l2ptrs = kcalloc(BIT(l1_bits), sizeof(*l2ptrs), GFP_KERNEL);
+ if (!l2ptrs) {
+ kfree(l1devtab);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < BIT(l1_bits); i++)
+ l1devtab[i] = cpu_to_le64(FIELD_PREP(GICV5_DTL1E_SPAN, l2_bits));
+
+ gicv5_its_dcache_clean(its, l1devtab, l1_sz);
+
+ cfgr = FIELD_PREP(GICV5_ITS_DT_CFGR_STRUCTURE,
+ GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL) |
+ FIELD_PREP(GICV5_ITS_DT_CFGR_L2SZ, devtab_l2sz) |
+ FIELD_PREP(GICV5_ITS_DT_CFGR_DEVICEID_BITS, device_id_bits);
+ its_writel_relaxed(its, cfgr, GICV5_ITS_DT_CFGR);
+
+ baser = virt_to_phys(l1devtab) & GICV5_ITS_DT_BASER_ADDR_MASK;
+ its_writeq_relaxed(its, baser, GICV5_ITS_DT_BASER);
+
+ its->devtab_cfgr.cfgr = cfgr;
+ its->devtab_cfgr.l2.l1devtab = l1devtab;
+ its->devtab_cfgr.l2.l2ptrs = l2ptrs;
+
+ return 0;
+}
+
+/*
+ * Initialise the device table as either 1- or 2-level depending on what is
+ * supported by the hardware.
+ */
+static int gicv5_its_init_devtab(struct gicv5_its_chip_data *its)
+{
+ u8 device_id_bits, devtab_l2sz;
+ bool two_level_devtab;
+ u32 idr1;
+
+ idr1 = its_readl_relaxed(its, GICV5_ITS_IDR1);
+
+ device_id_bits = FIELD_GET(GICV5_ITS_IDR1_DEVICEID_BITS, idr1);
+ two_level_devtab = gicv5_its_l2sz_two_level(true, idr1, device_id_bits,
+ &devtab_l2sz);
+ if (two_level_devtab)
+ return gicv5_its_alloc_devtab_two_level(its, device_id_bits,
+ devtab_l2sz);
+ else
+ return gicv5_its_alloc_devtab_linear(its, device_id_bits);
+}
+
+static void gicv5_its_deinit_devtab(struct gicv5_its_chip_data *its)
+{
+ u8 str = devtab_cfgr_field(its, STRUCTURE);
+
+ if (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR) {
+ kfree(its->devtab_cfgr.linear.devtab);
+ } else {
+ kfree(its->devtab_cfgr.l2.l1devtab);
+ kfree(its->devtab_cfgr.l2.l2ptrs);
+ }
+}
+
+static void gicv5_its_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d);
+ u64 addr = its_dev->its_trans_phys_base;
+
+ msg->data = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq);
+ msi_msg_set_addr(irq_data_get_msi_desc(d), msg, addr);
+}
+
+static const struct irq_chip gicv5_its_irq_chip = {
+ .name = "GICv5-ITS-MSI",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_get_irqchip_state = irq_chip_get_parent_state,
+ .irq_set_irqchip_state = irq_chip_set_parent_state,
+ .irq_compose_msi_msg = gicv5_its_compose_msi_msg,
+};
+
+static struct gicv5_its_dev *gicv5_its_find_device(struct gicv5_its_chip_data *its,
+ u32 device_id)
+{
+ struct gicv5_its_dev *dev = xa_load(&its->its_devices, device_id);
+
+ return dev ? dev : ERR_PTR(-ENODEV);
+}
+
+static struct gicv5_its_dev *gicv5_its_alloc_device(struct gicv5_its_chip_data *its, int nvec,
+ u32 dev_id)
+{
+ struct gicv5_its_dev *its_dev;
+ void *entry;
+ int ret;
+
+ its_dev = gicv5_its_find_device(its, dev_id);
+ if (!IS_ERR(its_dev)) {
+ pr_err("A device with this DeviceID (0x%x) has already been registered.\n",
+ dev_id);
+
+ return ERR_PTR(-EBUSY);
+ }
+
+ its_dev = kzalloc(sizeof(*its_dev), GFP_KERNEL);
+ if (!its_dev)
+ return ERR_PTR(-ENOMEM);
+
+ its_dev->device_id = dev_id;
+ its_dev->num_events = nvec;
+
+ ret = gicv5_its_device_register(its, its_dev);
+ if (ret) {
+ pr_err("Failed to register the device\n");
+ goto out_dev_free;
+ }
+
+ gicv5_its_device_cache_inv(its, its_dev);
+
+ its_dev->its_node = its;
+
+ its_dev->event_map = (unsigned long *)bitmap_zalloc(its_dev->num_events, GFP_KERNEL);
+ if (!its_dev->event_map) {
+ ret = -ENOMEM;
+ goto out_unregister;
+ }
+
+ entry = xa_store(&its->its_devices, dev_id, its_dev, GFP_KERNEL);
+ if (xa_is_err(entry)) {
+ ret = xa_err(entry);
+ goto out_bitmap_free;
+ }
+
+ return its_dev;
+
+out_bitmap_free:
+ bitmap_free(its_dev->event_map);
+out_unregister:
+ gicv5_its_device_unregister(its, its_dev);
+out_dev_free:
+ kfree(its_dev);
+ return ERR_PTR(ret);
+}
+
+static int gicv5_its_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *info)
+{
+ u32 dev_id = info->scratchpad[0].ul;
+ struct msi_domain_info *msi_info;
+ struct gicv5_its_chip_data *its;
+ struct gicv5_its_dev *its_dev;
+
+ msi_info = msi_get_domain_info(domain);
+ its = msi_info->data;
+
+ guard(mutex)(&its->dev_alloc_lock);
+
+ its_dev = gicv5_its_alloc_device(its, nvec, dev_id);
+ if (IS_ERR(its_dev))
+ return PTR_ERR(its_dev);
+
+ its_dev->its_trans_phys_base = info->scratchpad[1].ul;
+ info->scratchpad[0].ptr = its_dev;
+
+ return 0;
+}
+
+static void gicv5_its_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *info)
+{
+ struct gicv5_its_dev *its_dev = info->scratchpad[0].ptr;
+ struct msi_domain_info *msi_info;
+ struct gicv5_its_chip_data *its;
+
+ msi_info = msi_get_domain_info(domain);
+ its = msi_info->data;
+
+ guard(mutex)(&its->dev_alloc_lock);
+
+ if (WARN_ON_ONCE(!bitmap_empty(its_dev->event_map, its_dev->num_events)))
+ return;
+
+ xa_erase(&its->its_devices, its_dev->device_id);
+ bitmap_free(its_dev->event_map);
+ gicv5_its_device_unregister(its, its_dev);
+ kfree(its_dev);
+}
+
+static struct msi_domain_ops gicv5_its_msi_domain_ops = {
+ .msi_prepare = gicv5_its_msi_prepare,
+ .msi_teardown = gicv5_its_msi_teardown,
+};
+
+static int gicv5_its_map_event(struct gicv5_its_dev *its_dev, u16 event_id, u32 lpi)
+{
+ struct gicv5_its_chip_data *its = its_dev->its_node;
+ u64 itt_entry;
+ __le64 *itte;
+
+ itte = gicv5_its_device_get_itte_ref(its_dev, event_id);
+
+ if (FIELD_GET(GICV5_ITTL2E_VALID, *itte))
+ return -EEXIST;
+
+ itt_entry = FIELD_PREP(GICV5_ITTL2E_LPI_ID, lpi) |
+ FIELD_PREP(GICV5_ITTL2E_VALID, 0x1);
+
+ its_write_table_entry(its, itte, itt_entry);
+
+ gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id);
+
+ return 0;
+}
+
+static void gicv5_its_unmap_event(struct gicv5_its_dev *its_dev, u16 event_id)
+{
+ struct gicv5_its_chip_data *its = its_dev->its_node;
+ u64 itte_val;
+ __le64 *itte;
+
+ itte = gicv5_its_device_get_itte_ref(its_dev, event_id);
+
+ itte_val = le64_to_cpu(*itte);
+ itte_val &= ~GICV5_ITTL2E_VALID;
+
+ its_write_table_entry(its, itte, itte_val);
+
+ gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id);
+}
+
+static int gicv5_its_alloc_eventid(struct gicv5_its_dev *its_dev, msi_alloc_info_t *info,
+ unsigned int nr_irqs, u32 *eventid)
+{
+ int event_id_base;
+
+ if (!(info->flags & MSI_ALLOC_FLAGS_FIXED_MSG_DATA)) {
+ event_id_base = bitmap_find_free_region(its_dev->event_map,
+ its_dev->num_events,
+ get_count_order(nr_irqs));
+ if (event_id_base < 0)
+ return event_id_base;
+ } else {
+ /*
+ * We want to have a fixed EventID mapped for hardcoded
+ * message data allocations.
+ */
+ if (WARN_ON_ONCE(nr_irqs != 1))
+ return -EINVAL;
+
+ event_id_base = info->hwirq;
+
+ if (event_id_base >= its_dev->num_events) {
+ pr_err("EventID ouside of ITT range; cannot allocate an ITT entry!\n");
+
+ return -EINVAL;
+ }
+
+ if (test_and_set_bit(event_id_base, its_dev->event_map)) {
+ pr_warn("Can't reserve event_id bitmap\n");
+ return -EINVAL;
+
+ }
+ }
+
+ *eventid = event_id_base;
+
+ return 0;
+}
+
+static void gicv5_its_free_eventid(struct gicv5_its_dev *its_dev, u32 event_id_base,
+ unsigned int nr_irqs)
+{
+ bitmap_release_region(its_dev->event_map, event_id_base,
+ get_count_order(nr_irqs));
+}
+
+static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ u32 device_id, event_id_base, lpi;
+ struct gicv5_its_dev *its_dev;
+ msi_alloc_info_t *info = arg;
+ irq_hw_number_t hwirq;
+ struct irq_data *irqd;
+ int ret, i;
+
+ its_dev = info->scratchpad[0].ptr;
+
+ ret = gicv5_its_alloc_eventid(its_dev, info, nr_irqs, &event_id_base);
+ if (ret)
+ return ret;
+
+ ret = iommu_dma_prepare_msi(info->desc, its_dev->its_trans_phys_base);
+ if (ret)
+ goto out_eventid;
+
+ device_id = its_dev->device_id;
+
+ for (i = 0; i < nr_irqs; i++) {
+ lpi = gicv5_alloc_lpi();
+ if (ret < 0) {
+ pr_debug("Failed to find free LPI!\n");
+ goto out_eventid;
+ }
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi);
+ if (ret)
+ goto out_free_lpi;
+
+ /*
+ * Store eventid and deviceid into the hwirq for later use.
+ *
+ * hwirq = event_id << 32 | device_id
+ */
+ hwirq = FIELD_PREP(GICV5_ITS_HWIRQ_DEVICE_ID, device_id) |
+ FIELD_PREP(GICV5_ITS_HWIRQ_EVENT_ID, (u64)event_id_base + i);
+ irq_domain_set_info(domain, virq + i, hwirq,
+ &gicv5_its_irq_chip, its_dev,
+ handle_fasteoi_irq, NULL, NULL);
+
+ irqd = irq_get_irq_data(virq + i);
+ irqd_set_single_target(irqd);
+ irqd_set_affinity_on_activate(irqd);
+ irqd_set_resend_when_in_progress(irqd);
+ }
+
+ return 0;
+
+out_free_lpi:
+ gicv5_free_lpi(lpi);
+out_eventid:
+ gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs);
+ return ret;
+}
+
+static void gicv5_its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct gicv5_its_chip_data *its;
+ struct gicv5_its_dev *its_dev;
+ u16 event_id_base;
+ unsigned int i;
+
+ its_dev = irq_data_get_irq_chip_data(d);
+ its = its_dev->its_node;
+
+ event_id_base = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq);
+
+ bitmap_release_region(its_dev->event_map, event_id_base,
+ get_count_order(nr_irqs));
+
+ /* Hierarchically free irq data */
+ for (i = 0; i < nr_irqs; i++) {
+ d = irq_domain_get_irq_data(domain, virq + i);
+
+ gicv5_free_lpi(d->parent_data->hwirq);
+ irq_domain_reset_irq_data(d);
+ irq_domain_free_irqs_parent(domain, virq + i, 1);
+ }
+
+ gicv5_its_syncr(its, its_dev);
+ gicv5_irs_syncr();
+}
+
+static int gicv5_its_irq_domain_activate(struct irq_domain *domain, struct irq_data *d,
+ bool reserve)
+{
+ struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d);
+ u16 event_id;
+ u32 lpi;
+
+ event_id = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq);
+ lpi = d->parent_data->hwirq;
+
+ return gicv5_its_map_event(its_dev, event_id, lpi);
+}
+
+static void gicv5_its_irq_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *d)
+{
+ struct gicv5_its_dev *its_dev = irq_data_get_irq_chip_data(d);
+ u16 event_id;
+
+ event_id = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq);
+
+ gicv5_its_unmap_event(its_dev, event_id);
+}
+
+static const struct irq_domain_ops gicv5_its_irq_domain_ops = {
+ .alloc = gicv5_its_irq_domain_alloc,
+ .free = gicv5_its_irq_domain_free,
+ .activate = gicv5_its_irq_domain_activate,
+ .deactivate = gicv5_its_irq_domain_deactivate,
+ .select = msi_lib_irq_domain_select,
+};
+
+static int gicv5_its_write_cr0(struct gicv5_its_chip_data *its, bool enable)
+{
+ u32 cr0 = FIELD_PREP(GICV5_ITS_CR0_ITSEN, enable);
+
+ its_writel_relaxed(its, cr0, GICV5_ITS_CR0);
+ return gicv5_wait_for_op_atomic(its->its_base, GICV5_ITS_CR0,
+ GICV5_ITS_CR0_IDLE, NULL);
+}
+
+static int gicv5_its_enable(struct gicv5_its_chip_data *its)
+{
+ return gicv5_its_write_cr0(its, true);
+}
+
+static int gicv5_its_disable(struct gicv5_its_chip_data *its)
+{
+ return gicv5_its_write_cr0(its, false);
+}
+
+static void gicv5_its_print_info(struct gicv5_its_chip_data *its_node)
+{
+ bool devtab_linear;
+ u8 device_id_bits;
+ u8 str;
+
+ device_id_bits = devtab_cfgr_field(its_node, DEVICEID_BITS);
+
+ str = devtab_cfgr_field(its_node, STRUCTURE);
+ devtab_linear = (str == GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR);
+
+ pr_info("ITS %s enabled using %s device table device_id_bits %u\n",
+ fwnode_get_name(its_node->fwnode),
+ devtab_linear ? "linear" : "2-level",
+ device_id_bits);
+}
+
+static int gicv5_its_init_domain(struct gicv5_its_chip_data *its, struct irq_domain *parent)
+{
+ struct irq_domain_info dom_info = {
+ .fwnode = its->fwnode,
+ .ops = &gicv5_its_irq_domain_ops,
+ .domain_flags = its->msi_domain_flags,
+ .parent = parent,
+ };
+ struct msi_domain_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->ops = &gicv5_its_msi_domain_ops;
+ info->data = its;
+ dom_info.host_data = info;
+
+ if (!msi_create_parent_irq_domain(&dom_info, &gic_v5_its_msi_parent_ops)) {
+ kfree(info);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int __init gicv5_its_init_bases(void __iomem *its_base, struct fwnode_handle *handle,
+ struct irq_domain *parent_domain)
+{
+ struct device_node *np = to_of_node(handle);
+ struct gicv5_its_chip_data *its_node;
+ u32 cr0, cr1;
+ bool enabled;
+ int ret;
+
+ its_node = kzalloc(sizeof(*its_node), GFP_KERNEL);
+ if (!its_node)
+ return -ENOMEM;
+
+ mutex_init(&its_node->dev_alloc_lock);
+ xa_init(&its_node->its_devices);
+ its_node->fwnode = handle;
+ its_node->its_base = its_base;
+ its_node->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI |
+ IRQ_DOMAIN_FLAG_FWNODE_PARENT;
+
+ cr0 = its_readl_relaxed(its_node, GICV5_ITS_CR0);
+ enabled = FIELD_GET(GICV5_ITS_CR0_ITSEN, cr0);
+ if (WARN(enabled, "ITS %s enabled, disabling it before proceeding\n", np->full_name)) {
+ ret = gicv5_its_disable(its_node);
+ if (ret)
+ goto out_free_node;
+ }
+
+ if (of_property_read_bool(np, "dma-noncoherent")) {
+ /*
+ * A non-coherent ITS implies that some cache levels cannot be
+ * used coherently by the cores and GIC. Our only option is to mark
+ * memory attributes for the GIC as non-cacheable; by default,
+ * non-cacheable memory attributes imply outer-shareable
+ * shareability, the value written into ITS_CR1_SH is ignored.
+ */
+ cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_NO_READ_ALLOC) |
+ FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_NON_CACHE) |
+ FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_NON_CACHE);
+ its_node->flags |= ITS_FLAGS_NON_COHERENT;
+ } else {
+ cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_READ_ALLOC) |
+ FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_WB_CACHE) |
+ FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_WB_CACHE) |
+ FIELD_PREP(GICV5_ITS_CR1_SH, GICV5_INNER_SHARE);
+ }
+
+ its_writel_relaxed(its_node, cr1, GICV5_ITS_CR1);
+
+ ret = gicv5_its_init_devtab(its_node);
+ if (ret)
+ goto out_free_node;
+
+ ret = gicv5_its_enable(its_node);
+ if (ret)
+ goto out_free_devtab;
+
+ ret = gicv5_its_init_domain(its_node, parent_domain);
+ if (ret)
+ goto out_disable_its;
+
+ gicv5_its_print_info(its_node);
+
+ return 0;
+
+out_disable_its:
+ gicv5_its_disable(its_node);
+out_free_devtab:
+ gicv5_its_deinit_devtab(its_node);
+out_free_node:
+ kfree(its_node);
+ return ret;
+}
+
+static int __init gicv5_its_init(struct device_node *node)
+{
+ void __iomem *its_base;
+ int ret, idx;
+
+ idx = of_property_match_string(node, "reg-names", "ns-config");
+ if (idx < 0) {
+ pr_err("%pOF: ns-config reg-name not present\n", node);
+ return -ENODEV;
+ }
+
+ its_base = of_io_request_and_map(node, idx, of_node_full_name(node));
+ if (IS_ERR(its_base)) {
+ pr_err("%pOF: unable to map GICv5 ITS_CONFIG_FRAME\n", node);
+ return PTR_ERR(its_base);
+ }
+
+ ret = gicv5_its_init_bases(its_base, of_fwnode_handle(node),
+ gicv5_global_data.lpi_domain);
+ if (ret)
+ goto out_unmap;
+
+ return 0;
+
+out_unmap:
+ iounmap(its_base);
+ return ret;
+}
+
+void __init gicv5_its_of_probe(struct device_node *parent)
+{
+ struct device_node *np;
+
+ for_each_available_child_of_node(parent, np) {
+ if (!of_device_is_compatible(np, "arm,gic-v5-its"))
+ continue;
+
+ if (gicv5_its_init(np))
+ pr_err("Failed to init ITS %s\n", np->full_name);
+ }
+}
diff --git a/drivers/irqchip/irq-gic-v5-iwb.c b/drivers/irqchip/irq-gic-v5-iwb.c
new file mode 100644
index 000000000000..ed72fbdd4900
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v5-iwb.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved.
+ */
+#define pr_fmt(fmt) "GICv5 IWB: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-v5.h>
+
+struct gicv5_iwb_chip_data {
+ void __iomem *iwb_base;
+ u16 nr_regs;
+};
+
+static u32 iwb_readl_relaxed(struct gicv5_iwb_chip_data *iwb_node, const u32 reg_offset)
+{
+ return readl_relaxed(iwb_node->iwb_base + reg_offset);
+}
+
+static void iwb_writel_relaxed(struct gicv5_iwb_chip_data *iwb_node, const u32 val,
+ const u32 reg_offset)
+{
+ writel_relaxed(val, iwb_node->iwb_base + reg_offset);
+}
+
+static int gicv5_iwb_wait_for_wenabler(struct gicv5_iwb_chip_data *iwb_node)
+{
+ return gicv5_wait_for_op_atomic(iwb_node->iwb_base, GICV5_IWB_WENABLE_STATUSR,
+ GICV5_IWB_WENABLE_STATUSR_IDLE, NULL);
+}
+
+static int __gicv5_iwb_set_wire_enable(struct gicv5_iwb_chip_data *iwb_node,
+ u32 iwb_wire, bool enable)
+{
+ u32 n = iwb_wire / 32;
+ u8 i = iwb_wire % 32;
+ u32 val;
+
+ if (n >= iwb_node->nr_regs) {
+ pr_err("IWB_WENABLER<n> is invalid for n=%u\n", n);
+ return -EINVAL;
+ }
+
+ /*
+ * Enable IWB wire/pin at this point
+ * Note: This is not the same as enabling the interrupt
+ */
+ val = iwb_readl_relaxed(iwb_node, GICV5_IWB_WENABLER + (4 * n));
+ if (enable)
+ val |= BIT(i);
+ else
+ val &= ~BIT(i);
+ iwb_writel_relaxed(iwb_node, val, GICV5_IWB_WENABLER + (4 * n));
+
+ return gicv5_iwb_wait_for_wenabler(iwb_node);
+}
+
+static int gicv5_iwb_enable_wire(struct gicv5_iwb_chip_data *iwb_node,
+ u32 iwb_wire)
+{
+ return __gicv5_iwb_set_wire_enable(iwb_node, iwb_wire, true);
+}
+
+static int gicv5_iwb_disable_wire(struct gicv5_iwb_chip_data *iwb_node,
+ u32 iwb_wire)
+{
+ return __gicv5_iwb_set_wire_enable(iwb_node, iwb_wire, false);
+}
+
+static void gicv5_iwb_irq_disable(struct irq_data *d)
+{
+ struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d);
+
+ gicv5_iwb_disable_wire(iwb_node, d->hwirq);
+ irq_chip_disable_parent(d);
+}
+
+static void gicv5_iwb_irq_enable(struct irq_data *d)
+{
+ struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d);
+
+ gicv5_iwb_enable_wire(iwb_node, d->hwirq);
+ irq_chip_enable_parent(d);
+}
+
+static int gicv5_iwb_set_type(struct irq_data *d, unsigned int type)
+{
+ struct gicv5_iwb_chip_data *iwb_node = irq_data_get_irq_chip_data(d);
+ u32 iwb_wire, n, wtmr;
+ u8 i;
+
+ iwb_wire = d->hwirq;
+ i = iwb_wire % 32;
+ n = iwb_wire / 32;
+
+ if (n >= iwb_node->nr_regs) {
+ pr_err_once("reg %u out of range\n", n);
+ return -EINVAL;
+ }
+
+ wtmr = iwb_readl_relaxed(iwb_node, GICV5_IWB_WTMR + (4 * n));
+
+ switch (type) {
+ case IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_LEVEL_LOW:
+ wtmr |= BIT(i);
+ break;
+ case IRQ_TYPE_EDGE_RISING:
+ case IRQ_TYPE_EDGE_FALLING:
+ wtmr &= ~BIT(i);
+ break;
+ default:
+ pr_debug("unexpected wire trigger mode");
+ return -EINVAL;
+ }
+
+ iwb_writel_relaxed(iwb_node, wtmr, GICV5_IWB_WTMR + (4 * n));
+
+ return 0;
+}
+
+static void gicv5_iwb_domain_set_desc(msi_alloc_info_t *alloc_info, struct msi_desc *desc)
+{
+ alloc_info->desc = desc;
+ alloc_info->hwirq = (u32)desc->data.icookie.value;
+}
+
+static int gicv5_iwb_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq,
+ unsigned int *type)
+{
+ if (!is_of_node(fwspec->fwnode))
+ return -EINVAL;
+
+ if (fwspec->param_count < 2)
+ return -EINVAL;
+
+ /*
+ * param[0] is be the wire
+ * param[1] is the interrupt type
+ */
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+
+ return 0;
+}
+
+static void gicv5_iwb_write_msi_msg(struct irq_data *d, struct msi_msg *msg) {}
+
+static const struct msi_domain_template iwb_msi_template = {
+ .chip = {
+ .name = "GICv5-IWB",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_enable = gicv5_iwb_irq_enable,
+ .irq_disable = gicv5_iwb_irq_disable,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_type = gicv5_iwb_set_type,
+ .irq_write_msi_msg = gicv5_iwb_write_msi_msg,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_get_irqchip_state = irq_chip_get_parent_state,
+ .irq_set_irqchip_state = irq_chip_set_parent_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+ },
+
+ .ops = {
+ .set_desc = gicv5_iwb_domain_set_desc,
+ .msi_translate = gicv5_iwb_irq_domain_translate,
+ },
+
+ .info = {
+ .bus_token = DOMAIN_BUS_WIRED_TO_MSI,
+ .flags = MSI_FLAG_USE_DEV_FWNODE,
+ },
+
+ .alloc_info = {
+ .flags = MSI_ALLOC_FLAGS_FIXED_MSG_DATA,
+ },
+};
+
+static bool gicv5_iwb_create_device_domain(struct device *dev, unsigned int size,
+ struct gicv5_iwb_chip_data *iwb_node)
+{
+ if (WARN_ON_ONCE(!dev->msi.domain))
+ return false;
+
+ return msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN,
+ &iwb_msi_template, size,
+ NULL, iwb_node);
+}
+
+static struct gicv5_iwb_chip_data *
+gicv5_iwb_init_bases(void __iomem *iwb_base, struct platform_device *pdev)
+{
+ u32 nr_wires, idr0, cr0;
+ unsigned int n;
+ int ret;
+
+ struct gicv5_iwb_chip_data *iwb_node __free(kfree) = kzalloc(sizeof(*iwb_node),
+ GFP_KERNEL);
+ if (!iwb_node)
+ return ERR_PTR(-ENOMEM);
+
+ iwb_node->iwb_base = iwb_base;
+
+ idr0 = iwb_readl_relaxed(iwb_node, GICV5_IWB_IDR0);
+ nr_wires = (FIELD_GET(GICV5_IWB_IDR0_IW_RANGE, idr0) + 1) * 32;
+
+ cr0 = iwb_readl_relaxed(iwb_node, GICV5_IWB_CR0);
+ if (!FIELD_GET(GICV5_IWB_CR0_IWBEN, cr0)) {
+ dev_err(&pdev->dev, "IWB must be enabled in firmware\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ iwb_node->nr_regs = FIELD_GET(GICV5_IWB_IDR0_IW_RANGE, idr0) + 1;
+
+ for (n = 0; n < iwb_node->nr_regs; n++)
+ iwb_writel_relaxed(iwb_node, 0, GICV5_IWB_WENABLER + (sizeof(u32) * n));
+
+ ret = gicv5_iwb_wait_for_wenabler(iwb_node);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!gicv5_iwb_create_device_domain(&pdev->dev, nr_wires, iwb_node))
+ return ERR_PTR(-ENOMEM);
+
+ return_ptr(iwb_node);
+}
+
+static int gicv5_iwb_device_probe(struct platform_device *pdev)
+{
+ struct gicv5_iwb_chip_data *iwb_node;
+ void __iomem *iwb_base;
+ struct resource *res;
+ int ret;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
+
+ iwb_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+ if (!iwb_base) {
+ dev_err(&pdev->dev, "failed to ioremap %pR\n", res);
+ return -ENOMEM;
+ }
+
+ iwb_node = gicv5_iwb_init_bases(iwb_base, pdev);
+ if (IS_ERR(iwb_node)) {
+ ret = PTR_ERR(iwb_node);
+ goto out_unmap;
+ }
+
+ return 0;
+
+out_unmap:
+ iounmap(iwb_base);
+ return ret;
+}
+
+static const struct of_device_id gicv5_iwb_of_match[] = {
+ { .compatible = "arm,gic-v5-iwb" },
+ { /* END */ }
+};
+MODULE_DEVICE_TABLE(of, gicv5_iwb_of_match);
+
+static struct platform_driver gicv5_iwb_platform_driver = {
+ .driver = {
+ .name = "GICv5 IWB",
+ .of_match_table = gicv5_iwb_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = gicv5_iwb_device_probe,
+};
+
+module_platform_driver(gicv5_iwb_platform_driver);
diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c
new file mode 100644
index 000000000000..4bd224f359a7
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v5.c
@@ -0,0 +1,1137 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024-2025 ARM Limited, All Rights Reserved.
+ */
+
+#define pr_fmt(fmt) "GICv5: " fmt
+
+#include <linux/cpuhotplug.h>
+#include <linux/idr.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/wordpart.h>
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-v5.h>
+#include <linux/irqchip/arm-vgic-info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/exception.h>
+
+static u8 pri_bits __ro_after_init = 5;
+
+#define GICV5_IRQ_PRI_MASK 0x1f
+#define GICV5_IRQ_PRI_MI (GICV5_IRQ_PRI_MASK & GENMASK(4, 5 - pri_bits))
+
+#define PPI_NR 128
+
+static bool gicv5_cpuif_has_gcie(void)
+{
+ return this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF);
+}
+
+struct gicv5_chip_data gicv5_global_data __read_mostly;
+
+static DEFINE_IDA(lpi_ida);
+static u32 num_lpis __ro_after_init;
+
+void __init gicv5_init_lpis(u32 lpis)
+{
+ num_lpis = lpis;
+}
+
+void __init gicv5_deinit_lpis(void)
+{
+ num_lpis = 0;
+}
+
+static int alloc_lpi(void)
+{
+ if (!num_lpis)
+ return -ENOSPC;
+
+ return ida_alloc_max(&lpi_ida, num_lpis - 1, GFP_KERNEL);
+}
+
+static void release_lpi(u32 lpi)
+{
+ ida_free(&lpi_ida, lpi);
+}
+
+int gicv5_alloc_lpi(void)
+{
+ return alloc_lpi();
+}
+
+void gicv5_free_lpi(u32 lpi)
+{
+ release_lpi(lpi);
+}
+
+static void gicv5_ppi_priority_init(void)
+{
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR0_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR1_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR2_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR3_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR4_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR5_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR6_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR7_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR8_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR9_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR10_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR11_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR12_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR13_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR14_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR15_EL1);
+
+ /*
+ * Context syncronization required to make sure system register writes
+ * effects are synchronised.
+ */
+ isb();
+}
+
+static void gicv5_hwirq_init(irq_hw_number_t hwirq, u8 priority, u8 hwirq_type)
+{
+ u64 cdpri, cdaff;
+ u16 iaffid;
+ int ret;
+
+ if (hwirq_type == GICV5_HWIRQ_TYPE_LPI || hwirq_type == GICV5_HWIRQ_TYPE_SPI) {
+ cdpri = FIELD_PREP(GICV5_GIC_CDPRI_PRIORITY_MASK, priority) |
+ FIELD_PREP(GICV5_GIC_CDPRI_TYPE_MASK, hwirq_type) |
+ FIELD_PREP(GICV5_GIC_CDPRI_ID_MASK, hwirq);
+ gic_insn(cdpri, CDPRI);
+
+ ret = gicv5_irs_cpu_to_iaffid(smp_processor_id(), &iaffid);
+
+ if (WARN_ON_ONCE(ret))
+ return;
+
+ cdaff = FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid) |
+ FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) |
+ FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, hwirq);
+ gic_insn(cdaff, CDAFF);
+ }
+}
+
+static void gicv5_ppi_irq_mask(struct irq_data *d)
+{
+ u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64);
+
+ if (d->hwirq < 64)
+ sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, hwirq_id_bit, 0);
+ else
+ sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, hwirq_id_bit, 0);
+
+ /*
+ * We must ensure that the disable takes effect immediately to
+ * guarantee that the lazy-disabled IRQ mechanism works.
+ * A context synchronization event is required to guarantee it.
+ * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1.
+ */
+ isb();
+}
+
+static void gicv5_iri_irq_mask(struct irq_data *d, u8 hwirq_type)
+{
+ u64 cddis;
+
+ cddis = FIELD_PREP(GICV5_GIC_CDDIS_ID_MASK, d->hwirq) |
+ FIELD_PREP(GICV5_GIC_CDDIS_TYPE_MASK, hwirq_type);
+
+ gic_insn(cddis, CDDIS);
+ /*
+ * We must make sure that GIC CDDIS write effects are propagated
+ * immediately to make sure the disable takes effect to guarantee
+ * that the lazy-disabled IRQ mechanism works.
+ * Rule R_XCLJC states that the effects of a GIC system instruction
+ * complete in finite time.
+ * The GSB ensures completion of the GIC instruction and prevents
+ * loads, stores and GIC instructions from executing part of their
+ * functionality before the GSB SYS.
+ */
+ gsb_sys();
+}
+
+static void gicv5_spi_irq_mask(struct irq_data *d)
+{
+ gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_SPI);
+}
+
+static void gicv5_lpi_irq_mask(struct irq_data *d)
+{
+ gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_LPI);
+}
+
+static void gicv5_ppi_irq_unmask(struct irq_data *d)
+{
+ u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64);
+
+ if (d->hwirq < 64)
+ sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, 0, hwirq_id_bit);
+ else
+ sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, 0, hwirq_id_bit);
+ /*
+ * We must ensure that the enable takes effect in finite time - a
+ * context synchronization event is required to guarantee it, we
+ * can not take for granted that would happen (eg a core going straight
+ * into idle after enabling a PPI).
+ * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1.
+ */
+ isb();
+}
+
+static void gicv5_iri_irq_unmask(struct irq_data *d, u8 hwirq_type)
+{
+ u64 cden;
+
+ cden = FIELD_PREP(GICV5_GIC_CDEN_ID_MASK, d->hwirq) |
+ FIELD_PREP(GICV5_GIC_CDEN_TYPE_MASK, hwirq_type);
+ /*
+ * Rule R_XCLJC states that the effects of a GIC system instruction
+ * complete in finite time and that's the only requirement when
+ * unmasking an SPI/LPI IRQ.
+ */
+ gic_insn(cden, CDEN);
+}
+
+static void gicv5_spi_irq_unmask(struct irq_data *d)
+{
+ gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_SPI);
+}
+
+static void gicv5_lpi_irq_unmask(struct irq_data *d)
+{
+ gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_LPI);
+}
+
+static void gicv5_hwirq_eoi(u32 hwirq_id, u8 hwirq_type)
+{
+ u64 cddi;
+
+ cddi = FIELD_PREP(GICV5_GIC_CDDI_ID_MASK, hwirq_id) |
+ FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, hwirq_type);
+
+ gic_insn(cddi, CDDI);
+
+ gic_insn(0, CDEOI);
+}
+
+static void gicv5_ppi_irq_eoi(struct irq_data *d)
+{
+ /* Skip deactivate for forwarded PPI interrupts */
+ if (irqd_is_forwarded_to_vcpu(d)) {
+ gic_insn(0, CDEOI);
+ return;
+ }
+
+ gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_PPI);
+}
+
+static void gicv5_spi_irq_eoi(struct irq_data *d)
+{
+ gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_SPI);
+}
+
+static void gicv5_lpi_irq_eoi(struct irq_data *d)
+{
+ gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_LPI);
+}
+
+static int gicv5_iri_irq_set_affinity(struct irq_data *d,
+ const struct cpumask *mask_val,
+ bool force, u8 hwirq_type)
+{
+ int ret, cpuid;
+ u16 iaffid;
+ u64 cdaff;
+
+ if (force)
+ cpuid = cpumask_first(mask_val);
+ else
+ cpuid = cpumask_any_and(mask_val, cpu_online_mask);
+
+ ret = gicv5_irs_cpu_to_iaffid(cpuid, &iaffid);
+ if (ret)
+ return ret;
+
+ cdaff = FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid) |
+ FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) |
+ FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, d->hwirq);
+ gic_insn(cdaff, CDAFF);
+
+ irq_data_update_effective_affinity(d, cpumask_of(cpuid));
+
+ return IRQ_SET_MASK_OK_DONE;
+}
+
+static int gicv5_spi_irq_set_affinity(struct irq_data *d,
+ const struct cpumask *mask_val,
+ bool force)
+{
+ return gicv5_iri_irq_set_affinity(d, mask_val, force,
+ GICV5_HWIRQ_TYPE_SPI);
+}
+
+static int gicv5_lpi_irq_set_affinity(struct irq_data *d,
+ const struct cpumask *mask_val,
+ bool force)
+{
+ return gicv5_iri_irq_set_affinity(d, mask_val, force,
+ GICV5_HWIRQ_TYPE_LPI);
+}
+
+enum ppi_reg {
+ PPI_PENDING,
+ PPI_ACTIVE,
+ PPI_HM
+};
+
+static __always_inline u64 read_ppi_sysreg_s(unsigned int irq,
+ const enum ppi_reg which)
+{
+ switch (which) {
+ case PPI_PENDING:
+ return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SPENDR0_EL1) :
+ read_sysreg_s(SYS_ICC_PPI_SPENDR1_EL1);
+ case PPI_ACTIVE:
+ return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SACTIVER0_EL1) :
+ read_sysreg_s(SYS_ICC_PPI_SACTIVER1_EL1);
+ case PPI_HM:
+ return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_HMR0_EL1) :
+ read_sysreg_s(SYS_ICC_PPI_HMR1_EL1);
+ default:
+ BUILD_BUG_ON(1);
+ }
+}
+
+static __always_inline void write_ppi_sysreg_s(unsigned int irq, bool set,
+ const enum ppi_reg which)
+{
+ u64 bit = BIT_ULL(irq % 64);
+
+ switch (which) {
+ case PPI_PENDING:
+ if (set) {
+ if (irq < 64)
+ write_sysreg_s(bit, SYS_ICC_PPI_SPENDR0_EL1);
+ else
+ write_sysreg_s(bit, SYS_ICC_PPI_SPENDR1_EL1);
+ } else {
+ if (irq < 64)
+ write_sysreg_s(bit, SYS_ICC_PPI_CPENDR0_EL1);
+ else
+ write_sysreg_s(bit, SYS_ICC_PPI_CPENDR1_EL1);
+ }
+ return;
+ case PPI_ACTIVE:
+ if (set) {
+ if (irq < 64)
+ write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER0_EL1);
+ else
+ write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER1_EL1);
+ } else {
+ if (irq < 64)
+ write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER0_EL1);
+ else
+ write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER1_EL1);
+ }
+ return;
+ default:
+ BUILD_BUG_ON(1);
+ }
+}
+
+static int gicv5_ppi_irq_get_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64);
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ *state = !!(read_ppi_sysreg_s(d->hwirq, PPI_PENDING) & hwirq_id_bit);
+ return 0;
+ case IRQCHIP_STATE_ACTIVE:
+ *state = !!(read_ppi_sysreg_s(d->hwirq, PPI_ACTIVE) & hwirq_id_bit);
+ return 0;
+ default:
+ pr_debug("Unexpected PPI irqchip state\n");
+ return -EINVAL;
+ }
+}
+
+static int gicv5_iri_irq_get_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool *state, u8 hwirq_type)
+{
+ u64 icsr, cdrcfg;
+
+ cdrcfg = d->hwirq | FIELD_PREP(GICV5_GIC_CDRCFG_TYPE_MASK, hwirq_type);
+
+ gic_insn(cdrcfg, CDRCFG);
+ isb();
+ icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);
+
+ if (FIELD_GET(ICC_ICSR_EL1_F, icsr)) {
+ pr_err("ICSR_EL1 is invalid\n");
+ return -EINVAL;
+ }
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ *state = !!(FIELD_GET(ICC_ICSR_EL1_Pending, icsr));
+ return 0;
+
+ case IRQCHIP_STATE_ACTIVE:
+ *state = !!(FIELD_GET(ICC_ICSR_EL1_Active, icsr));
+ return 0;
+
+ default:
+ pr_debug("Unexpected irqchip_irq_state\n");
+ return -EINVAL;
+ }
+}
+
+static int gicv5_spi_irq_get_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ return gicv5_iri_irq_get_irqchip_state(d, which, state,
+ GICV5_HWIRQ_TYPE_SPI);
+}
+
+static int gicv5_lpi_irq_get_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ return gicv5_iri_irq_get_irqchip_state(d, which, state,
+ GICV5_HWIRQ_TYPE_LPI);
+}
+
+static int gicv5_ppi_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool state)
+{
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ write_ppi_sysreg_s(d->hwirq, state, PPI_PENDING);
+ return 0;
+ case IRQCHIP_STATE_ACTIVE:
+ write_ppi_sysreg_s(d->hwirq, state, PPI_ACTIVE);
+ return 0;
+ default:
+ pr_debug("Unexpected PPI irqchip state\n");
+ return -EINVAL;
+ }
+}
+
+static void gicv5_iri_irq_write_pending_state(struct irq_data *d, bool state,
+ u8 hwirq_type)
+{
+ u64 cdpend;
+
+ cdpend = FIELD_PREP(GICV5_GIC_CDPEND_TYPE_MASK, hwirq_type) |
+ FIELD_PREP(GICV5_GIC_CDPEND_ID_MASK, d->hwirq) |
+ FIELD_PREP(GICV5_GIC_CDPEND_PENDING_MASK, state);
+
+ gic_insn(cdpend, CDPEND);
+}
+
+static void gicv5_spi_irq_write_pending_state(struct irq_data *d, bool state)
+{
+ gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_SPI);
+}
+
+static void gicv5_lpi_irq_write_pending_state(struct irq_data *d, bool state)
+{
+ gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_LPI);
+}
+
+static int gicv5_spi_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool state)
+{
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ gicv5_spi_irq_write_pending_state(d, state);
+ break;
+ default:
+ pr_debug("Unexpected irqchip_irq_state\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gicv5_lpi_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which,
+ bool state)
+{
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ gicv5_lpi_irq_write_pending_state(d, state);
+ break;
+
+ default:
+ pr_debug("Unexpected irqchip_irq_state\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gicv5_spi_irq_retrigger(struct irq_data *data)
+{
+ return !gicv5_spi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING,
+ true);
+}
+
+static int gicv5_lpi_irq_retrigger(struct irq_data *data)
+{
+ return !gicv5_lpi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING,
+ true);
+}
+
+static void gicv5_ipi_send_single(struct irq_data *d, unsigned int cpu)
+{
+ /* Mark the LPI pending */
+ irq_chip_retrigger_hierarchy(d);
+}
+
+static bool gicv5_ppi_irq_is_level(irq_hw_number_t hwirq)
+{
+ u64 bit = BIT_ULL(hwirq % 64);
+
+ return !!(read_ppi_sysreg_s(hwirq, PPI_HM) & bit);
+}
+
+static int gicv5_ppi_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+ if (vcpu)
+ irqd_set_forwarded_to_vcpu(d);
+ else
+ irqd_clr_forwarded_to_vcpu(d);
+
+ return 0;
+}
+
+static const struct irq_chip gicv5_ppi_irq_chip = {
+ .name = "GICv5-PPI",
+ .irq_mask = gicv5_ppi_irq_mask,
+ .irq_unmask = gicv5_ppi_irq_unmask,
+ .irq_eoi = gicv5_ppi_irq_eoi,
+ .irq_get_irqchip_state = gicv5_ppi_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gicv5_ppi_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gicv5_ppi_irq_set_vcpu_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gicv5_spi_irq_chip = {
+ .name = "GICv5-SPI",
+ .irq_mask = gicv5_spi_irq_mask,
+ .irq_unmask = gicv5_spi_irq_unmask,
+ .irq_eoi = gicv5_spi_irq_eoi,
+ .irq_set_type = gicv5_spi_irq_set_type,
+ .irq_set_affinity = gicv5_spi_irq_set_affinity,
+ .irq_retrigger = gicv5_spi_irq_retrigger,
+ .irq_get_irqchip_state = gicv5_spi_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gicv5_spi_irq_set_irqchip_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gicv5_lpi_irq_chip = {
+ .name = "GICv5-LPI",
+ .irq_mask = gicv5_lpi_irq_mask,
+ .irq_unmask = gicv5_lpi_irq_unmask,
+ .irq_eoi = gicv5_lpi_irq_eoi,
+ .irq_set_affinity = gicv5_lpi_irq_set_affinity,
+ .irq_retrigger = gicv5_lpi_irq_retrigger,
+ .irq_get_irqchip_state = gicv5_lpi_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gicv5_lpi_irq_set_irqchip_state,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gicv5_ipi_irq_chip = {
+ .name = "GICv5-IPI",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_get_irqchip_state = irq_chip_get_parent_state,
+ .irq_set_irqchip_state = irq_chip_set_parent_state,
+ .ipi_send_single = gicv5_ipi_send_single,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static __always_inline int gicv5_irq_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq,
+ unsigned int *type,
+ const u8 hwirq_type)
+{
+ if (!is_of_node(fwspec->fwnode))
+ return -EINVAL;
+
+ if (fwspec->param_count < 3)
+ return -EINVAL;
+
+ if (fwspec->param[0] != hwirq_type)
+ return -EINVAL;
+
+ *hwirq = fwspec->param[1];
+
+ switch (hwirq_type) {
+ case GICV5_HWIRQ_TYPE_PPI:
+ /*
+ * Handling mode is hardcoded for PPIs, set the type using
+ * HW reported value.
+ */
+ *type = gicv5_ppi_irq_is_level(*hwirq) ? IRQ_TYPE_LEVEL_LOW :
+ IRQ_TYPE_EDGE_RISING;
+ break;
+ case GICV5_HWIRQ_TYPE_SPI:
+ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+ break;
+ default:
+ BUILD_BUG_ON(1);
+ }
+
+ return 0;
+}
+
+static int gicv5_irq_ppi_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq,
+ unsigned int *type)
+{
+ return gicv5_irq_domain_translate(d, fwspec, hwirq, type,
+ GICV5_HWIRQ_TYPE_PPI);
+}
+
+static int gicv5_irq_ppi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ unsigned int type = IRQ_TYPE_NONE;
+ struct irq_fwspec *fwspec = arg;
+ irq_hw_number_t hwirq;
+ int ret;
+
+ if (WARN_ON_ONCE(nr_irqs != 1))
+ return -EINVAL;
+
+ ret = gicv5_irq_ppi_domain_translate(domain, fwspec, &hwirq, &type);
+ if (ret)
+ return ret;
+
+ if (type & IRQ_TYPE_LEVEL_MASK)
+ irq_set_status_flags(virq, IRQ_LEVEL);
+
+ irq_set_percpu_devid(virq);
+ irq_domain_set_info(domain, virq, hwirq, &gicv5_ppi_irq_chip, NULL,
+ handle_percpu_devid_irq, NULL, NULL);
+
+ return 0;
+}
+
+static void gicv5_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d;
+
+ if (WARN_ON_ONCE(nr_irqs != 1))
+ return;
+
+ d = irq_domain_get_irq_data(domain, virq);
+
+ irq_set_handler(virq, NULL);
+ irq_domain_reset_irq_data(d);
+}
+
+static int gicv5_irq_ppi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ if (fwspec->fwnode != d->fwnode)
+ return 0;
+
+ if (fwspec->param[0] != GICV5_HWIRQ_TYPE_PPI)
+ return 0;
+
+ return (d == gicv5_global_data.ppi_domain);
+}
+
+static const struct irq_domain_ops gicv5_irq_ppi_domain_ops = {
+ .translate = gicv5_irq_ppi_domain_translate,
+ .alloc = gicv5_irq_ppi_domain_alloc,
+ .free = gicv5_irq_domain_free,
+ .select = gicv5_irq_ppi_domain_select
+};
+
+static int gicv5_irq_spi_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq,
+ unsigned int *type)
+{
+ return gicv5_irq_domain_translate(d, fwspec, hwirq, type,
+ GICV5_HWIRQ_TYPE_SPI);
+}
+
+static int gicv5_irq_spi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct gicv5_irs_chip_data *chip_data;
+ unsigned int type = IRQ_TYPE_NONE;
+ struct irq_fwspec *fwspec = arg;
+ struct irq_data *irqd;
+ irq_hw_number_t hwirq;
+ int ret;
+
+ if (WARN_ON_ONCE(nr_irqs != 1))
+ return -EINVAL;
+
+ ret = gicv5_irq_spi_domain_translate(domain, fwspec, &hwirq, &type);
+ if (ret)
+ return ret;
+
+ irqd = irq_desc_get_irq_data(irq_to_desc(virq));
+ chip_data = gicv5_irs_lookup_by_spi_id(hwirq);
+
+ irq_domain_set_info(domain, virq, hwirq, &gicv5_spi_irq_chip, chip_data,
+ handle_fasteoi_irq, NULL, NULL);
+ irq_set_probe(virq);
+ irqd_set_single_target(irqd);
+
+ gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_SPI);
+
+ return 0;
+}
+
+static int gicv5_irq_spi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ if (fwspec->fwnode != d->fwnode)
+ return 0;
+
+ if (fwspec->param[0] != GICV5_HWIRQ_TYPE_SPI)
+ return 0;
+
+ return (d == gicv5_global_data.spi_domain);
+}
+
+static const struct irq_domain_ops gicv5_irq_spi_domain_ops = {
+ .translate = gicv5_irq_spi_domain_translate,
+ .alloc = gicv5_irq_spi_domain_alloc,
+ .free = gicv5_irq_domain_free,
+ .select = gicv5_irq_spi_domain_select
+};
+
+static void gicv5_lpi_config_reset(struct irq_data *d)
+{
+ u64 cdhm;
+
+ /*
+ * Reset LPIs handling mode to edge by default and clear pending
+ * state to make sure we start the LPI with a clean state from
+ * previous incarnations.
+ */
+ cdhm = FIELD_PREP(GICV5_GIC_CDHM_HM_MASK, 0) |
+ FIELD_PREP(GICV5_GIC_CDHM_TYPE_MASK, GICV5_HWIRQ_TYPE_LPI) |
+ FIELD_PREP(GICV5_GIC_CDHM_ID_MASK, d->hwirq);
+ gic_insn(cdhm, CDHM);
+
+ gicv5_lpi_irq_write_pending_state(d, false);
+}
+
+static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ irq_hw_number_t hwirq;
+ struct irq_data *irqd;
+ u32 *lpi = arg;
+ int ret;
+
+ if (WARN_ON_ONCE(nr_irqs != 1))
+ return -EINVAL;
+
+ hwirq = *lpi;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+
+ irq_domain_set_info(domain, virq, hwirq, &gicv5_lpi_irq_chip, NULL,
+ handle_fasteoi_irq, NULL, NULL);
+ irqd_set_single_target(irqd);
+
+ ret = gicv5_irs_iste_alloc(hwirq);
+ if (ret < 0)
+ return ret;
+
+ gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI);
+ gicv5_lpi_config_reset(irqd);
+
+ return 0;
+}
+
+static const struct irq_domain_ops gicv5_irq_lpi_domain_ops = {
+ .alloc = gicv5_irq_lpi_domain_alloc,
+ .free = gicv5_irq_domain_free,
+};
+
+void __init gicv5_init_lpi_domain(void)
+{
+ struct irq_domain *d;
+
+ d = irq_domain_create_tree(NULL, &gicv5_irq_lpi_domain_ops, NULL);
+ gicv5_global_data.lpi_domain = d;
+}
+
+void __init gicv5_free_lpi_domain(void)
+{
+ irq_domain_remove(gicv5_global_data.lpi_domain);
+ gicv5_global_data.lpi_domain = NULL;
+}
+
+static int gicv5_irq_ipi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_data *irqd;
+ int ret, i;
+ u32 lpi;
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = gicv5_alloc_lpi();
+ if (ret < 0)
+ return ret;
+
+ lpi = ret;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi);
+ if (ret) {
+ gicv5_free_lpi(lpi);
+ return ret;
+ }
+
+ irqd = irq_domain_get_irq_data(domain, virq + i);
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, i,
+ &gicv5_ipi_irq_chip, NULL);
+
+ irqd_set_single_target(irqd);
+
+ irq_set_handler(virq + i, handle_percpu_irq);
+ }
+
+ return 0;
+}
+
+static void gicv5_irq_ipi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d;
+ unsigned int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ d = irq_domain_get_irq_data(domain, virq + i);
+
+ if (!d)
+ return;
+
+ gicv5_free_lpi(d->parent_data->hwirq);
+
+ irq_set_handler(virq + i, NULL);
+ irq_domain_reset_irq_data(d);
+ irq_domain_free_irqs_parent(domain, virq + i, 1);
+ }
+}
+
+static const struct irq_domain_ops gicv5_irq_ipi_domain_ops = {
+ .alloc = gicv5_irq_ipi_domain_alloc,
+ .free = gicv5_irq_ipi_domain_free,
+};
+
+static void handle_irq_per_domain(u32 hwirq)
+{
+ u8 hwirq_type = FIELD_GET(GICV5_HWIRQ_TYPE, hwirq);
+ u32 hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, hwirq);
+ struct irq_domain *domain;
+
+ switch (hwirq_type) {
+ case GICV5_HWIRQ_TYPE_PPI:
+ domain = gicv5_global_data.ppi_domain;
+ break;
+ case GICV5_HWIRQ_TYPE_SPI:
+ domain = gicv5_global_data.spi_domain;
+ break;
+ case GICV5_HWIRQ_TYPE_LPI:
+ domain = gicv5_global_data.lpi_domain;
+ break;
+ default:
+ pr_err_once("Unknown IRQ type, bail out\n");
+ return;
+ }
+
+ if (generic_handle_domain_irq(domain, hwirq_id)) {
+ pr_err_once("Could not handle, hwirq = 0x%x", hwirq_id);
+ gicv5_hwirq_eoi(hwirq_id, hwirq_type);
+ }
+}
+
+static void __exception_irq_entry gicv5_handle_irq(struct pt_regs *regs)
+{
+ bool valid;
+ u32 hwirq;
+ u64 ia;
+
+ ia = gicr_insn(CDIA);
+ valid = GICV5_GICR_CDIA_VALID(ia);
+
+ if (!valid)
+ return;
+
+ /*
+ * Ensure that the CDIA instruction effects (ie IRQ activation) are
+ * completed before handling the interrupt.
+ */
+ gsb_ack();
+
+ /*
+ * Ensure instruction ordering between an acknowledgment and subsequent
+ * instructions in the IRQ handler using an ISB.
+ */
+ isb();
+
+ hwirq = FIELD_GET(GICV5_HWIRQ_INTID, ia);
+
+ handle_irq_per_domain(hwirq);
+}
+
+static void gicv5_cpu_disable_interrupts(void)
+{
+ u64 cr0;
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+static void gicv5_cpu_enable_interrupts(void)
+{
+ u64 cr0, pcr;
+
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1);
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1);
+
+ gicv5_ppi_priority_init();
+
+ pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_PRI_MI);
+ write_sysreg_s(pcr, SYS_ICC_PCR_EL1);
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+static int base_ipi_virq;
+
+static int gicv5_starting_cpu(unsigned int cpu)
+{
+ if (WARN(!gicv5_cpuif_has_gcie(),
+ "GICv5 system components present but CPU does not have FEAT_GCIE"))
+ return -ENODEV;
+
+ gicv5_cpu_enable_interrupts();
+
+ return gicv5_irs_register_cpu(cpu);
+}
+
+static void __init gicv5_smp_init(void)
+{
+ unsigned int num_ipis = GICV5_IPIS_PER_CPU * nr_cpu_ids;
+
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
+ "irqchip/arm/gicv5:starting",
+ gicv5_starting_cpu, NULL);
+
+ base_ipi_virq = irq_domain_alloc_irqs(gicv5_global_data.ipi_domain,
+ num_ipis, NUMA_NO_NODE, NULL);
+ if (WARN(base_ipi_virq <= 0, "IPI IRQ allocation was not successful"))
+ return;
+
+ set_smp_ipi_range_percpu(base_ipi_virq, GICV5_IPIS_PER_CPU, nr_cpu_ids);
+}
+
+static void __init gicv5_free_domains(void)
+{
+ if (gicv5_global_data.ppi_domain)
+ irq_domain_remove(gicv5_global_data.ppi_domain);
+ if (gicv5_global_data.spi_domain)
+ irq_domain_remove(gicv5_global_data.spi_domain);
+ if (gicv5_global_data.ipi_domain)
+ irq_domain_remove(gicv5_global_data.ipi_domain);
+
+ gicv5_global_data.ppi_domain = NULL;
+ gicv5_global_data.spi_domain = NULL;
+ gicv5_global_data.ipi_domain = NULL;
+}
+
+static int __init gicv5_init_domains(struct fwnode_handle *handle)
+{
+ u32 spi_count = gicv5_global_data.global_spi_count;
+ struct irq_domain *d;
+
+ d = irq_domain_create_linear(handle, PPI_NR, &gicv5_irq_ppi_domain_ops, NULL);
+ if (!d)
+ return -ENOMEM;
+
+ irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED);
+ gicv5_global_data.ppi_domain = d;
+
+ if (spi_count) {
+ d = irq_domain_create_linear(handle, spi_count,
+ &gicv5_irq_spi_domain_ops, NULL);
+
+ if (!d) {
+ gicv5_free_domains();
+ return -ENOMEM;
+ }
+
+ gicv5_global_data.spi_domain = d;
+ irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED);
+ }
+
+ if (!WARN(!gicv5_global_data.lpi_domain,
+ "LPI domain uninitialized, can't set up IPIs")) {
+ d = irq_domain_create_hierarchy(gicv5_global_data.lpi_domain,
+ 0, GICV5_IPIS_PER_CPU * nr_cpu_ids,
+ NULL, &gicv5_irq_ipi_domain_ops,
+ NULL);
+
+ if (!d) {
+ gicv5_free_domains();
+ return -ENOMEM;
+ }
+ gicv5_global_data.ipi_domain = d;
+ }
+ gicv5_global_data.fwnode = handle;
+
+ return 0;
+}
+
+static void gicv5_set_cpuif_pribits(void)
+{
+ u64 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);
+
+ switch (FIELD_GET(ICC_IDR0_EL1_PRI_BITS, icc_idr0)) {
+ case ICC_IDR0_EL1_PRI_BITS_4BITS:
+ gicv5_global_data.cpuif_pri_bits = 4;
+ break;
+ case ICC_IDR0_EL1_PRI_BITS_5BITS:
+ gicv5_global_data.cpuif_pri_bits = 5;
+ break;
+ default:
+ pr_err("Unexpected ICC_IDR0_EL1_PRI_BITS value, default to 4");
+ gicv5_global_data.cpuif_pri_bits = 4;
+ break;
+ }
+}
+
+static void gicv5_set_cpuif_idbits(void)
+{
+ u32 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);
+
+ switch (FIELD_GET(ICC_IDR0_EL1_ID_BITS, icc_idr0)) {
+ case ICC_IDR0_EL1_ID_BITS_16BITS:
+ gicv5_global_data.cpuif_id_bits = 16;
+ break;
+ case ICC_IDR0_EL1_ID_BITS_24BITS:
+ gicv5_global_data.cpuif_id_bits = 24;
+ break;
+ default:
+ pr_err("Unexpected ICC_IDR0_EL1_ID_BITS value, default to 16");
+ gicv5_global_data.cpuif_id_bits = 16;
+ break;
+ }
+}
+
+#ifdef CONFIG_KVM
+static struct gic_kvm_info gic_v5_kvm_info __initdata;
+
+static bool __init gicv5_cpuif_has_gcie_legacy(void)
+{
+ u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);
+ return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0);
+}
+
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+ gic_v5_kvm_info.type = GIC_V5;
+ gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy();
+
+ /* GIC Virtual CPU interface maintenance interrupt */
+ gic_v5_kvm_info.no_maint_irq_mask = false;
+ gic_v5_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+ if (!gic_v5_kvm_info.maint_irq) {
+ pr_warn("cannot find GICv5 virtual CPU interface maintenance interrupt\n");
+ return;
+ }
+
+ vgic_set_kvm_info(&gic_v5_kvm_info);
+}
+#else
+static inline void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+}
+#endif // CONFIG_KVM
+
+static int __init gicv5_of_init(struct device_node *node, struct device_node *parent)
+{
+ int ret = gicv5_irs_of_probe(node);
+ if (ret)
+ return ret;
+
+ ret = gicv5_init_domains(of_fwnode_handle(node));
+ if (ret)
+ goto out_irs;
+
+ gicv5_set_cpuif_pribits();
+ gicv5_set_cpuif_idbits();
+
+ pri_bits = min_not_zero(gicv5_global_data.cpuif_pri_bits,
+ gicv5_global_data.irs_pri_bits);
+
+ ret = gicv5_starting_cpu(smp_processor_id());
+ if (ret)
+ goto out_dom;
+
+ ret = set_handle_irq(gicv5_handle_irq);
+ if (ret)
+ goto out_int;
+
+ ret = gicv5_irs_enable();
+ if (ret)
+ goto out_int;
+
+ gicv5_smp_init();
+
+ gicv5_irs_its_probe();
+
+ gic_of_setup_kvm_info(node);
+
+ return 0;
+
+out_int:
+ gicv5_cpu_disable_interrupts();
+out_dom:
+ gicv5_free_domains();
+out_irs:
+ gicv5_irs_remove();
+
+ return ret;
+}
+IRQCHIP_DECLARE(gic_v5, "arm,gic-v5", gicv5_of_init);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6503573557fd..1269ab8eb726 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -54,7 +54,7 @@
static void gic_check_cpu_features(void)
{
- WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GIC_CPUIF_SYSREGS),
+ WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF),
TAINT_CPU_OUT_OF_SPEC,
"GICv3 system registers enabled, broken firmware!\n");
}
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index 246c30205af4..454c7f16dd4d 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -133,11 +133,14 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
{
const struct msi_parent_ops *ops = d->msi_parent_ops;
u32 busmask = BIT(bus_token);
+ struct fwnode_handle *fwh;
if (!ops)
return 0;
- if (fwspec->fwnode != d->fwnode || fwspec->param_count != 0)
+ fwh = d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode)
+ : fwspec->fwnode;
+ if (fwh != d->fwnode || fwspec->param_count != 0)
return 0;
/* Handle pure domain searches */
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index f8ad79b9b1c9..74aaea61de13 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -670,8 +670,20 @@ err:
}
}
-static u32 __of_msi_map_id(struct device *dev, struct device_node **np,
- u32 id_in)
+/**
+ * of_msi_xlate - map a MSI ID and find relevant MSI controller node
+ * @dev: device for which the mapping is to be done.
+ * @msi_np: Pointer to store the MSI controller node
+ * @id_in: Device ID.
+ *
+ * Walk up the device hierarchy looking for devices with a "msi-map"
+ * property. If found, apply the mapping to @id_in. @msi_np pointed
+ * value must be NULL on entry, if an MSI controller is found @msi_np is
+ * initialized to the MSI controller node with a reference held.
+ *
+ * Returns: The mapped MSI id.
+ */
+u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in)
{
struct device *parent_dev;
u32 id_out = id_in;
@@ -682,7 +694,7 @@ static u32 __of_msi_map_id(struct device *dev, struct device_node **np,
*/
for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
if (!of_map_id(parent_dev->of_node, id_in, "msi-map",
- "msi-map-mask", np, &id_out))
+ "msi-map-mask", msi_np, &id_out))
break;
return id_out;
}
@@ -700,7 +712,7 @@ static u32 __of_msi_map_id(struct device *dev, struct device_node **np,
*/
u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in)
{
- return __of_msi_map_id(dev, &msi_np, id_in);
+ return of_msi_xlate(dev, &msi_np, id_in);
}
/**
@@ -719,7 +731,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id,
{
struct device_node *np = NULL;
- __of_msi_map_id(dev, &np, id);
+ of_msi_xlate(dev, &np, id);
return irq_find_matching_host(np, bus_token);
}
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index c05152733993..8a6e80d3963a 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -428,6 +428,26 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
}
/**
+ * pci_msi_map_rid_ctlr_node - Get the MSI controller node and MSI requester id (RID)
+ * @pdev: The PCI device
+ * @node: Pointer to store the MSI controller device node
+ *
+ * Use the firmware data to find the MSI controller node for @pdev.
+ * If found map the RID and initialize @node with it. @node value must
+ * be set to NULL on entry.
+ *
+ * Returns: The RID.
+ */
+u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node)
+{
+ u32 rid = pci_dev_id(pdev);
+
+ pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
+
+ return of_msi_xlate(&pdev->dev, node, rid);
+}
+
+/**
* pci_msi_get_device_domain - Get the MSI domain for a given PCI device
* @pdev: The PCI device
*
diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h
index 124c734ca5d9..92cca4b23f13 100644
--- a/include/asm-generic/msi.h
+++ b/include/asm-generic/msi.h
@@ -33,6 +33,7 @@ typedef struct msi_alloc_info {
/* Device generating MSIs is proxying for another device */
#define MSI_ALLOC_FLAGS_PROXY_DEVICE (1UL << 0)
+#define MSI_ALLOC_FLAGS_FIXED_MSG_DATA (1UL << 1)
#define GENERIC_MSI_DOMAIN_OPS 1
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4a34f7f0a864..5c293e0ff5c1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -38,6 +38,7 @@
enum vgic_type {
VGIC_V2, /* Good ol' GICv2 */
VGIC_V3, /* New fancy GICv3 */
+ VGIC_V5, /* Newer, fancier GICv5 */
};
/* same for all guests, as depending only on the _host's_ GIC model */
@@ -77,9 +78,12 @@ struct vgic_global {
/* Pseudo GICv3 from outer space */
bool no_hw_deactivation;
- /* GIC system register CPU interface */
+ /* GICv3 system register CPU interface */
struct static_key_false gicv3_cpuif;
+ /* GICv3 compat mode on a GICv5 host */
+ bool has_gcie_v3_compat;
+
u32 ich_vtr_el2;
};
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
new file mode 100644
index 000000000000..68ddcdb1cec5
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 ARM Limited, All Rights Reserved.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V5_H
+#define __LINUX_IRQCHIP_ARM_GIC_V5_H
+
+#include <linux/iopoll.h>
+
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/sysreg.h>
+
+#define GICV5_IPIS_PER_CPU MAX_IPI
+
+/*
+ * INTID handling
+ */
+#define GICV5_HWIRQ_ID GENMASK(23, 0)
+#define GICV5_HWIRQ_TYPE GENMASK(31, 29)
+#define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0)
+
+#define GICV5_HWIRQ_TYPE_PPI UL(0x1)
+#define GICV5_HWIRQ_TYPE_LPI UL(0x2)
+#define GICV5_HWIRQ_TYPE_SPI UL(0x3)
+
+/*
+ * Tables attributes
+ */
+#define GICV5_NO_READ_ALLOC 0b0
+#define GICV5_READ_ALLOC 0b1
+#define GICV5_NO_WRITE_ALLOC 0b0
+#define GICV5_WRITE_ALLOC 0b1
+
+#define GICV5_NON_CACHE 0b00
+#define GICV5_WB_CACHE 0b01
+#define GICV5_WT_CACHE 0b10
+
+#define GICV5_NON_SHARE 0b00
+#define GICV5_OUTER_SHARE 0b10
+#define GICV5_INNER_SHARE 0b11
+
+/*
+ * IRS registers and tables structures
+ */
+#define GICV5_IRS_IDR1 0x0004
+#define GICV5_IRS_IDR2 0x0008
+#define GICV5_IRS_IDR5 0x0014
+#define GICV5_IRS_IDR6 0x0018
+#define GICV5_IRS_IDR7 0x001c
+#define GICV5_IRS_CR0 0x0080
+#define GICV5_IRS_CR1 0x0084
+#define GICV5_IRS_SYNCR 0x00c0
+#define GICV5_IRS_SYNC_STATUSR 0x00c4
+#define GICV5_IRS_SPI_SELR 0x0108
+#define GICV5_IRS_SPI_CFGR 0x0114
+#define GICV5_IRS_SPI_STATUSR 0x0118
+#define GICV5_IRS_PE_SELR 0x0140
+#define GICV5_IRS_PE_STATUSR 0x0144
+#define GICV5_IRS_PE_CR0 0x0148
+#define GICV5_IRS_IST_BASER 0x0180
+#define GICV5_IRS_IST_CFGR 0x0190
+#define GICV5_IRS_IST_STATUSR 0x0194
+#define GICV5_IRS_MAP_L2_ISTR 0x01c0
+
+#define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20)
+#define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16)
+
+#define GICV5_IRS_IDR1_PRIORITY_BITS_1BITS 0b000
+#define GICV5_IRS_IDR1_PRIORITY_BITS_2BITS 0b001
+#define GICV5_IRS_IDR1_PRIORITY_BITS_3BITS 0b010
+#define GICV5_IRS_IDR1_PRIORITY_BITS_4BITS 0b011
+#define GICV5_IRS_IDR1_PRIORITY_BITS_5BITS 0b100
+
+#define GICV5_IRS_IDR2_ISTMD_SZ GENMASK(19, 15)
+#define GICV5_IRS_IDR2_ISTMD BIT(14)
+#define GICV5_IRS_IDR2_IST_L2SZ GENMASK(13, 11)
+#define GICV5_IRS_IDR2_IST_LEVELS BIT(10)
+#define GICV5_IRS_IDR2_MIN_LPI_ID_BITS GENMASK(9, 6)
+#define GICV5_IRS_IDR2_LPI BIT(5)
+#define GICV5_IRS_IDR2_ID_BITS GENMASK(4, 0)
+
+#define GICV5_IRS_IDR5_SPI_RANGE GENMASK(24, 0)
+#define GICV5_IRS_IDR6_SPI_IRS_RANGE GENMASK(24, 0)
+#define GICV5_IRS_IDR7_SPI_BASE GENMASK(23, 0)
+
+#define GICV5_IRS_IST_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(11), (r))
+#define GICV5_IRS_IST_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(12), (r))
+#define GICV5_IRS_IST_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(13), (r))
+
+#define GICV5_IRS_CR0_IDLE BIT(1)
+#define GICV5_IRS_CR0_IRSEN BIT(0)
+
+#define GICV5_IRS_CR1_VPED_WA BIT(15)
+#define GICV5_IRS_CR1_VPED_RA BIT(14)
+#define GICV5_IRS_CR1_VMD_WA BIT(13)
+#define GICV5_IRS_CR1_VMD_RA BIT(12)
+#define GICV5_IRS_CR1_VPET_WA BIT(11)
+#define GICV5_IRS_CR1_VPET_RA BIT(10)
+#define GICV5_IRS_CR1_VMT_WA BIT(9)
+#define GICV5_IRS_CR1_VMT_RA BIT(8)
+#define GICV5_IRS_CR1_IST_WA BIT(7)
+#define GICV5_IRS_CR1_IST_RA BIT(6)
+#define GICV5_IRS_CR1_IC GENMASK(5, 4)
+#define GICV5_IRS_CR1_OC GENMASK(3, 2)
+#define GICV5_IRS_CR1_SH GENMASK(1, 0)
+
+#define GICV5_IRS_SYNCR_SYNC BIT(31)
+
+#define GICV5_IRS_SYNC_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_SPI_STATUSR_V BIT(1)
+#define GICV5_IRS_SPI_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_SPI_SELR_ID GENMASK(23, 0)
+
+#define GICV5_IRS_SPI_CFGR_TM BIT(0)
+
+#define GICV5_IRS_PE_SELR_IAFFID GENMASK(15, 0)
+
+#define GICV5_IRS_PE_STATUSR_V BIT(1)
+#define GICV5_IRS_PE_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_PE_CR0_DPS BIT(0)
+
+#define GICV5_IRS_IST_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_IST_CFGR_STRUCTURE BIT(16)
+#define GICV5_IRS_IST_CFGR_ISTSZ GENMASK(8, 7)
+#define GICV5_IRS_IST_CFGR_L2SZ GENMASK(6, 5)
+#define GICV5_IRS_IST_CFGR_LPI_ID_BITS GENMASK(4, 0)
+
+#define GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR 0b0
+#define GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL 0b1
+
+#define GICV5_IRS_IST_CFGR_ISTSZ_4 0b00
+#define GICV5_IRS_IST_CFGR_ISTSZ_8 0b01
+#define GICV5_IRS_IST_CFGR_ISTSZ_16 0b10
+
+#define GICV5_IRS_IST_CFGR_L2SZ_4K 0b00
+#define GICV5_IRS_IST_CFGR_L2SZ_16K 0b01
+#define GICV5_IRS_IST_CFGR_L2SZ_64K 0b10
+
+#define GICV5_IRS_IST_BASER_ADDR_MASK GENMASK_ULL(55, 6)
+#define GICV5_IRS_IST_BASER_VALID BIT_ULL(0)
+
+#define GICV5_IRS_MAP_L2_ISTR_ID GENMASK(23, 0)
+
+#define GICV5_ISTL1E_VALID BIT_ULL(0)
+
+#define GICV5_ISTL1E_L2_ADDR_MASK GENMASK_ULL(55, 12)
+
+/*
+ * ITS registers and tables structures
+ */
+#define GICV5_ITS_IDR1 0x0004
+#define GICV5_ITS_IDR2 0x0008
+#define GICV5_ITS_CR0 0x0080
+#define GICV5_ITS_CR1 0x0084
+#define GICV5_ITS_DT_BASER 0x00c0
+#define GICV5_ITS_DT_CFGR 0x00d0
+#define GICV5_ITS_DIDR 0x0100
+#define GICV5_ITS_EIDR 0x0108
+#define GICV5_ITS_INV_EVENTR 0x010c
+#define GICV5_ITS_INV_DEVICER 0x0110
+#define GICV5_ITS_STATUSR 0x0120
+#define GICV5_ITS_SYNCR 0x0140
+#define GICV5_ITS_SYNC_STATUSR 0x0148
+
+#define GICV5_ITS_IDR1_L2SZ GENMASK(10, 8)
+#define GICV5_ITS_IDR1_ITT_LEVELS BIT(7)
+#define GICV5_ITS_IDR1_DT_LEVELS BIT(6)
+#define GICV5_ITS_IDR1_DEVICEID_BITS GENMASK(5, 0)
+
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(8), (r))
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(9), (r))
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(10), (r))
+
+#define GICV5_ITS_IDR2_XDMN_EVENTs GENMASK(6, 5)
+#define GICV5_ITS_IDR2_EVENTID_BITS GENMASK(4, 0)
+
+#define GICV5_ITS_CR0_IDLE BIT(1)
+#define GICV5_ITS_CR0_ITSEN BIT(0)
+
+#define GICV5_ITS_CR1_ITT_RA BIT(7)
+#define GICV5_ITS_CR1_DT_RA BIT(6)
+#define GICV5_ITS_CR1_IC GENMASK(5, 4)
+#define GICV5_ITS_CR1_OC GENMASK(3, 2)
+#define GICV5_ITS_CR1_SH GENMASK(1, 0)
+
+#define GICV5_ITS_DT_CFGR_STRUCTURE BIT(16)
+#define GICV5_ITS_DT_CFGR_L2SZ GENMASK(7, 6)
+#define GICV5_ITS_DT_CFGR_DEVICEID_BITS GENMASK(5, 0)
+
+#define GICV5_ITS_DT_BASER_ADDR_MASK GENMASK_ULL(55, 3)
+
+#define GICV5_ITS_INV_DEVICER_I BIT(31)
+#define GICV5_ITS_INV_DEVICER_EVENTID_BITS GENMASK(5, 1)
+#define GICV5_ITS_INV_DEVICER_L1 BIT(0)
+
+#define GICV5_ITS_DIDR_DEVICEID GENMASK_ULL(31, 0)
+
+#define GICV5_ITS_EIDR_EVENTID GENMASK(15, 0)
+
+#define GICV5_ITS_INV_EVENTR_I BIT(31)
+#define GICV5_ITS_INV_EVENTR_ITT_L2SZ GENMASK(2, 1)
+#define GICV5_ITS_INV_EVENTR_L1 BIT(0)
+
+#define GICV5_ITS_STATUSR_IDLE BIT(0)
+
+#define GICV5_ITS_SYNCR_SYNC BIT_ULL(63)
+#define GICV5_ITS_SYNCR_SYNCALL BIT_ULL(32)
+#define GICV5_ITS_SYNCR_DEVICEID GENMASK_ULL(31, 0)
+
+#define GICV5_ITS_SYNC_STATUSR_IDLE BIT(0)
+
+#define GICV5_DTL1E_VALID BIT_ULL(0)
+/* Note that there is no shift for the address by design */
+#define GICV5_DTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_DTL1E_SPAN GENMASK_ULL(63, 60)
+
+#define GICV5_DTL2E_VALID BIT_ULL(0)
+#define GICV5_DTL2E_ITT_L2SZ GENMASK_ULL(2, 1)
+/* Note that there is no shift for the address by design */
+#define GICV5_DTL2E_ITT_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_DTL2E_ITT_DSWE BIT_ULL(57)
+#define GICV5_DTL2E_ITT_STRUCTURE BIT_ULL(58)
+#define GICV5_DTL2E_EVENT_ID_BITS GENMASK_ULL(63, 59)
+
+#define GICV5_ITTL1E_VALID BIT_ULL(0)
+/* Note that there is no shift for the address by design */
+#define GICV5_ITTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_ITTL1E_SPAN GENMASK_ULL(63, 60)
+
+#define GICV5_ITTL2E_LPI_ID GENMASK_ULL(23, 0)
+#define GICV5_ITTL2E_DAC GENMASK_ULL(29, 28)
+#define GICV5_ITTL2E_VIRTUAL BIT_ULL(30)
+#define GICV5_ITTL2E_VALID BIT_ULL(31)
+#define GICV5_ITTL2E_VM_ID GENMASK_ULL(47, 32)
+
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_4k 0b00
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_16k 0b01
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_64k 0b10
+
+#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR 0
+#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL 1
+
+#define GICV5_ITS_HWIRQ_DEVICE_ID GENMASK_ULL(31, 0)
+#define GICV5_ITS_HWIRQ_EVENT_ID GENMASK_ULL(63, 32)
+
+/*
+ * IWB registers
+ */
+#define GICV5_IWB_IDR0 0x0000
+#define GICV5_IWB_CR0 0x0080
+#define GICV5_IWB_WENABLE_STATUSR 0x00c0
+#define GICV5_IWB_WENABLER 0x2000
+#define GICV5_IWB_WTMR 0x4000
+
+#define GICV5_IWB_IDR0_INT_DOMS GENMASK(14, 11)
+#define GICV5_IWB_IDR0_IW_RANGE GENMASK(10, 0)
+
+#define GICV5_IWB_CR0_IDLE BIT(1)
+#define GICV5_IWB_CR0_IWBEN BIT(0)
+
+#define GICV5_IWB_WENABLE_STATUSR_IDLE BIT(0)
+
+/*
+ * Global Data structures and functions
+ */
+struct gicv5_chip_data {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *ppi_domain;
+ struct irq_domain *spi_domain;
+ struct irq_domain *lpi_domain;
+ struct irq_domain *ipi_domain;
+ u32 global_spi_count;
+ u8 cpuif_pri_bits;
+ u8 cpuif_id_bits;
+ u8 irs_pri_bits;
+ struct {
+ __le64 *l1ist_addr;
+ u32 l2_size;
+ u8 l2_bits;
+ bool l2;
+ } ist;
+};
+
+extern struct gicv5_chip_data gicv5_global_data __read_mostly;
+
+struct gicv5_irs_chip_data {
+ struct list_head entry;
+ struct fwnode_handle *fwnode;
+ void __iomem *irs_base;
+ u32 flags;
+ u32 spi_min;
+ u32 spi_range;
+ raw_spinlock_t spi_config_lock;
+};
+
+static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset,
+ const char *reg_s, u32 mask,
+ u32 *val)
+{
+ void __iomem *reg = addr + offset;
+ u32 tmp;
+ int ret;
+
+ ret = readl_poll_timeout_atomic(reg, tmp, tmp & mask, 1, 10 * USEC_PER_MSEC);
+ if (unlikely(ret == -ETIMEDOUT)) {
+ pr_err_ratelimited("%s timeout...\n", reg_s);
+ return ret;
+ }
+
+ if (val)
+ *val = tmp;
+
+ return 0;
+}
+
+static inline int gicv5_wait_for_op_s(void __iomem *addr, u32 offset,
+ const char *reg_s, u32 mask)
+{
+ void __iomem *reg = addr + offset;
+ u32 val;
+ int ret;
+
+ ret = readl_poll_timeout(reg, val, val & mask, 1, 10 * USEC_PER_MSEC);
+ if (unlikely(ret == -ETIMEDOUT)) {
+ pr_err_ratelimited("%s timeout...\n", reg_s);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define gicv5_wait_for_op_atomic(base, reg, mask, val) \
+ gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val)
+
+#define gicv5_wait_for_op(base, reg, mask) \
+ gicv5_wait_for_op_s(base, reg, #reg, mask)
+
+void __init gicv5_init_lpi_domain(void);
+void __init gicv5_free_lpi_domain(void);
+
+int gicv5_irs_of_probe(struct device_node *parent);
+void gicv5_irs_remove(void);
+int gicv5_irs_enable(void);
+void gicv5_irs_its_probe(void);
+int gicv5_irs_register_cpu(int cpuid);
+int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid);
+struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id);
+int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
+int gicv5_irs_iste_alloc(u32 lpi);
+void gicv5_irs_syncr(void);
+
+struct gicv5_its_devtab_cfg {
+ union {
+ struct {
+ __le64 *devtab;
+ } linear;
+ struct {
+ __le64 *l1devtab;
+ __le64 **l2ptrs;
+ } l2;
+ };
+ u32 cfgr;
+};
+
+struct gicv5_its_itt_cfg {
+ union {
+ struct {
+ __le64 *itt;
+ unsigned int num_ents;
+ } linear;
+ struct {
+ __le64 *l1itt;
+ __le64 **l2ptrs;
+ unsigned int num_l1_ents;
+ u8 l2sz;
+ } l2;
+ };
+ u8 event_id_bits;
+ bool l2itt;
+};
+
+void gicv5_init_lpis(u32 max);
+void gicv5_deinit_lpis(void);
+
+int gicv5_alloc_lpi(void);
+void gicv5_free_lpi(u32 lpi);
+
+void __init gicv5_its_of_probe(struct device_node *parent);
+#endif
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index a75b2c7de69d..ca1713fac6e3 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -15,6 +15,8 @@ enum gic_type {
GIC_V2,
/* Full GICv3, optionally with v2 compat */
GIC_V3,
+ /* Full GICv5, optionally with v3 compat */
+ GIC_V5,
};
struct gic_kvm_info {
@@ -34,6 +36,8 @@ struct gic_kvm_info {
bool has_v4_1;
/* Deactivation impared, subpar stuff */
bool no_hw_deactivation;
+ /* v3 compat support (GICv5 hosts, only) */
+ bool has_gcie_v3_compat;
};
#ifdef CONFIG_KVM
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 7387d183029b..25c7cbeed393 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -212,6 +212,9 @@ enum {
/* Address and data pair is mutable when irq_set_affinity() */
IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11),
+ /* IRQ domain requires parent fwnode matching */
+ IRQ_DOMAIN_FLAG_FWNODE_PARENT = (1 << 12),
+
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6863540f4b71..a418e2695b05 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -705,6 +705,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
+u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node);
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
#else /* CONFIG_PCI_MSI */
static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 6337ad4e5fe8..a480063c9cb1 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -54,6 +54,7 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
u32 id,
u32 bus_token);
extern void of_msi_configure(struct device *dev, const struct device_node *np);
+extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in);
u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
#else
static inline void of_irq_init(const struct of_device_id *matches)
@@ -100,6 +101,10 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev
static inline void of_msi_configure(struct device *dev, struct device_node *np)
{
}
+static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in)
+{
+ return id_in;
+}
static inline u32 of_msi_map_id(struct device *dev,
struct device_node *msi_np, u32 id_in)
{
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 37891580d05d..e4e566ff348b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -956,6 +956,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 38b95998e1e6..ce817a975e50 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -156,7 +156,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
-TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..062bf84cced1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d01798b6b3b4..684c52b1b881 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -52,6 +52,12 @@ static struct feature_id_reg feat_id_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
16,
1
+ },
+ {
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR3_EL1),
+ ID_AA64MMFR3_EL1_SCTLRX_SHIFT,
+ ID_AA64MMFR3_EL1_SCTLRX_IMP
}
};
@@ -469,6 +475,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
deleted file mode 100644
index 8b7a80a51b1c..000000000000
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 8f422bfdfcb9..d3bf9204409c 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
};
static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
@@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
@@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
};
@@ -774,8 +784,8 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
- MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
ksft_set_plan(test_cnt);
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index b0fc0f945766..255fed769a8a 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -254,6 +254,16 @@ static inline void local_irq_disable(void)
asm volatile("msr daifset, #3" : : : "memory");
}
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3