Diffstat (limited to 'drivers/irqchip/irq-gic-v3.c')
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 1623
1 file changed, 1276 insertions(+), 347 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 0868a9d81c3c..6607ab58f72e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved. * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "GICv3: " fmt @@ -23,16 +12,23 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> +#include <linux/kernel.h> +#include <linux/kstrtox.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/percpu.h> +#include <linux/refcount.h> #include <linux/slab.h> +#include <linux/iopoll.h> #include <linux/irqchip.h> #include <linux/irqchip/arm-gic-common.h> #include <linux/irqchip/arm-gic-v3.h> -#include <linux/irqchip/irq-partition-percpu.h> +#include <linux/irqchip/arm-gic-v3-prio.h> +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/arm-smccc.h> #include <asm/cputype.h> #include <asm/exception.h> @@ -41,7 +37,15 @@ #include "irq-gic-common.h" +static u8 dist_prio_irq __ro_after_init = GICV3_PRIO_IRQ; +static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; + #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) +#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) +#define FLAGS_WORKAROUND_INSECURE (1ULL << 3) + +static struct cpumask broken_rdists __read_mostly __maybe_unused; struct redist_region { void __iomem *redist_base; @@ -51,6 +55,7 @@ struct redist_region { struct gic_chip_data { struct fwnode_handle *fwnode; + phys_addr_t dist_phys_base; void __iomem *dist_base; struct redist_region *redist_regions; struct rdists rdists; @@ -59,14 +64,174 @@ struct gic_chip_data { u32 nr_redist_regions; u64 flags; bool has_rss; - unsigned int irq_nr; - struct partition_desc *ppi_descs[16]; + unsigned int ppi_nr; + struct partition_affinity *parts; + unsigned int nr_parts; +}; + +struct partition_affinity { + cpumask_t mask; + struct fwnode_handle *partition_id; }; +#define T241_CHIPS_MAX 4 +static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly; +static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum); + +static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum); + static struct gic_chip_data gic_data __read_mostly; static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); -static struct gic_kvm_info gic_v3_kvm_info; +#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) +#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) +#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) + +static bool nmi_support_forbidden; + +/* + * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs + * are potentially stolen by the secure side. 
Some code, especially code dealing + * with hwirq IDs, is simplified by accounting for all 16. + */ +#define SGI_NR 16 + +/* + * The behaviours of RPR and PMR registers differ depending on the value of + * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the + * distributor and redistributors depends on whether security is enabled in the + * GIC. + * + * When security is enabled, non-secure priority values from the (re)distributor + * are presented to the GIC CPUIF as follows: + * (GIC_(R)DIST_PRI[irq] >> 1) | 0x80; + * + * If SCR_EL3.FIQ == 1, the values written to/read from PMR and RPR at non-secure + * EL1 are subject to a similar operation thus matching the priorities presented + * from the (re)distributor when security is enabled. When SCR_EL3.FIQ == 0, + * these values are unchanged by the GIC. + * + * see GICv3/GICv4 Architecture Specification (IHI0069D): + * - section 4.8.1 Non-secure accesses to register fields for Secure interrupt + * priorities. + * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 + * interrupt. + */ +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); + +static u32 gic_get_pribits(void) +{ + u32 pribits; + + pribits = gic_read_ctlr(); + pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; + pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; + pribits++; + + return pribits; +} + +static bool gic_has_group0(void) +{ + u32 val; + u32 old_pmr; + + old_pmr = gic_read_pmr(); + + /* + * Let's find out if Group0 is under control of EL3 or not by + * setting the highest possible, non-zero priority in PMR. + * + * If SCR_EL3.FIQ is set, the priority gets shifted down in + * order for the CPU interface to set bit 7, and keep the + * actual priority in the non-secure range. In the process, it + * loses the least significant bit and the actual priority + * becomes 0x80. Reading it back returns 0, indicating that + * we don't have access to Group0. + */ + gic_write_pmr(BIT(8 - gic_get_pribits())); + val = gic_read_pmr(); + + gic_write_pmr(old_pmr); + + return val != 0; +} + +static inline bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + +static bool cpus_have_security_disabled __ro_after_init; +static bool cpus_have_group0 __ro_after_init; + +static void __init gic_prio_init(void) +{ + bool ds; + + cpus_have_group0 = gic_has_group0(); + + ds = gic_dist_security_disabled(); + if ((gic_data.flags & FLAGS_WORKAROUND_INSECURE) && !ds) { + if (cpus_have_group0) { + u32 val; + + val = readl_relaxed(gic_data.dist_base + GICD_CTLR); + val |= GICD_CTLR_DS; + writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + + ds = gic_dist_security_disabled(); + if (ds) + pr_warn("Broken GIC integration, security disabled\n"); + } else { + pr_warn("Broken GIC integration, pNMI forbidden\n"); + nmi_support_forbidden = true; + } + } + + cpus_have_security_disabled = ds; + + /* + * How priority values are used by the GIC depends on two things: + * the security state of the GIC (controlled by the GICD_CTLR.DS bit) + * and if Group 0 interrupts can be delivered to Linux in the non-secure + * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the + * way priorities are presented in ICC_PMR_EL1 and in the distributor: + * + * GICD_CTLR.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Distributor + * ------------------------------------------------------- + * 1 | - | unchanged | unchanged + * ------------------------------------------------------- + * 0 | 1 | non-secure | non-secure + * ------------------------------------------------------- + * 0 | 0 | unchanged | non-secure + * + * In the non-secure view reads and writes are modified: + * + * - A value written is right-shifted by one and the MSB is set, + * forcing the priority into the non-secure range. + * + * - A value read is left-shifted by one. + * + * In the first two cases, where ICC_PMR_EL1 and the interrupt priority + * are both either modified or unchanged, we can use the same set of + * priorities. + * + * In the last case, where only the interrupt priorities are modified to + * be in the non-secure range, we program the non-secure values into + * the distributor to match the PMR values we want. + */ + if (cpus_have_group0 && !cpus_have_security_disabled) { + dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq); + dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi); + } + + pr_info("GICD_CTLR.DS=%d, SCR_EL3.FIQ=%d\n", + cpus_have_security_disabled, + !cpus_have_group0); +}
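The conversion used by gic_prio_init() is easy to check outside the kernel. Below is a minimal user-space sketch, assuming __gicv3_prio_to_ns() is the ((p) << 1) & 0xff helper and GICV3_PRIO_IRQ/GICV3_PRIO_NMI carry the values from linux/irqchip/arm-gic-v3-prio.h (both assumptions, not shown in this diff): programming the shifted value into the distributor makes the hardware's secure-view transform, a right shift by one with the MSB set as described in the comment above, read back exactly the PMR priority Linux expects.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed values/helper, mirroring linux/irqchip/arm-gic-v3-prio.h. */
    #define GICV3_PRIO_IRQ          0xc0
    #define GICV3_PRIO_NMI          0x80
    #define __gicv3_prio_to_ns(p)   (0xff & ((p) << 1))

    /*
     * What the GIC presents for a non-secure distributor priority when
     * security is enabled: (GIC_(R)DIST_PRI[irq] >> 1) | 0x80.
     */
    static uint8_t secure_view(uint8_t dist_prio)
    {
            return (dist_prio >> 1) | 0x80;
    }

    int main(void)
    {
            const uint8_t prios[] = { GICV3_PRIO_IRQ, GICV3_PRIO_NMI };

            for (size_t i = 0; i < sizeof(prios); i++) {
                    uint8_t ns = __gicv3_prio_to_ns(prios[i]);

                    /* 0xc0 -> 0x80 -> 0xc0, and 0x80 -> 0x00 -> 0x80 */
                    printf("prio 0x%02x -> dist 0x%02x -> view 0x%02x\n",
                           prios[i], ns, secure_view(ns));
                    assert(secure_view(ns) == prios[i]);
            }
            return 0;
    }

This matches the last row of the table above: only the distributor side is transformed, so the driver pre-distorts what it programs there while keeping untranslated values in ICC_PMR_EL1.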
+ +static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); #define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) @@ -77,70 +242,133 @@ static DEFINE_PER_CPU(bool, has_rss); /* Our default, arbitrary priority value. Linux only uses one anyway. */ #define DEFAULT_PMR_VALUE 0xf0 -static inline unsigned int gic_irq(struct irq_data *d) +enum gic_intid_range { + SGI_RANGE, + PPI_RANGE, + SPI_RANGE, + EPPI_RANGE, + ESPI_RANGE, + LPI_RANGE, + __INVALID_RANGE__ +}; + +static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq) +{ + switch (hwirq) { + case 0 ... 15: + return SGI_RANGE; + case 16 ... 31: + return PPI_RANGE; + case 32 ... 1019: + return SPI_RANGE; + case EPPI_BASE_INTID ... (EPPI_BASE_INTID + 63): + return EPPI_RANGE; + case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023): + return ESPI_RANGE; + case 8192 ... GENMASK(23, 0): + return LPI_RANGE; + default: + return __INVALID_RANGE__; + } +} + +static enum gic_intid_range get_intid_range(struct irq_data *d) { - return d->hwirq; + return __get_intid_range(d->hwirq); } -static inline int gic_irq_in_rdist(struct irq_data *d) +static inline bool gic_irq_in_rdist(struct irq_data *d) { - return gic_irq(d) < 32; + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case EPPI_RANGE: + return true; + default: + return false; + } +} + +static inline void __iomem *gic_dist_base_alias(struct irq_data *d) +{ + if (static_branch_unlikely(&gic_nvidia_t241_erratum)) { + irq_hw_number_t hwirq = irqd_to_hwirq(d); + u32 chip; + + /* + * For the erratum T241-FABRIC-4, read accesses to GICD_In{E} + * registers are directed to the chip that owns the SPI. The + * alias region can also be used for writes to the + * GICD_In{E} except GICD_ICENABLERn. Each chip has support + * for 320 {E}SPIs.
Mappings for all 4 chips: + * Chip0 = 32-351 + * Chip1 = 352-671 + * Chip2 = 672-991 + * Chip3 = 4096-4415 + */ + switch (__get_intid_range(hwirq)) { + case SPI_RANGE: + chip = (hwirq - 32) / 320; + break; + case ESPI_RANGE: + chip = 3; + break; + default: + unreachable(); + } + return t241_dist_base_alias[chip]; + } + + return gic_data.dist_base; } static inline void __iomem *gic_dist_base(struct irq_data *d) { - if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case EPPI_RANGE: + /* SGI+PPI -> SGI_base for this CPU */ return gic_data_rdist_sgi_base(); - if (d->hwirq <= 1023) /* SPI -> dist_base */ + case SPI_RANGE: + case ESPI_RANGE: + /* SPI -> dist_base */ return gic_data.dist_base; - return NULL; + default: + return NULL; + } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { - u32 count = 1000000; /* 1s! */ + u32 val; + int ret; - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { - count--; - if (!count) { - pr_err_ratelimited("RWP timeout, gone fishing\n"); - return; - } - cpu_relax(); - udelay(1); - }; + ret = readl_relaxed_poll_timeout_atomic(base + GICD_CTLR, val, !(val & bit), + 1, USEC_PER_SEC); + if (ret == -ETIMEDOUT) + pr_err_ratelimited("RWP timeout, gone fishing\n"); } /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } -#ifdef CONFIG_ARM64 - -static u64 __maybe_unused gic_read_iar(void) -{ - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) - return gic_read_iar_cavium_thunderx(); - else - return gic_read_iar_common(); -} -#endif - static void gic_enable_redist(bool enable) { void __iomem *rbase; - u32 count = 1000000; /* 1s! */ u32 val; + int ret; if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996) return; @@ -161,55 +389,107 @@ static void gic_enable_redist(bool enable) return; /* No PM support in this redistributor */ } - while (--count) { - val = readl_relaxed(rbase + GICR_WAKER); - if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) - break; - cpu_relax(); - udelay(1); - }; - if (!count) + ret = readl_relaxed_poll_timeout_atomic(rbase + GICR_WAKER, val, + enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep), + 1, USEC_PER_SEC); + if (ret == -ETIMEDOUT) { pr_err_ratelimited("redistributor failed to %s...\n", enable ? "wakeup" : "sleep"); + } } /* * Routines to disable, enable, EOI and route interrupts */ +static u32 convert_offset_index(struct irq_data *d, u32 offset, u32 *index) +{ + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case SPI_RANGE: + *index = d->hwirq; + return offset; + case EPPI_RANGE: + /* + * Contrary to the ESPI range, the EPPI range is contiguous + * to the PPI range in the registers, so let's adjust the + * displacement accordingly. Consistency is overrated. 
+ */ + *index = d->hwirq - EPPI_BASE_INTID + 32; + return offset; + case ESPI_RANGE: + *index = d->hwirq - ESPI_BASE_INTID; + switch (offset) { + case GICD_ISENABLER: + return GICD_ISENABLERnE; + case GICD_ICENABLER: + return GICD_ICENABLERnE; + case GICD_ISPENDR: + return GICD_ISPENDRnE; + case GICD_ICPENDR: + return GICD_ICPENDRnE; + case GICD_ISACTIVER: + return GICD_ISACTIVERnE; + case GICD_ICACTIVER: + return GICD_ICACTIVERnE; + case GICD_IPRIORITYR: + return GICD_IPRIORITYRnE; + case GICD_ICFGR: + return GICD_ICFGRnE; + case GICD_IROUTER: + return GICD_IROUTERnE; + default: + break; + } + break; + default: + break; + } + + WARN_ON(1); + *index = d->hwirq; + return offset; +} + static int gic_peek_irq(struct irq_data *d, u32 offset) { - u32 mask = 1 << (gic_irq(d) % 32); void __iomem *base; + u32 index, mask; + + offset = convert_offset_index(d, offset, &index); + mask = 1 << (index % 32); if (gic_irq_in_rdist(d)) base = gic_data_rdist_sgi_base(); else - base = gic_data.dist_base; + base = gic_dist_base_alias(d); - return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); + return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask); } static void gic_poke_irq(struct irq_data *d, u32 offset) { - u32 mask = 1 << (gic_irq(d) % 32); - void (*rwp_wait)(void); void __iomem *base; + u32 index, mask; - if (gic_irq_in_rdist(d)) { + offset = convert_offset_index(d, offset, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) base = gic_data_rdist_sgi_base(); - rwp_wait = gic_redist_wait_for_rwp; - } else { + else base = gic_data.dist_base; - rwp_wait = gic_dist_wait_for_rwp; - } - writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4); - rwp_wait(); + writel_relaxed(mask, base + offset + (index / 32) * 4); } static void gic_mask_irq(struct irq_data *d) { gic_poke_irq(d, GICD_ICENABLER); + if (gic_irq_in_rdist(d)) + gic_redist_wait_for_rwp(); + else + gic_dist_wait_for_rwp(); } static void gic_eoimode1_mask_irq(struct irq_data *d) @@ -232,12 +512,18 @@ static void gic_unmask_irq(struct irq_data *d) gic_poke_irq(d, GICD_ISENABLER); } +static inline bool gic_supports_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + static_branch_likely(&supports_pseudo_nmis); +} + static int gic_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { u32 reg; - if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ + if (d->hwirq >= 8192) /* SGI/PPI/SPI only */ return -EINVAL; switch (which) { @@ -250,7 +536,11 @@ static int gic_irq_set_irqchip_state(struct irq_data *d, break; case IRQCHIP_STATE_MASKED: - reg = val ? GICD_ICENABLER : GICD_ISENABLER; + if (val) { + gic_mask_irq(d); + return 0; + } + reg = GICD_ISENABLER; break; default: @@ -258,13 +548,20 @@ static int gic_irq_set_irqchip_state(struct irq_data *d, } gic_poke_irq(d, reg); + + /* + * Force read-back to guarantee that the active state has taken + * effect, and won't race with a guest-driven deactivation. 
+ */ + if (reg == GICD_ISACTIVER) + gic_peek_irq(d, reg); return 0; } static int gic_irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *val) { - if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ + if (d->hwirq >= 8192) /* PPI/SPI only */ return -EINVAL; switch (which) { @@ -287,9 +584,103 @@ static int gic_irq_get_irqchip_state(struct irq_data *d, return 0; } +static void gic_irq_set_prio(struct irq_data *d, u8 prio) +{ + void __iomem *base = gic_dist_base(d); + u32 offset, index; + + offset = convert_offset_index(d, GICD_IPRIORITYR, &index); + + writeb_relaxed(prio, base + offset + index); +} + +static int gic_irq_nmi_setup(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (!gic_supports_nmi()) + return -EINVAL; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return -EINVAL; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(irqd_to_hwirq(d) >= 8192)) + return -EINVAL; + + /* desc lock should already be held */ + if (!gic_irq_in_rdist(d)) + desc->handle_irq = handle_fasteoi_nmi; + + gic_irq_set_prio(d, dist_prio_nmi); + + return 0; +} + +static void gic_irq_nmi_teardown(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (WARN_ON(!gic_supports_nmi())) + return; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(irqd_to_hwirq(d) >= 8192)) + return; + + /* desc lock should already be held */ + if (!gic_irq_in_rdist(d)) + desc->handle_irq = handle_fasteoi_irq; + + gic_irq_set_prio(d, dist_prio_irq); +} + +static bool gic_arm64_erratum_2941627_needed(struct irq_data *d) +{ + enum gic_intid_range range; + + if (!static_branch_unlikely(&gic_arm64_2941627_erratum)) + return false; + + range = get_intid_range(d); + + /* + * The workaround is needed if the IRQ is an SPI and + * the target cpu is different from the one we are + * executing on. + */ + return (range == SPI_RANGE || range == ESPI_RANGE) && + !cpumask_test_cpu(raw_smp_processor_id(), + irq_data_get_effective_affinity_mask(d)); +} + static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(irqd_to_hwirq(d), ICC_EOIR1_EL1); + isb(); + + if (gic_arm64_erratum_2941627_needed(d)) { + /* + * Make sure the GIC stream deactivate packet + * issued by ICC_EOIR1_EL1 has completed before + * deactivating through GICD_IACTIVER. + */ + dsb(sy); + gic_poke_irq(d, GICD_ICACTIVER); + } } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -298,39 +689,56 @@ * No need to deactivate an LPI, or an interrupt that * is getting forwarded to a vcpu.
*/ - if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) + if (irqd_to_hwirq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) return; - gic_write_dir(gic_irq(d)); + + if (!gic_arm64_erratum_2941627_needed(d)) + gic_write_dir(irqd_to_hwirq(d)); + else + gic_poke_irq(d, GICD_ICACTIVER); } static int gic_set_type(struct irq_data *d, unsigned int type) { - unsigned int irq = gic_irq(d); - void (*rwp_wait)(void); + irq_hw_number_t irq = irqd_to_hwirq(d); + enum gic_intid_range range; void __iomem *base; + u32 offset, index; + int ret; + + range = get_intid_range(d); /* Interrupt configuration for SGIs can't be changed */ - if (irq < 16) - return -EINVAL; + if (range == SGI_RANGE) + return type != IRQ_TYPE_EDGE_RISING ? -EINVAL : 0; /* SPIs have restrictions on the supported types */ - if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH && - type != IRQ_TYPE_EDGE_RISING) + if ((range == SPI_RANGE || range == ESPI_RANGE) && + type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - if (gic_irq_in_rdist(d)) { + if (gic_irq_in_rdist(d)) base = gic_data_rdist_sgi_base(); - rwp_wait = gic_redist_wait_for_rwp; - } else { - base = gic_data.dist_base; - rwp_wait = gic_dist_wait_for_rwp; + else + base = gic_dist_base_alias(d); + + offset = convert_offset_index(d, GICD_ICFGR, &index); + + ret = gic_configure_irq(index, type, base + offset); + if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) { + /* Misconfigured PPIs are usually not fatal */ + pr_warn("GIC: PPI INTID%ld is secure or misconfigured\n", irq); + ret = 0; } - return gic_configure_irq(irq, type, base, rwp_wait); + return ret; } static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { + if (get_intid_range(d) == SGI_RANGE) + return -EINVAL; + if (vcpu) irqd_set_forwarded_to_vcpu(d); else @@ -338,10 +746,16 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) return 0; } -static u64 gic_mpidr_to_affinity(unsigned long mpidr) +static u64 gic_cpu_to_affinity(int cpu) { + u64 mpidr = cpu_logical_map(cpu); u64 aff; + /* ASR8601 needs to have its affinities shifted down... */ + if (unlikely(gic_data.flags & FLAGS_WORKAROUND_ASR_ERRATUM_8601001)) + mpidr = (MPIDR_AFFINITY_LEVEL(mpidr, 1) | + (MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8)); + aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | @@ -350,49 +764,160 @@ static u64 gic_mpidr_to_affinity(unsigned long mpidr) return aff; } -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +static void gic_deactivate_unhandled(u32 irqnr) { - u32 irqnr; + if (static_branch_likely(&supports_deactivate_key)) { + if (irqnr < 8192) + gic_write_dir(irqnr); + } else { + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); + } +} - irqnr = gic_read_iar(); +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. + * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. 
+ * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Execute an interrupt priority drop when EOI mode 1 is in use. + */ +static inline void gic_complete_ack(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + + isb(); +} - if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) { - int err; +static bool gic_rpr_is_nmi_prio(void) +{ + if (!gic_supports_nmi()) + return false; - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + return unlikely(gic_read_rpr() == GICV3_PRIO_NMI); +} - err = handle_domain_irq(gic_data.domain, irqnr, regs); - if (err) { - WARN_ONCE(true, "Unexpected interrupt received!\n"); - if (static_branch_likely(&supports_deactivate_key)) { - if (irqnr < 8192) - gic_write_dir(irqnr); - } else { - gic_write_eoir(irqnr); - } - } +static bool gic_irqnr_is_special(u32 irqnr) +{ + return irqnr >= 1020 && irqnr <= 1023; +} + +static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) return; + + gic_complete_ack(irqnr); + + if (generic_handle_domain_irq(gic_data.domain, irqnr)) { + WARN_ONCE(true, "Unexpected interrupt (irqnr %u)\n", irqnr); + gic_deactivate_unhandled(irqnr); } - if (irqnr < 16) { - gic_write_eoir(irqnr); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_dir(irqnr); -#ifdef CONFIG_SMP - /* - * Unlike GICv2, we don't need an smp_rmb() here. - * The control dependency from gic_read_iar to - * the ISB in gic_write_eoir is enough to ensure - * that any shared data read by handle_IPI will - * be read after the ACK. - */ - handle_IPI(irqnr, regs); -#else - WARN_ONCE(true, "Unexpected SGI received!\n"); -#endif +} + +static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) + return; + + gic_complete_ack(irqnr); + + if (generic_handle_domain_nmi(gic_data.domain, irqnr)) { + WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr); + gic_deactivate_unhandled(irqnr); + } +} + +/* + * An exception has been taken from a context with IRQs enabled, and this could + * be an IRQ or an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear + * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning, + * after handling any NMI but before handling any IRQ. + * + * The entry code has performed IRQ entry, and if an NMI is detected we must + * perform NMI entry/exit around invoking the handler. + */ +static void __gic_handle_irq_from_irqson(struct pt_regs *regs) +{ + bool is_nmi; + u32 irqnr; + + irqnr = gic_read_iar(); + + is_nmi = gic_rpr_is_nmi_prio(); + + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); + } + + if (gic_prio_masking_enabled()) { + gic_pmr_mask_irqs(); + gic_arch_enable_irqs(); } + + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} + +/* + * An exception has been taken from a context with IRQs disabled, which can only + * be an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave + * DAIF.IF (and ICC_PMR_EL1) unchanged. + * + * The entry code has performed NMI entry. + */ +static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) +{ + u64 pmr; + u32 irqnr; + + /* + * We were in a context with IRQs disabled. 
However, the + * entry code has set PMR to a value that allows any + * interrupt to be acknowledged, and not just NMIs. This can + * lead to surprising effects if the NMI has been retired in + * the meantime, and there is an IRQ pending. The IRQ + * would then be taken in NMI context, something that nobody + * wants to debug twice. + * + * Until we sort this, drop PMR again to a level that will + * actually only allow NMIs before reading IAR, and then + * restore it to what it was. + */ + pmr = gic_read_pmr(); + gic_pmr_mask_irqs(); + isb(); + irqnr = gic_read_iar(); + gic_write_pmr(pmr); + + __gic_handle_nmi(irqnr, regs); +} + +static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + __gic_handle_irq_from_irqsoff(regs); + else + __gic_handle_irq_from_irqson(regs); } static void __init gic_dist_init(void) @@ -400,6 +925,7 @@ static void __init gic_dist_init(void) unsigned int i; u64 affinity; void __iomem *base = gic_data.dist_base; + u32 val; /* Disable the distributor */ writel_relaxed(0, base + GICD_CTLR); @@ -411,22 +937,48 @@ * do the right thing if the kernel is running in secure mode, * but that's not the intended use case anyway. */ - for (i = 32; i < gic_data.irq_nr; i += 32) + for (i = 32; i < GIC_LINE_NR; i += 32) writel_relaxed(~0, base + GICD_IGROUPR + i / 8); - gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp); + /* Extended SPI range, not handled by the GICv2/GICv3 common code */ + for (i = 0; i < GIC_ESPI_NR; i += 32) { + writel_relaxed(~0U, base + GICD_ICENABLERnE + i / 8); + writel_relaxed(~0U, base + GICD_ICACTIVERnE + i / 8); + } + + for (i = 0; i < GIC_ESPI_NR; i += 32) + writel_relaxed(~0U, base + GICD_IGROUPRnE + i / 8); + + for (i = 0; i < GIC_ESPI_NR; i += 16) + writel_relaxed(0, base + GICD_ICFGRnE + i / 4); - /* Enable distributor with ARE, Group1 */ - writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1, - base + GICD_CTLR); + for (i = 0; i < GIC_ESPI_NR; i += 4) + writel_relaxed(REPEAT_BYTE_U32(dist_prio_irq), + base + GICD_IPRIORITYRnE + i); + + /* Now do the common stuff */ + gic_dist_config(base, GIC_LINE_NR, dist_prio_irq); + + val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1; + if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) { + pr_info("Enabling SGIs without active state\n"); + val |= GICD_CTLR_nASSGIreq; + } + + /* Enable distributor with ARE, Group1, and wait for it to drain */ + writel_relaxed(val, base + GICD_CTLR); + gic_dist_wait_for_rwp(); /* * Set all global interrupts to the boot CPU only. ARE must be * enabled.
*/ - affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); - for (i = 32; i < gic_data.irq_nr; i++) + affinity = gic_cpu_to_affinity(smp_processor_id()); + for (i = 32; i < GIC_LINE_NR; i++) gic_write_irouter(affinity, base + GICD_IROUTER + i * 8); + + for (i = 0; i < GIC_ESPI_NR; i++) + gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8); } static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) @@ -470,7 +1022,7 @@ static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) { - unsigned long mpidr = cpu_logical_map(smp_processor_id()); + unsigned long mpidr; u64 typer; u32 aff; @@ -478,6 +1030,8 @@ static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) * Convert affinity to a 32bit value that can be matched to * GICR_TYPER bits [63:32]. */ + mpidr = gic_cpu_to_affinity(smp_processor_id()); + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | @@ -486,6 +1040,7 @@ static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) typer = gic_read_typer(ptr + GICR_TYPER); if ((typer >> 32) == aff) { u64 offset = ptr - region->redist_base; + raw_spin_lock_init(&gic_data_rdist()->rd_lock); gic_data_rdist_rd_base() = ptr; gic_data_rdist()->phys_base = region->phys_base + offset; @@ -512,32 +1067,77 @@ static int gic_populate_rdist(void) return -ENODEV; } -static int __gic_update_vlpi_properties(struct redist_region *region, - void __iomem *ptr) +static int __gic_update_rdist_properties(struct redist_region *region, + void __iomem *ptr) { u64 typer = gic_read_typer(ptr + GICR_TYPER); + u32 ctlr = readl_relaxed(ptr + GICR_CTLR); + + /* Boot-time cleanup */ + if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) { + u64 val; + + /* Deactivate any present vPE */ + val = gicr_read_vpendbaser(ptr + SZ_128K + GICR_VPENDBASER); + if (val & GICR_VPENDBASER_Valid) + gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast, + ptr + SZ_128K + GICR_VPENDBASER); + + /* Mark the VPE table as invalid */ + val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER); + val &= ~GICR_VPROPBASER_4_1_VALID; + gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER); + } + gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); - gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS); + + /* + * TYPER.RVPEID implies some form of DirectLPI, no matter what the + * doc says... :-/ And CTLR.IR implies another subset of DirectLPI + * that the ITS driver can make use of for LPIs (and not VLPIs). + * + * These are 3 different ways to express the same thing, depending + * on the revision of the architecture and its relaxations over + * time. Just group them under the 'direct_lpi' banner. 
+ */ + gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID); + gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) | + !!(ctlr & GICR_CTLR_IR) | + gic_data.rdists.has_rvpeid); + gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY); + + /* Detect non-sensical configurations */ + if (WARN_ON_ONCE(gic_data.rdists.has_rvpeid && !gic_data.rdists.has_vlpis)) { + gic_data.rdists.has_direct_lpi = false; + gic_data.rdists.has_vlpis = false; + gic_data.rdists.has_rvpeid = false; + } + + gic_data.ppi_nr = min(GICR_TYPER_NR_PPIS(typer), gic_data.ppi_nr); return 1; } -static void gic_update_vlpi_properties(void) +static void gic_update_rdist_properties(void) { - gic_iterate_rdists(__gic_update_vlpi_properties); - pr_info("%sVLPI support, %sdirect LPI support\n", - !gic_data.rdists.has_vlpis ? "no " : "", - !gic_data.rdists.has_direct_lpi ? "no " : ""); + gic_data.ppi_nr = UINT_MAX; + gic_iterate_rdists(__gic_update_rdist_properties); + if (WARN_ON(gic_data.ppi_nr == UINT_MAX)) + gic_data.ppi_nr = 0; + pr_info("GICv3 features: %d PPIs%s%s\n", + gic_data.ppi_nr, + gic_data.has_rss ? ", RSS" : "", + gic_data.rdists.has_direct_lpi ? ", DirectLPI" : ""); + + if (gic_data.rdists.has_vlpis) + pr_info("GICv4 features: %s%s%s\n", + gic_data.rdists.has_direct_lpi ? "DirectLPI " : "", + gic_data.rdists.has_rvpeid ? "RVPEID " : "", + gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : ""); } -static void gic_cpu_sys_reg_init(void) +static void gic_cpu_sys_reg_enable(void) { - int i, cpu = smp_processor_id(); - u64 mpidr = cpu_logical_map(cpu); - u64 need_rss = MPIDR_RS(mpidr); - bool group0; - u32 val, pribits; - /* * Need to check that the SRE bit has actually been set. If * not, it means that SRE is disabled at EL2. We're going to @@ -548,28 +1148,34 @@ static void gic_cpu_sys_reg_init(void) if (!gic_enable_sre()) pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); - pribits = gic_read_ctlr(); - pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; - pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; - pribits++; +} - /* - * Let's find out if Group0 is under control of EL3 or not by - * setting the highest possible, non-zero priority in PMR. - * - * If SCR_EL3.FIQ is set, the priority gets shifted down in - * order for the CPU interface to set bit 7, and keep the - * actual priority in the non-secure range. In the process, it - * looses the least significant bit and the actual priority - * becomes 0x80. Reading it back returns 0, indicating that - * we're don't have access to Group0. - */ - write_gicreg(BIT(8 - pribits), ICC_PMR_EL1); - val = read_gicreg(ICC_PMR_EL1); - group0 = val != 0; +static void gic_cpu_sys_reg_init(void) +{ + int i, cpu = smp_processor_id(); + u64 mpidr = gic_cpu_to_affinity(cpu); + u64 need_rss = MPIDR_RS(mpidr); + bool group0; + u32 pribits; + + pribits = gic_get_pribits(); + + group0 = gic_has_group0(); /* Set priority mask register */ - write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); + if (!gic_prio_masking_enabled()) { + write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); + } else if (gic_supports_nmi()) { + /* + * Check that all CPUs use the same priority space. + * + * If there's a mismatch with the boot CPU, the system is + * likely to die as interrupt masking will not work properly on + * all CPUs. 
+ */ + WARN_ON(group0 != cpus_have_group0); + WARN_ON(gic_dist_security_disabled() != cpus_have_security_disabled); + } /* * Some firmwares hand over to the kernel with the BPR changed from @@ -594,8 +1200,10 @@ static void gic_cpu_sys_reg_init(void) case 7: write_gicreg(0, ICC_AP0R3_EL1); write_gicreg(0, ICC_AP0R2_EL1); + fallthrough; case 6: write_gicreg(0, ICC_AP0R1_EL1); + fallthrough; case 5: case 4: write_gicreg(0, ICC_AP0R0_EL1); @@ -609,8 +1217,10 @@ static void gic_cpu_sys_reg_init(void) case 7: write_gicreg(0, ICC_AP1R3_EL1); write_gicreg(0, ICC_AP1R2_EL1); + fallthrough; case 6: write_gicreg(0, ICC_AP1R1_EL1); + fallthrough; case 5: case 4: write_gicreg(0, ICC_AP1R0_EL1); @@ -628,11 +1238,11 @@ static void gic_cpu_sys_reg_init(void) for_each_online_cpu(i) { bool have_rss = per_cpu(has_rss, i) && per_cpu(has_rss, cpu); - need_rss |= MPIDR_RS(cpu_logical_map(i)); + need_rss |= MPIDR_RS(gic_cpu_to_affinity(i)); if (need_rss && (!have_rss)) pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n", cpu, (unsigned long)mpidr, - i, (unsigned long)cpu_logical_map(i)); + i, (unsigned long)gic_cpu_to_affinity(i)); } /** @@ -650,7 +1260,7 @@ static bool gicv3_nolpi; static int __init gicv3_nolpi_cfg(char *buf) { - return strtobool(buf, &gicv3_nolpi); + return kstrtobool(buf, &gicv3_nolpi); } early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg); @@ -664,6 +1274,7 @@ static int gic_dist_supports_lpis(void) static void gic_cpu_init(void) { void __iomem *rbase; + int i; /* Register ourselves with the rest of the world */ if (gic_populate_rdist()) @@ -671,12 +1282,19 @@ static void gic_cpu_init(void) gic_enable_redist(true); + WARN((gic_data.ppi_nr > 16 || GIC_ESPI_NR != 0) && + !(gic_read_ctlr() & ICC_CTLR_EL1_ExtRange), + "Distributor has extended ranges, but CPU%d doesn't\n", + smp_processor_id()); + rbase = gic_data_rdist_sgi_base(); /* Configure SGIs/PPIs as non-secure Group-1 */ - writel_relaxed(~0, rbase + GICR_IGROUPR0); + for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32) + writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); - gic_cpu_config(rbase, gic_redist_wait_for_rwp); + gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, dist_prio_irq); + gic_redist_wait_for_rwp(); /* initialise system registers */ gic_cpu_sys_reg_init(); @@ -687,8 +1305,21 @@ static void gic_cpu_init(void) #define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) #define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) +/* + * gic_starting_cpu() is called after the last point where cpuhp is allowed + * to fail. So pre check for problems earlier. 
+ */ +static int gic_check_rdist(unsigned int cpu) +{ + if (cpumask_test_cpu(cpu, &broken_rdists)) + return -EINVAL; + + return 0; +} + static int gic_starting_cpu(unsigned int cpu) { + gic_cpu_sys_reg_enable(); gic_cpu_init(); if (gic_dist_supports_lpis()) @@ -701,9 +1332,11 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, unsigned long cluster_id) { int next_cpu, cpu = *base_cpu; - unsigned long mpidr = cpu_logical_map(cpu); + unsigned long mpidr; u16 tlist = 0; + mpidr = gic_cpu_to_affinity(cpu); + while (cpu < nr_cpu_ids) { tlist |= 1 << (mpidr & 0xf); @@ -712,7 +1345,7 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, goto out; cpu = next_cpu; - mpidr = cpu_logical_map(cpu); + mpidr = gic_cpu_to_affinity(cpu); if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) { cpu--; @@ -743,43 +1376,60 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) gic_write_sgi1r(val); } -static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) +static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) { int cpu; - if (WARN_ON(irq >= 16)) + if (WARN_ON(d->hwirq >= 16)) return; /* * Ensure that stores to Normal memory are visible to the * other CPUs before issuing the IPI. */ - wmb(); + dsb(ishst); for_each_cpu(cpu, mask) { - u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); + u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(gic_cpu_to_affinity(cpu)); u16 tlist; tlist = gic_compute_target_list(&cpu, mask, cluster_id); - gic_send_sgi(cluster_id, tlist, irq); + gic_send_sgi(cluster_id, tlist, d->hwirq); } /* Force the above writes to ICC_SGI1R_EL1 to be executed */ isb(); } -static void gic_smp_init(void) +static void __init gic_smp_init(void) { - set_smp_cross_call(gic_raise_softirq); + struct irq_fwspec sgi_fwspec = { + .fwnode = gic_data.fwnode, + .param_count = 1, + }; + int base_sgi; + + cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, + "irqchip/arm/gicv3:checkrdist", + gic_check_rdist, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, "irqchip/arm/gicv3:starting", gic_starting_cpu, NULL); + + /* Register all 8 non-secure SGIs */ + base_sgi = irq_domain_alloc_irqs(gic_data.domain, 8, NUMA_NO_NODE, &sgi_fwspec); + if (WARN_ON(base_sgi <= 0)) + return; + + set_smp_ipi_range(base_sgi, 8); } static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { unsigned int cpu; + u32 offset, index; void __iomem *reg; int enabled; u64 val; @@ -800,8 +1450,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (enabled) gic_mask_irq(d); - reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8); - val = gic_mpidr_to_affinity(cpu_logical_map(cpu)); + offset = convert_offset_index(d, GICD_IROUTER, &index); + reg = gic_dist_base(d) + offset + (index * 8); + val = gic_cpu_to_affinity(cpu); gic_write_irouter(val, reg); @@ -811,8 +1462,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, */ if (enabled) gic_unmask_irq(d); - else - gic_dist_wait_for_rwp(); irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -820,22 +1469,23 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, } #else #define gic_set_affinity NULL +#define gic_ipi_send_mask NULL #define gic_smp_init() do { } while(0) #endif -#ifdef CONFIG_CPU_PM -/* Check whether it's single security state view */ -static bool gic_dist_security_disabled(void) +static int gic_retrigger(struct irq_data *data) { 
- return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; + return !gic_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, true); } +#ifdef CONFIG_CPU_PM static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) { if (gic_dist_security_disabled()) gic_enable_redist(true); + gic_cpu_sys_reg_enable(); gic_cpu_sys_reg_init(); } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { gic_write_grpen1(0); @@ -864,8 +1514,12 @@ static struct irq_chip gic_chip = { .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, + .ipi_send_mask = gic_ipi_send_mask, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, @@ -878,66 +1532,71 @@ static struct irq_chip gic_eoimode1_chip = { .irq_eoi = gic_eoimode1_eoi_irq, .irq_set_type = gic_set_type, .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, + .ipi_send_mask = gic_ipi_send_mask, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, }; -#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) - static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct irq_chip *chip = &gic_chip; + struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq)); if (static_branch_likely(&supports_deactivate_key)) chip = &gic_eoimode1_chip; - /* SGIs are private to the core kernel */ - if (hw < 16) - return -EPERM; - /* Nothing here */ - if (hw >= gic_data.irq_nr && hw < 8192) - return -EPERM; - /* Off limits */ - if (hw >= GIC_ID_NR) - return -EPERM; - - /* PPIs */ - if (hw < 32) { + switch (__get_intid_range(hw)) { + case SGI_RANGE: + case PPI_RANGE: + case EPPI_RANGE: irq_set_percpu_devid(irq); irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); - irq_set_status_flags(irq, IRQ_NOAUTOEN); - } - /* SPIs */ - if (hw >= 32 && hw < gic_data.irq_nr) { + break; + + case SPI_RANGE: + case ESPI_RANGE: irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); irq_set_probe(irq); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); - } - /* LPIs */ - if (hw >= 8192 && hw < GIC_ID_NR) { + irqd_set_single_target(irqd); + break; + + case LPI_RANGE: if (!gic_dist_supports_lpis()) return -EPERM; irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); + break; + + default: + return -EPERM; } + /* Prevents SW retriggers which mess up the ACK/EOI ordering */ + irqd_set_handle_enforce_irqctx(irqd); return 0; } -#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) - static int gic_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { + if (fwspec->param_count == 1 && fwspec->param[0] < 16) { + *hwirq = fwspec->param[0]; + *type = IRQ_TYPE_EDGE_RISING; + return 0; + } + if (is_of_node(fwspec->fwnode)) { if (fwspec->param_count < 3) return -EINVAL; @@ -947,9 +1606,14 @@ 
static int gic_irq_domain_translate(struct irq_domain *d, *hwirq = fwspec->param[1] + 32; break; case 1: /* PPI */ - case GIC_IRQ_TYPE_PARTITION: *hwirq = fwspec->param[1] + 16; break; + case 2: /* ESPI */ + *hwirq = fwspec->param[1] + ESPI_BASE_INTID; + break; + case 3: /* EPPI */ + *hwirq = fwspec->param[1] + EPPI_BASE_INTID; + break; case GIC_IRQ_TYPE_LPI: /* LPI */ *hwirq = fwspec->param[1]; break; @@ -961,10 +1625,8 @@ static int gic_irq_domain_translate(struct irq_domain *d, /* * Make it clear that broken DTs are... broken. - * Partitionned PPIs are an unfortunate exception. */ - WARN_ON(*type == IRQ_TYPE_NONE && - fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); + WARN_ON(*type == IRQ_TYPE_NONE); return 0; } @@ -972,6 +1634,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; @@ -1019,77 +1687,293 @@ static int gic_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token) { + irq_hw_number_t hwirq; + unsigned int type; + int ret; + /* Not for us */ - if (fwspec->fwnode != d->fwnode) + if (fwspec->fwnode != d->fwnode) return 0; + /* Handle pure domain searches */ + if (!fwspec->param_count) + return d->bus_token == bus_token; + /* If this is not DT, then we have a single domain */ if (!is_of_node(fwspec->fwnode)) return 1; - /* - * If this is a PPI and we have a 4th (non-null) parameter, - * then we need to match the partition domain. - */ - if (fwspec->param_count >= 4 && - fwspec->param[0] == 1 && fwspec->param[3] != 0) - return d == partition_get_domain(gic_data.ppi_descs[fwspec->param[1]]); + ret = gic_irq_domain_translate(d, fwspec, &hwirq, &type); + if (WARN_ON_ONCE(ret)) + return 0; return d == gic_data.domain; } +static int gic_irq_get_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + const struct cpumask *mask = NULL; + + info->flags = 0; + info->affinity = NULL; + + /* ACPI is not capable of describing PPI affinity -- yet */ + if (!is_of_node(fwspec->fwnode)) + return 0; + + /* If the specifier provides an affinity, use it */ + if (fwspec->param_count == 4 && fwspec->param[3]) { + struct fwnode_handle *fw; + + switch (fwspec->param[0]) { + case 1: /* PPI */ + case 3: /* EPPI */ + break; + default: + return 0; + } + + fw = of_fwnode_handle(of_find_node_by_phandle(fwspec->param[3])); + if (!fw) + return -ENOENT; + + for (int i = 0; i < gic_data.nr_parts; i++) { + if (gic_data.parts[i].partition_id == fw) { + mask = &gic_data.parts[i].mask; + break; + } + } + + if (!mask) + return -ENOENT; + } else { + mask = cpu_possible_mask; + } + + info->affinity = mask; + info->flags = IRQ_FWSPEC_INFO_AFFINITY_VALID; + + return 0; +} + static const struct irq_domain_ops gic_irq_domain_ops = { .translate = gic_irq_domain_translate, .alloc = gic_irq_domain_alloc, .free = gic_irq_domain_free, .select = gic_irq_domain_select, + .get_fwspec_info = gic_irq_get_fwspec_info, }; -static int partition_domain_translate(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *hwirq, - unsigned int *type) +static bool gic_enable_quirk_msm8996(void *data) { - struct device_node *np; - int ret; + struct gic_chip_data *d = data; - np = of_find_node_by_phandle(fwspec->param[3]); - if (WARN_ON(!np)) - return -EINVAL; + d->flags |= FLAGS_WORKAROUND_GICR_WAKER_MSM8996; - ret = 
partition_translate_id(gic_data.ppi_descs[fwspec->param[1]], - of_node_to_fwnode(np)); - if (ret < 0) - return ret; + return true; +} - *hwirq = ret; - *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; +static bool gic_enable_quirk_cavium_38539(void *data) +{ + struct gic_chip_data *d = data; - return 0; + d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539; + + return true; } -static const struct irq_domain_ops partition_domain_ops = { - .translate = partition_domain_translate, - .select = gic_irq_domain_select, -}; +static bool gic_enable_quirk_hip06_07(void *data) +{ + struct gic_chip_data *d = data; -static bool gic_enable_quirk_msm8996(void *data) + /* + * HIP06 GICD_IIDR clashes with GIC-600 product number (despite + * not being an actual ARM implementation). The saving grace is + * that GIC-600 doesn't have ESPI, so nothing to do in that case. + * HIP07 doesn't even have a proper IIDR, and still pretends to + * have ESPI. In both cases, put them right. + */ + if (d->rdists.gicd_typer & GICD_TYPER_ESPI) { + /* Zero both ESPI and the RES0 field next to it... */ + d->rdists.gicd_typer &= ~GENMASK(9, 8); + return true; + } + + return false; +} + +#define T241_CHIPN_MASK GENMASK_ULL(45, 44) +#define T241_CHIP_GICDA_OFFSET 0x1580000 +#define SMCCC_SOC_ID_T241 0x036b0241 + +static bool gic_enable_quirk_nvidia_t241(void *data) +{ + s32 soc_id = arm_smccc_get_soc_id_version(); + unsigned long chip_bmask = 0; + phys_addr_t phys; + u32 i; + + /* Check JEP106 code for NVIDIA T241 chip (036b:0241) */ + if ((soc_id < 0) || (soc_id != SMCCC_SOC_ID_T241)) + return false; + + /* Find the chips based on GICR regions PHYS addr */ + for (i = 0; i < gic_data.nr_redist_regions; i++) { + chip_bmask |= BIT(FIELD_GET(T241_CHIPN_MASK, + (u64)gic_data.redist_regions[i].phys_base)); + } + + if (hweight32(chip_bmask) < 3) + return false; + + /* Setup GICD alias regions */ + for (i = 0; i < ARRAY_SIZE(t241_dist_base_alias); i++) { + if (chip_bmask & BIT(i)) { + phys = gic_data.dist_phys_base + T241_CHIP_GICDA_OFFSET; + phys |= FIELD_PREP(T241_CHIPN_MASK, i); + t241_dist_base_alias[i] = ioremap(phys, SZ_64K); + WARN_ON_ONCE(!t241_dist_base_alias[i]); + } + } + static_branch_enable(&gic_nvidia_t241_erratum); + return true; +} + +static bool gic_enable_quirk_asr8601(void *data) { struct gic_chip_data *d = data; - d->flags |= FLAGS_WORKAROUND_GICR_WAKER_MSM8996; + d->flags |= FLAGS_WORKAROUND_ASR_ERRATUM_8601001; return true; } -static int __init gic_init_bases(void __iomem *dist_base, +static bool gic_enable_quirk_arm64_2941627(void *data) +{ + static_branch_enable(&gic_arm64_2941627_erratum); + return true; +} + +static bool gic_enable_quirk_rk3399(void *data) +{ + struct gic_chip_data *d = data; + + if (of_machine_is_compatible("rockchip,rk3399")) { + d->flags |= FLAGS_WORKAROUND_INSECURE; + return true; + } + + return false; +} + +static bool rd_set_non_coherent(void *data) +{ + struct gic_chip_data *d = data; + + d->rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE; + return true; +} + +static const struct gic_quirk gic_quirks[] = { + { + .desc = "GICv3: Qualcomm MSM8996 broken firmware", + .compatible = "qcom,msm8996-gic-v3", + .init = gic_enable_quirk_msm8996, + }, + { + .desc = "GICv3: ASR erratum 8601001", + .compatible = "asr,asr8601-gic-v3", + .init = gic_enable_quirk_asr8601, + }, + { + .desc = "GICv3: HIP06 erratum 161010803", + .iidr = 0x0204043b, + .mask = 0xffffffff, + .init = gic_enable_quirk_hip06_07, + }, + { + .desc = "GICv3: HIP07 erratum 161010803", + .iidr = 0x00000000, + .mask = 0xffffffff, + .init 
= gic_enable_quirk_hip06_07, + }, + { + /* + * Reserved register accesses generate a Synchronous + * External Abort. This erratum applies to: + * - ThunderX: CN88xx + * - OCTEON TX: CN83xx, CN81xx + * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx* + */ + .desc = "GICv3: Cavium erratum 38539", + .iidr = 0xa000034c, + .mask = 0xe8f00fff, + .init = gic_enable_quirk_cavium_38539, + }, + { + .desc = "GICv3: NVIDIA erratum T241-FABRIC-4", + .iidr = 0x0402043b, + .mask = 0xffffffff, + .init = gic_enable_quirk_nvidia_t241, + }, + { + /* + * GIC-700: 2941627 workaround - IP variant [0,1] + * + */ + .desc = "GICv3: ARM64 erratum 2941627", + .iidr = 0x0400043b, + .mask = 0xff0e0fff, + .init = gic_enable_quirk_arm64_2941627, + }, + { + /* + * GIC-700: 2941627 workaround - IP variant [2] + */ + .desc = "GICv3: ARM64 erratum 2941627", + .iidr = 0x0402043b, + .mask = 0xff0f0fff, + .init = gic_enable_quirk_arm64_2941627, + }, + { + .desc = "GICv3: non-coherent attribute", + .property = "dma-noncoherent", + .init = rd_set_non_coherent, + }, + { + .desc = "GICv3: Insecure RK3399 integration", + .iidr = 0x0000043b, + .mask = 0xff000fff, + .init = gic_enable_quirk_rk3399, + }, + { + } +}; + +static void gic_enable_nmi_support(void) +{ + if (!gic_prio_masking_enabled() || nmi_support_forbidden) + return; + + pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", + gic_has_relaxed_pmr_sync() ? "relaxed" : "forced"); + + static_branch_enable(&supports_pseudo_nmis); + + if (static_branch_likely(&supports_deactivate_key)) + gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; + else + gic_chip.flags |= IRQCHIP_SUPPORTS_NMI; +} + +static int __init gic_init_bases(phys_addr_t dist_phys_base, + void __iomem *dist_base, struct redist_region *rdist_regs, u32 nr_redist_regions, u64 redist_stride, struct fwnode_handle *handle) { u32 typer; - int gic_irqs; int err; if (!is_hyp_mode_available()) @@ -1099,6 +1983,7 @@ static int __init gic_init_bases(void __iomem *dist_base, pr_info("GIC: Using split EOI/Deactivate mode\n"); gic_data.fwnode = handle; + gic_data.dist_phys_base = dist_phys_base; gic_data.dist_base = dist_base; gic_data.redist_regions = rdist_regs; gic_data.nr_redist_regions = nr_redist_regions; @@ -1106,30 +1991,42 @@ static int __init gic_init_bases(void __iomem *dist_base, /* * Find out how many interrupts are supported. - * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) */ typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); gic_data.rdists.gicd_typer = typer; - gic_irqs = GICD_TYPER_IRQS(typer); - if (gic_irqs > 1020) - gic_irqs = 1020; - gic_data.irq_nr = gic_irqs; + + gic_enable_quirks(readl_relaxed(gic_data.dist_base + GICD_IIDR), + gic_quirks, &gic_data); + + pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32); + pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR); + + /* + * ThunderX1 explodes on reading GICD_TYPER2, in violation of the + * architecture spec (which says that reserved registers are RES0). 
+ */ + if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539)) + gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2); gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, &gic_data); - irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); - gic_data.rdists.has_vlpis = true; - gic_data.rdists.has_direct_lpi = true; + if (!static_branch_unlikely(&gic_nvidia_t241_erratum)) { + /* Disable GICv4.x features for the erratum T241-FABRIC-4 */ + gic_data.rdists.has_rvpeid = true; + gic_data.rdists.has_vlpis = true; + gic_data.rdists.has_direct_lpi = true; + gic_data.rdists.has_vpend_valid_dirty = true; + } if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { err = -ENOMEM; goto out_free; } + irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); + gic_data.has_rss = !!(typer & GICD_TYPER_RSS); - pr_info("Distributor has %sRange Selector support\n", - gic_data.has_rss ? "" : "no "); if (typer & GICD_TYPER_MBIS) { err = mbi_init(handle, gic_data.domain); @@ -1139,16 +2036,23 @@ static int __init gic_init_bases(void __iomem *dist_base, set_handle_irq(gic_handle_irq); - gic_update_vlpi_properties(); + gic_update_rdist_properties(); - gic_smp_init(); + gic_cpu_sys_reg_enable(); + gic_prio_init(); gic_dist_init(); gic_cpu_init(); + gic_enable_nmi_support(); + gic_smp_init(); gic_cpu_pm_init(); if (gic_dist_supports_lpis()) { - its_init(handle, &gic_data.rdists, gic_data.domain); + its_init(handle, &gic_data.rdists, gic_data.domain, dist_prio_irq); its_cpu_init(); + its_lpi_memreserve_init(); + } else { + if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) + gicv2m_init(handle, gic_data.domain); } return 0; @@ -1183,7 +2087,6 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) return; nr_parts = of_get_child_count(parts_node); - if (!nr_parts) goto out_put_node; @@ -1197,7 +2100,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) part = &parts[part_idx]; - part->partition_id = of_node_to_fwnode(child_part); + part->partition_id = of_fwnode_handle(child_part); pr_info("GIC: PPI partition %pOFn[%d] { ", child_part, part_idx); @@ -1221,51 +2124,32 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) continue; cpu = of_cpu_node_to_id(cpu_node); - if (WARN_ON(cpu < 0)) + if (WARN_ON(cpu < 0)) { + of_node_put(cpu_node); continue; + } pr_cont("%pOF[%d] ", cpu_node, cpu); cpumask_set_cpu(cpu, &part->mask); + of_node_put(cpu_node); } pr_cont("}\n"); part_idx++; } - for (i = 0; i < 16; i++) { - unsigned int irq; - struct partition_desc *desc; - struct irq_fwspec ppi_fwspec = { - .fwnode = gic_data.fwnode, - .param_count = 3, - .param = { - [0] = GIC_IRQ_TYPE_PARTITION, - [1] = i, - [2] = IRQ_TYPE_NONE, - }, - }; - - irq = irq_create_fwspec_mapping(&ppi_fwspec); - if (WARN_ON(!irq)) - continue; - desc = partition_create_desc(gic_data.fwnode, parts, nr_parts, - irq, &partition_domain_ops); - if (WARN_ON(!desc)) - continue; - - gic_data.ppi_descs[i] = desc; - } + gic_data.parts = parts; + gic_data.nr_parts = nr_parts; out_put_node: of_node_put(parts_node); } -static void __init gic_of_setup_kvm_info(struct device_node *node) +static void __init gic_of_setup_kvm_info(struct device_node *node, u32 nr_redist_regions) { int ret; struct resource r; - u32 gicv_idx; gic_v3_kvm_info.type = GIC_V3; @@ -1273,43 +2157,58 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) if 
@@ -1183,7 +2087,6 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 		return;
 
 	nr_parts = of_get_child_count(parts_node);
-
 	if (!nr_parts)
 		goto out_put_node;
 
@@ -1197,7 +2100,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 
 		part = &parts[part_idx];
 
-		part->partition_id = of_node_to_fwnode(child_part);
+		part->partition_id = of_fwnode_handle(child_part);
 
 		pr_info("GIC: PPI partition %pOFn[%d] { ",
 			child_part, part_idx);
@@ -1221,51 +2124,32 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 				continue;
 
 			cpu = of_cpu_node_to_id(cpu_node);
-			if (WARN_ON(cpu < 0))
+			if (WARN_ON(cpu < 0)) {
+				of_node_put(cpu_node);
 				continue;
+			}
 
 			pr_cont("%pOF[%d] ", cpu_node, cpu);
 
 			cpumask_set_cpu(cpu, &part->mask);
+			of_node_put(cpu_node);
 		}
 
 		pr_cont("}\n");
 		part_idx++;
 	}
 
-	for (i = 0; i < 16; i++) {
-		unsigned int irq;
-		struct partition_desc *desc;
-		struct irq_fwspec ppi_fwspec = {
-			.fwnode		= gic_data.fwnode,
-			.param_count	= 3,
-			.param		= {
-				[0]	= GIC_IRQ_TYPE_PARTITION,
-				[1]	= i,
-				[2]	= IRQ_TYPE_NONE,
-			},
-		};
-
-		irq = irq_create_fwspec_mapping(&ppi_fwspec);
-		if (WARN_ON(!irq))
-			continue;
-		desc = partition_create_desc(gic_data.fwnode, parts, nr_parts,
-					     irq, &partition_domain_ops);
-		if (WARN_ON(!desc))
-			continue;
-
-		gic_data.ppi_descs[i] = desc;
-	}
+	gic_data.parts = parts;
+	gic_data.nr_parts = nr_parts;
 
 out_put_node:
 	of_node_put(parts_node);
 }
 
-static void __init gic_of_setup_kvm_info(struct device_node *node)
+static void __init gic_of_setup_kvm_info(struct device_node *node, u32 nr_redist_regions)
 {
 	int ret;
 	struct resource r;
-	u32 gicv_idx;
 
 	gic_v3_kvm_info.type = GIC_V3;
 
@@ -1273,43 +2157,58 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 	if (!gic_v3_kvm_info.maint_irq)
 		return;
 
-	if (of_property_read_u32(node, "#redistributor-regions",
-				 &gicv_idx))
-		gicv_idx = 1;
-
-	gicv_idx += 3;	/* Also skip GICD, GICC, GICH */
-	ret = of_address_to_resource(node, gicv_idx, &r);
+	/* Also skip GICD, GICC, GICH */
+	ret = of_address_to_resource(node, nr_redist_regions + 3, &r);
 	if (!ret)
 		gic_v3_kvm_info.vcpu = r;
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
-	gic_set_kvm_info(&gic_v3_kvm_info);
+	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
+	vgic_set_kvm_info(&gic_v3_kvm_info);
 }
 
-static const struct gic_quirk gic_quirks[] = {
-	{
-		.desc	= "GICv3: Qualcomm MSM8996 broken firmware",
-		.compatible = "qcom,msm8996-gic-v3",
-		.init	= gic_enable_quirk_msm8996,
-	},
-	{
-	}
-};
+static void gic_request_region(resource_size_t base, resource_size_t size,
+			       const char *name)
+{
+	if (!request_mem_region(base, size, name))
+		pr_warn_once(FW_BUG "%s region %pa has overlapping address\n",
			     name, &base);
+}
+
+static void __iomem *gic_of_iomap(struct device_node *node, int idx,
+				  const char *name, struct resource *res)
+{
+	void __iomem *base;
+	int ret;
+
+	ret = of_address_to_resource(node, idx, res);
+	if (ret)
+		return IOMEM_ERR_PTR(ret);
+
+	gic_request_region(res->start, resource_size(res), name);
+	base = of_iomap(node, idx);
+
+	return base ?: IOMEM_ERR_PTR(-ENOMEM);
+}
 
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 {
+	phys_addr_t dist_phys_base;
 	void __iomem *dist_base;
 	struct redist_region *rdist_regs;
+	struct resource res;
 	u64 redist_stride;
 	u32 nr_redist_regions;
 	int err, i;
 
-	dist_base = of_iomap(node, 0);
-	if (!dist_base) {
+	dist_base = gic_of_iomap(node, 0, "GICD", &res);
+	if (IS_ERR(dist_base)) {
 		pr_err("%pOF: unable to map gic dist registers\n", node);
-		return -ENXIO;
+		return PTR_ERR(dist_base);
 	}
 
+	dist_phys_base = res.start;
+
 	err = gic_validate_dist_version(dist_base);
 	if (err) {
 		pr_err("%pOF: no distributor detected, giving up\n", node);
@@ -1327,12 +2226,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 	}
 
 	for (i = 0; i < nr_redist_regions; i++) {
-		struct resource res;
-		int ret;
-
-		ret = of_address_to_resource(node, 1 + i, &res);
-		rdist_regs[i].redist_base = of_iomap(node, 1 + i);
-		if (ret || !rdist_regs[i].redist_base) {
+		rdist_regs[i].redist_base = gic_of_iomap(node, 1 + i, "GICR", &res);
+		if (IS_ERR(rdist_regs[i].redist_base)) {
 			pr_err("%pOF: couldn't map region %d\n", node, i);
 			err = -ENODEV;
 			goto out_unmap_rdist;
@@ -1345,20 +2240,20 @@ static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 
 	gic_enable_of_quirks(node, gic_quirks, &gic_data);
 
-	err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions,
-			     redist_stride, &node->fwnode);
+	err = gic_init_bases(dist_phys_base, dist_base, rdist_regs,
+			     nr_redist_regions, redist_stride, &node->fwnode);
 	if (err)
 		goto out_unmap_rdist;
 
 	gic_populate_ppi_partitions(node);
 
 	if (static_branch_likely(&supports_deactivate_key))
-		gic_of_setup_kvm_info(node);
+		gic_of_setup_kvm_info(node, nr_redist_regions);
+
 	return 0;
 
 out_unmap_rdist:
 	for (i = 0; i < nr_redist_regions; i++)
-		if (rdist_regs[i].redist_base)
+		if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base))
 			iounmap(rdist_regs[i].redist_base);
 	kfree(rdist_regs);
 out_unmap_dist:
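
gic_of_iomap() above folds reserve-and-map into one helper and reports failure through an error pointer rather than NULL, which is why gic_of_init() now tests IS_ERR() and must skip ERR_PTR values when unwinding. A toy user-space model of that convention, assuming the kernel's usual trick of encoding small negative errnos at the top of the address space (the real macros live in linux/err.h):

	#include <stdio.h>

	#define MAX_ERRNO 4095

	static void *err_ptr(long err)     { return (void *)err; }
	static long ptr_err(const void *p) { return (long)p; }
	static int is_err(const void *p)
	{
		return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
	}

	/*
	 * Stand-in for gic_of_iomap(): mapping either succeeds or yields
	 * an encoded errno, never NULL.
	 */
	static void *map_region(int fail)
	{
		return fail ? err_ptr(-12 /* ENOMEM */) : (void *)0x1000;
	}

	int main(void)
	{
		void *base = map_region(1);

		if (is_err(base)) {
			printf("map failed: errno %ld\n", -ptr_err(base));
			return 1;
		}
		return 0;
	}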
@@ -1375,6 +2270,7 @@ static struct
 	struct redist_region *redist_regs;
 	u32 nr_redist_regions;
 	bool single_redist;
+	int enabled_rdists;
 	u32 maint_irq;
 	int maint_irq_mode;
 	phys_addr_t vcpu_base;
@@ -1392,7 +2288,7 @@ gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
 }
 
 static int __init
-gic_acpi_parse_madt_redist(struct acpi_subtable_header *header,
+gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
 			   const unsigned long end)
 {
 	struct acpi_madt_generic_redistributor *redist =
@@ -1405,12 +2301,18 @@ gic_acpi_parse_madt_redist(struct acpi_subtable_header *header,
 		return -ENOMEM;
 	}
 
+	if (acpi_get_madt_revision() >= 7 &&
+	    (redist->flags & ACPI_MADT_GICR_NON_COHERENT))
+		gic_data.rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE;
+
+	gic_request_region(redist->base_address, redist->length, "GICR");
+
 	gic_acpi_register_redist(redist->base_address, redist_base);
 	return 0;
 }
 
 static int __init
-gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
+gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
 			 const unsigned long end)
 {
 	struct acpi_madt_generic_interrupt *gicc =
@@ -1419,13 +2321,33 @@ gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
 	u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
 	void __iomem *redist_base;
 
-	/* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */
-	if (!(gicc->flags & ACPI_MADT_ENABLED))
+	/* Neither enabled or online capable means it doesn't exist, skip it */
+	if (!(gicc->flags & (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE)))
+		return 0;
+
+	/*
+	 * Capable but disabled CPUs can be brought online later. What about
+	 * the redistributor? ACPI doesn't want to say!
+	 * Virtual hotplug systems can use the MADT's "always-on" GICR entries.
+	 * Otherwise, prevent such CPUs from being brought online.
+	 */
+	if (!(gicc->flags & ACPI_MADT_ENABLED)) {
+		int cpu = get_cpu_for_acpi_id(gicc->uid);
+
+		pr_warn("CPU %u's redistributor is inaccessible: this CPU can't be brought online\n", cpu);
+		if (cpu >= 0)
+			cpumask_set_cpu(cpu, &broken_rdists);
 		return 0;
+	}
 
 	redist_base = ioremap(gicc->gicr_base_address, size);
 	if (!redist_base)
 		return -ENOMEM;
+
+	gic_request_region(gicc->gicr_base_address, size, "GICR");
+
+	if (acpi_get_madt_revision() >= 7 &&
+	    (gicc->flags & ACPI_MADT_GICC_NON_COHERENT))
+		gic_data.rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE;
 
 	gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
 	return 0;
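
The reworked parser above splits GICC entries three ways: neither flag set means the CPU does not exist, online-capable without enabled means a hotpluggable CPU whose redistributor cannot be reached yet (hence the broken_rdists mask), and enabled means the GICR can be mapped now. A compact restatement of that decision; the flag bit positions are illustrative stand-ins for the ACPICA constants:

	#include <stdint.h>

	#define MADT_ENABLED		(1u << 0)	/* illustrative */
	#define MADT_ONLINE_CAPABLE	(1u << 3)	/* illustrative */

	enum gicr_state { GICR_ABSENT, GICR_INACCESSIBLE, GICR_USABLE };

	/* Mirrors the three-way test in gic_acpi_parse_madt_gicc(). */
	static enum gicr_state classify_gicc(uint32_t flags)
	{
		if (!(flags & (MADT_ENABLED | MADT_ONLINE_CAPABLE)))
			return GICR_ABSENT;		/* skip the entry */
		if (!(flags & MADT_ENABLED))
			return GICR_INACCESSIBLE;	/* CPU joins broken_rdists */
		return GICR_USABLE;			/* ioremap and register */
	}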
@@ -1452,14 +2374,14 @@ static int __init gic_acpi_collect_gicr_base(void)
 	return -ENODEV;
 }
 
-static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header,
+static int __init gic_acpi_match_gicr(union acpi_subtable_headers *header,
 				  const unsigned long end)
 {
 	/* Subtable presence means that redist exists, that's it */
 	return 0;
 }
 
-static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
+static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
 				      const unsigned long end)
 {
 	struct acpi_madt_generic_interrupt *gicc =
@@ -1467,19 +2389,15 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
 
 	/*
 	 * If GICC is enabled and has valid gicr base address, then it means
-	 * GICR base is presented via GICC
-	 */
-	if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address)
-		return 0;
-
-	/*
-	 * It's perfectly valid firmware can pass disabled GICC entry, driver
-	 * should not treat as errors, skip the entry instead of probe fail.
+	 * GICR base is presented via GICC. The redistributor is only known to
+	 * be accessible if the GICC is marked as enabled. If this bit is not
+	 * set, we'd need to add the redistributor at runtime, which isn't
+	 * supported.
 	 */
-	if (!(gicc->flags & ACPI_MADT_ENABLED))
-		return 0;
+	if (gicc->flags & ACPI_MADT_ENABLED && gicc->gicr_base_address)
+		acpi_data.enabled_rdists++;
 
-	return -ENODEV;
+	return 0;
 }
 
 static int __init gic_acpi_count_gicr_regions(void)
@@ -1500,8 +2418,10 @@ static int __init gic_acpi_count_gicr_regions(void)
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
 				      gic_acpi_match_gicc, 0);
-	if (count > 0)
+	if (count > 0) {
 		acpi_data.single_redist = true;
+		count = acpi_data.enabled_rdists;
+	}
 
 	return count;
 }
@@ -1525,7 +2445,7 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
 	return true;
 }
 
-static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
+static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header,
 						const unsigned long end)
 {
 	struct acpi_madt_generic_interrupt *gicc =
@@ -1533,8 +2453,8 @@ static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
 	int maint_irq_mode;
 	static int first_madt = true;
 
-	/* Skip unusable CPUs */
-	if (!(gicc->flags & ACPI_MADT_ENABLED))
+	if (!(gicc->flags &
+	      (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE)))
 		return 0;
 
 	maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
@@ -1603,14 +2523,21 @@ static void __init gic_acpi_setup_kvm_info(void)
 	}
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
-	gic_set_kvm_info(&gic_v3_kvm_info);
+	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
+	vgic_set_kvm_info(&gic_v3_kvm_info);
+}
+
+static struct fwnode_handle *gsi_domain_handle;
+
+static struct fwnode_handle *gic_v3_get_gsi_domain_id(u32 gsi)
+{
+	return gsi_domain_handle;
 }
 
 static int __init
-gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
+gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
 {
 	struct acpi_madt_generic_distributor *dist;
-	struct fwnode_handle *domain_handle;
 	size_t size;
 	int i, err;
 
@@ -1622,6 +2549,7 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
 		pr_err("Unable to map GICD registers\n");
 		return -ENOMEM;
 	}
+	gic_request_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
 
 	err = gic_validate_dist_version(acpi_data.dist_base);
 	if (err) {
@@ -1641,18 +2569,19 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
 	if (err)
 		goto out_redist_unmap;
 
-	domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base);
-	if (!domain_handle) {
+	gsi_domain_handle = irq_domain_alloc_fwnode(&dist->base_address);
+	if (!gsi_domain_handle) {
 		err = -ENOMEM;
 		goto out_redist_unmap;
 	}
 
-	err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
-			     acpi_data.nr_redist_regions, 0, domain_handle);
+	err = gic_init_bases(dist->base_address, acpi_data.dist_base,
			     acpi_data.redist_regs, acpi_data.nr_redist_regions,
+			     0, gsi_domain_handle);
 	if (err)
 		goto out_fwhandle_free;
 
-	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, gic_v3_get_gsi_domain_id);
 
 	if (static_branch_likely(&supports_deactivate_key))
 		gic_acpi_setup_kvm_info();
@@ -1660,7 +2589,7 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
 	return 0;
 
 out_fwhandle_free:
-	irq_domain_free_fwnode(domain_handle);
+	irq_domain_free_fwnode(gsi_domain_handle);
out_redist_unmap:
 	for (i = 0; i < acpi_data.nr_redist_regions; i++)
 		if (acpi_data.redist_regs[i].redist_base)
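
On what the acpi_set_irq_model() change above buys: instead of a single fixed fwnode, ACPI core now asks the callback for a fwnode per GSI, and on this hardware every GSI resolves to the one GICv3 domain, so the stock GSI helpers land in this driver. A small consumer-side sketch; the probe function is hypothetical, while acpi_register_gsi() is the regular kernel API:

	#include <linux/acpi.h>

	/*
	 * Hypothetical consumer: translate a GSI found in a device's _CRS
	 * into a Linux IRQ. The domain lookup goes through the
	 * gic_v3_get_gsi_domain_id() callback registered above.
	 */
	static int example_probe_gsi(u32 gsi)
	{
		/* Returns a Linux IRQ number, or a negative value on failure. */
		return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
					 ACPI_ACTIVE_HIGH);
	}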
