From 113fe88eed53af08800f54a03e463636105831e0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 11 Jun 2022 18:55:15 +0200 Subject: powerpc: Don't include asm/setup.h in asm/machdep.h asm/machdep.h doesn't need asm/setup.h Remove it. Add it directly in files that needs it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3b1dfb19a2c3265fb4abc2bfc7b6eae9261a998b.1654966508.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/machdep.h | 2 -- arch/powerpc/kernel/pci-common.c | 1 + arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kexec/core.c | 1 + arch/powerpc/kvm/powerpc.c | 1 + arch/powerpc/mm/mem.c | 1 + arch/powerpc/platforms/pseries/kexec.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 1 + arch/powerpc/sysdev/fsl_pci.c | 1 + drivers/scsi/mesh.c | 2 +- 12 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 358d171ae8e0..613755afa8a9 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -8,8 +8,6 @@ #include #include -#include - struct pt_regs; struct pci_bus; struct device_node; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 068410cd54a3..c87999289752 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "../../../drivers/pci/pci.h" diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index feae8509b59c..1066b072db35 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 04694ec423f6..1939683e35ed 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 7ab4980fe13a..9773dd0640f3 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -19,6 +19,7 @@ #include #include #include +#include void machine_kexec_mask_interrupts(void) { unsigned int i; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 191992fcb2c2..fb1490761c87 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,6 +33,7 @@ #include #endif #include +#include #include "timing.h" #include "irq.h" diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52b77684acda..2cf6748755e1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index ab6cdbebb35e..096d09ed89f6 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 937f9c010b22..e6c117fb6491 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index afb074269b42..e86a749173e6 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -72,6 +72,7 @@ #include #include #include +#include #include "pseries.h" diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 1011cfea2e32..e8d072e98b66 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index 322d3ad38159..a74f0c2c8ba7 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include -- cgit From 7dc3ba0a071892ea212f90f63738fd9f81b1f638 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 11 Jun 2022 18:55:16 +0200 Subject: powerpc: Move prom_init() out of asm-prototypes.h This is the end of the work started with commit 76222808fc25 ("powerpc: Move C prototypes out of asm-prototypes.h") Now that asm/machdep.h doesn't include asm/setup.h anymore, there are no conflicts anymore with the function prom_init() defined in drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.o So we can move it to asm/setup.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e111e4f0addb0fa810d5f6a71d3b8e62c0b53492.1654966508.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 6 ------ arch/powerpc/include/asm/setup.h | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d995c65d18ab..3b2bbc273055 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -34,12 +34,6 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* prom_init (OpenFirmware) */ -unsigned long __init prom_init(unsigned long r3, unsigned long r4, - unsigned long pp, - unsigned long r6, unsigned long r7, - unsigned long kbase); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 8fa37ef5da4d..14a0b8af738e 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -85,6 +85,11 @@ void __init machine_init(u64 dt_ptr); void __init early_setup(unsigned long dt_ptr); void early_setup_secondary(void); +/* prom_init (OpenFirmware) */ +unsigned long __init prom_init(unsigned long r3, unsigned long r4, + unsigned long pp, unsigned long r6, + unsigned long r7, unsigned long kbase); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ -- cgit From 882c835b71e22ca82361dab3b60b85b557abd72f Mon Sep 17 00:00:00 2001 From: Jiang Jian Date: Tue, 21 Jun 2022 20:53:21 +0800 Subject: cxl: drop unexpected word "the" in the comments there is an unexpected word "the" in the comments that need to be dropped file: drivers/misc/cxl/cxl.h line: 1107 +/* check if the given pci_dev is on the the cxl vphb bus */ changed to +/* check if the given pci_dev is on the cxl vphb bus */ Signed-off-by: Jiang Jian Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220621125321.122280-1-jiangjian@cdjrlc.com --- drivers/misc/cxl/cxl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 7a6dd91987fd..0562071cdd4a 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -1104,7 +1104,7 @@ extern const struct cxl_backend_ops cxl_native_ops; extern const struct cxl_backend_ops cxl_guest_ops; extern const struct cxl_backend_ops *cxl_ops; -/* check if the given pci_dev is on the the cxl vphb bus */ +/* check if the given pci_dev is on the cxl vphb bus */ bool cxl_pci_is_vphb_device(struct pci_dev *dev); /* decode AFU error bits in the PSL register PSL_SERR_An */ -- cgit From 6d056b7254f9954522b7bb9947c8779a013d189f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 7 May 2022 13:01:44 +0300 Subject: powerpc/52xx: Remove dead code, i.e. mpc52xx_get_xtal_freq() It seems mpc52xx_get_xtal_freq() is not used anywhere. Remove dead code. Signed-off-by: Andy Shevchenko Reviewed-by: Wolfram Sang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220507100147.5802-1-andriy.shevchenko@linux.intel.com --- arch/powerpc/include/asm/mpc52xx.h | 1 - arch/powerpc/platforms/52xx/mpc52xx_common.c | 37 ---------------------------- 2 files changed, 38 deletions(-) diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index ce1e0aabaa64..ddd80aae1e32 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -274,7 +274,6 @@ extern void mpc52xx_declare_of_platform_devices(void); extern int mpc5200_psc_ac97_gpio_reset(int psc_number); extern void mpc52xx_map_common_devices(void); extern int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv); -extern unsigned int mpc52xx_get_xtal_freq(struct device_node *node); extern void __noreturn mpc52xx_restart(char *cmd); /* mpc52xx_gpt.c */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 4348506d667d..409c0ec06265 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -203,43 +203,6 @@ int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv) } EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv); -/** - * mpc52xx_get_xtal_freq - Get SYS_XTAL_IN frequency for a device - * - * @node: device node - * - * Returns the frequency of the external oscillator clock connected - * to the SYS_XTAL_IN pin, or 0 if it cannot be determined. - */ -unsigned int mpc52xx_get_xtal_freq(struct device_node *node) -{ - u32 val; - unsigned int freq; - - if (!mpc52xx_cdm) - return 0; - - freq = mpc5xxx_get_bus_frequency(node); - if (!freq) - return 0; - - if (in_8(&mpc52xx_cdm->ipb_clk_sel) & 0x1) - freq *= 2; - - val = in_be32(&mpc52xx_cdm->rstcfg); - if (val & (1 << 5)) - freq *= 8; - else - freq *= 4; - if (val & (1 << 6)) - freq /= 12; - else - freq /= 16; - - return freq; -} -EXPORT_SYMBOL(mpc52xx_get_xtal_freq); - /** * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer */ -- cgit From de06fba62af64144aca6f8a8bedbc848d2e5b440 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 7 May 2022 13:01:45 +0300 Subject: powerpc/mpc5xxx: Switch mpc5xxx_get_bus_frequency() to use fwnode Switch mpc5xxx_get_bus_frequency() to use fwnode in order to help cleaning up other parts of the kernel from OF specific code. No functional change intended. Signed-off-by: Andy Shevchenko Acked-by: Chris Packham # for i2c-mpc Acked-by: Wolfram Sang # for the I2C part Acked-by: Mark Brown Acked-by: Marc Kleine-Budde # for mscan/mpc5xxx_can Acked-by: Damien Le Moal Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220507100147.5802-2-andriy.shevchenko@linux.intel.com --- arch/powerpc/include/asm/mpc5xxx.h | 9 +++++- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2 +- arch/powerpc/sysdev/mpc5xxx_clocks.c | 41 +++++++++++++----------- drivers/ata/pata_mpc52xx.c | 2 +- drivers/i2c/busses/i2c-mpc.c | 7 ++-- drivers/net/can/mscan/mpc5xxx_can.c | 2 +- drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +- drivers/net/ethernet/freescale/fec_mpc52xx_phy.c | 3 +- drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 4 +-- drivers/spi/spi-mpc52xx.c | 2 +- drivers/tty/serial/mpc52xx_uart.c | 4 +-- 11 files changed, 44 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/asm/mpc5xxx.h index 2f60f5c5461b..44db26380435 100644 --- a/arch/powerpc/include/asm/mpc5xxx.h +++ b/arch/powerpc/include/asm/mpc5xxx.h @@ -11,7 +11,14 @@ #ifndef __ASM_POWERPC_MPC5xxx_H__ #define __ASM_POWERPC_MPC5xxx_H__ -extern unsigned long mpc5xxx_get_bus_frequency(struct device_node *node); +#include + +unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode); + +static inline unsigned long mpc5xxx_get_bus_frequency(struct device *dev) +{ + return mpc5xxx_fwnode_get_bus_frequency(dev_fwnode(dev)); +} #endif /* __ASM_POWERPC_MPC5xxx_H__ */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 968f5b727273..76ece6001cdf 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -722,7 +722,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) raw_spin_lock_init(&gpt->lock); gpt->dev = &ofdev->dev; - gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); + gpt->ipb_freq = mpc5xxx_get_bus_frequency(&ofdev->dev); gpt->regs = of_iomap(ofdev->dev.of_node, 0); if (!gpt->regs) return -ENOMEM; diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 834a6d7fbd88..c5bf7e1b3780 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -1,31 +1,34 @@ // SPDX-License-Identifier: GPL-2.0 -/** - * mpc5xxx_get_bus_frequency - Find the bus frequency for a device - * @node: device node - * - * Returns bus frequency (IPS on MPC512x, IPB on MPC52xx), - * or 0 if the bus frequency cannot be found. - */ #include -#include #include +#include + #include -unsigned long mpc5xxx_get_bus_frequency(struct device_node *node) +/** + * mpc5xxx_fwnode_get_bus_frequency - Find the bus frequency for a firmware node + * @fwnode: firmware node + * + * Returns bus frequency (IPS on MPC512x, IPB on MPC52xx), + * or 0 if the bus frequency cannot be found. + */ +unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode) { - const unsigned int *p_bus_freq = NULL; + struct fwnode_handle *parent; + u32 bus_freq; + int ret; - of_node_get(node); - while (node) { - p_bus_freq = of_get_property(node, "bus-frequency", NULL); - if (p_bus_freq) - break; + ret = fwnode_property_read_u32(fwnode, "bus-frequency", &bus_freq); + if (!ret) + return bus_freq; - node = of_get_next_parent(node); + fwnode_for_each_parent_node(fwnode, parent) { + ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq); + if (!ret) + return bus_freq; } - of_node_put(node); - return p_bus_freq ? *p_bus_freq : 0; + return 0; } -EXPORT_SYMBOL(mpc5xxx_get_bus_frequency); +EXPORT_SYMBOL(mpc5xxx_fwnode_get_bus_frequency); diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 03b6ae37a578..6559b606736d 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c @@ -683,7 +683,7 @@ static int mpc52xx_ata_probe(struct platform_device *op) struct bcom_task *dmatsk; /* Get ipb frequency */ - ipb_freq = mpc5xxx_get_bus_frequency(op->dev.of_node); + ipb_freq = mpc5xxx_get_bus_frequency(&op->dev); if (!ipb_freq) { dev_err(&op->dev, "could not determine IPB bus frequency\n"); return -ENODEV; diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 6c698c10d3cd..81ac92bb4f6f 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -239,6 +239,7 @@ static const struct mpc_i2c_divider mpc_i2c_dividers_52xx[] = { static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, u32 *real_clk) { + struct fwnode_handle *fwnode = of_fwnode_handle(node); const struct mpc_i2c_divider *div = NULL; unsigned int pvr = mfspr(SPRN_PVR); u32 divider; @@ -246,12 +247,12 @@ static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, if (clock == MPC_I2C_CLOCK_LEGACY) { /* see below - default fdr = 0x3f -> div = 2048 */ - *real_clk = mpc5xxx_get_bus_frequency(node) / 2048; + *real_clk = mpc5xxx_fwnode_get_bus_frequency(fwnode) / 2048; return -EINVAL; } /* Determine divider value */ - divider = mpc5xxx_get_bus_frequency(node) / clock; + divider = mpc5xxx_fwnode_get_bus_frequency(fwnode) / clock; /* * We want to choose an FDR/DFSR that generates an I2C bus speed that @@ -266,7 +267,7 @@ static int mpc_i2c_get_fdr_52xx(struct device_node *node, u32 clock, break; } - *real_clk = mpc5xxx_get_bus_frequency(node) / div->divider; + *real_clk = mpc5xxx_fwnode_get_bus_frequency(fwnode) / div->divider; return (int)div->fdr; } diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index 65ba6697bd7d..c469b2f3e57d 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -63,7 +63,7 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev, else *mscan_clksrc = MSCAN_CLKSRC_XTAL; - freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); + freq = mpc5xxx_get_bus_frequency(&ofdev->dev); if (!freq) return 0; diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 5ddb769bdfb4..a7f4c3c29f3e 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -924,7 +924,7 @@ static int mpc52xx_fec_probe(struct platform_device *op) /* Start with safe defaults for link connection */ priv->speed = 100; priv->duplex = DUPLEX_HALF; - priv->mdio_speed = ((mpc5xxx_get_bus_frequency(np) >> 20) / 5) << 1; + priv->mdio_speed = ((mpc5xxx_get_bus_frequency(&op->dev) >> 20) / 5) << 1; /* The current speed preconfigures the speed of the MII link */ prop = of_get_property(np, "current-speed", &prop_size); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c index f85b5e81dfc1..95f778cce98c 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c @@ -100,8 +100,7 @@ static int mpc52xx_fec_mdio_probe(struct platform_device *of) dev_set_drvdata(dev, bus); /* set MII speed */ - out_be32(&priv->regs->mii_speed, - ((mpc5xxx_get_bus_frequency(of->dev.of_node) >> 20) / 5) << 1); + out_be32(&priv->regs->mii_speed, ((mpc5xxx_get_bus_frequency(dev) >> 20) / 5) << 1); err = of_mdiobus_register(bus, np); if (err) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 152f4d83765a..d37d7a19a759 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -102,7 +102,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) struct resource res; struct mii_bus *new_bus; struct fec_info *fec; - int (*get_bus_freq)(struct device_node *); + int (*get_bus_freq)(struct device *); int ret = -ENOMEM, clock, speed; match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); @@ -136,7 +136,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) } if (get_bus_freq) { - clock = get_bus_freq(ofdev->dev.of_node); + clock = get_bus_freq(&ofdev->dev); if (!clock) { /* Use maximum divider if clock is unknown */ dev_warn(&ofdev->dev, "could not determine IPS clock\n"); diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c index 3ebdce804b90..bc5e36fd4288 100644 --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@ -437,7 +437,7 @@ static int mpc52xx_spi_probe(struct platform_device *op) ms->irq0 = irq_of_parse_and_map(op->dev.of_node, 0); ms->irq1 = irq_of_parse_and_map(op->dev.of_node, 1); ms->state = mpc52xx_spi_fsmstate_idle; - ms->ipb_freq = mpc5xxx_get_bus_frequency(op->dev.of_node); + ms->ipb_freq = mpc5xxx_get_bus_frequency(&op->dev); ms->gpio_cs_count = of_gpio_count(op->dev.of_node); if (ms->gpio_cs_count > 0) { master->num_chipselect = ms->gpio_cs_count; diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index e50f069b5ebb..3f1986c89694 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -1630,7 +1630,7 @@ mpc52xx_console_setup(struct console *co, char *options) return ret; } - uartclk = mpc5xxx_get_bus_frequency(np); + uartclk = mpc5xxx_fwnode_get_bus_frequency(of_fwnode_handle(np)); if (uartclk == 0) { pr_debug("Could not find uart clock frequency!\n"); return -EINVAL; @@ -1747,7 +1747,7 @@ static int mpc52xx_uart_of_probe(struct platform_device *op) /* set the uart clock to the input clock of the psc, the different * prescalers are taken into account in the set_baudrate() methods * of the respective chip */ - uartclk = mpc5xxx_get_bus_frequency(op->dev.of_node); + uartclk = mpc5xxx_get_bus_frequency(&op->dev); if (uartclk == 0) { dev_dbg(&op->dev, "Could not find uart clock frequency!\n"); return -EINVAL; -- cgit From 00bcb550dc60f73d593d2dbb718c4f521c7d7be8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 7 May 2022 13:01:46 +0300 Subject: powerpc/52xx: Get rid of of_node assignment Let GPIO library assign of_node from the parent device. This allows to move GPIO library and drivers to use fwnode APIs instead of being stuck with OF-only interfaces. Signed-off-by: Andy Shevchenko Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220507100147.5802-3-andriy.shevchenko@linux.intel.com --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 76ece6001cdf..74baded88f47 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -316,17 +317,15 @@ mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) return 0; } -static void -mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { int rc; - /* Only setup GPIO if the device tree claims the GPT is - * a GPIO controller */ - if (!of_find_property(node, "gpio-controller", NULL)) + /* Only setup GPIO if the device claims the GPT is a GPIO controller */ + if (!device_property_present(gpt->dev, "gpio-controller")) return; - gpt->gc.label = kasprintf(GFP_KERNEL, "%pOF", node); + gpt->gc.label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev)); if (!gpt->gc.label) { dev_err(gpt->dev, "out of memory\n"); return; @@ -338,7 +337,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) gpt->gc.get = mpc52xx_gpt_gpio_get; gpt->gc.set = mpc52xx_gpt_gpio_set; gpt->gc.base = -1; - gpt->gc.of_node = node; + gpt->gc.parent = gpt->dev; /* Setup external pin in GPIO mode */ clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK, @@ -351,8 +350,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) dev_dbg(gpt->dev, "%s() complete.\n", __func__); } #else /* defined(CONFIG_GPIOLIB) */ -static void -mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *p, struct device_node *np) { } +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { } #endif /* defined(CONFIG_GPIOLIB) */ /*********************************************************************** @@ -729,7 +727,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) dev_set_drvdata(&ofdev->dev, gpt); - mpc52xx_gpt_gpio_setup(gpt, ofdev->dev.of_node); + mpc52xx_gpt_gpio_setup(gpt); mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node); mutex_lock(&mpc52xx_gpt_list_mutex); -- cgit From a784101f77b1bef4b40f4ad68af3f54fcfa5321b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Jun 2022 15:52:35 +1000 Subject: KVM: PPC: Book3s: Fix warning about xics_rm_h_xirr_x This fixes "no previous prototype": arch/powerpc/kvm/book3s_hv_rm_xics.c:482:15: warning: no previous prototype for 'xics_rm_h_xirr_x' [-Wmissing-prototypes] Reported by the kernel test robot. Fixes: b22af9041927 ("KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220622055235.1139204-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_xics.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 8e4c79e2fcd8..08fb0843faf5 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -143,6 +143,7 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, } extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu); +extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu); extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, unsigned long mfrr); extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); -- cgit From 4228a996b072d36f3baafb4afdc2d2d66d2cbadf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 20 Jun 2022 09:31:03 +1000 Subject: selftests/powerpc: Skip energy_scale_info test on older firmware Older machines don't have the firmware feature that enables the code this test is testing. Skip the test if the sysfs directory doesn't exist. Also use the FAIL_IF() macro to provide more verbose error reporting if an error is encountered. Fixes: 57201d657eb7 ("selftest/powerpc: Add PAPR sysfs attributes sniff test") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220619233103.2666171-1-mpe@ellerman.id.au --- .../selftests/powerpc/papr_attributes/attr_test.c | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c index bab0dc06e90b..9b655be641c9 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -7,6 +7,7 @@ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. */ +#include #include #include #include @@ -32,7 +33,7 @@ enum type { NUM_VAL }; -int value_type(int id) +static int value_type(int id) { int val_type; @@ -54,15 +55,21 @@ int value_type(int id) return val_type; } -int verify_energy_info(void) +static int verify_energy_info(void) { const char *path = "/sys/firmware/papr/energy_scale_info"; struct dirent *entry; struct stat s; DIR *dirp; - if (stat(path, &s) || !S_ISDIR(s.st_mode)) - return -1; + errno = 0; + if (stat(path, &s)) { + SKIP_IF(errno == ENOENT); + FAIL_IF(errno); + } + + FAIL_IF(!S_ISDIR(s.st_mode)); + dirp = opendir(path); while ((entry = readdir(dirp)) != NULL) { @@ -76,25 +83,24 @@ int verify_energy_info(void) id = atoi(entry->d_name); attr_type = value_type(id); - if (attr_type == INVALID) - return -1; + FAIL_IF(attr_type == INVALID); /* Check if the files exist and have data in them */ sprintf(file_name, "%s/%d/desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); sprintf(file_name, "%s/%d/value", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); if (attr_type == STR_VAL) { sprintf(file_name, "%s/%d/value_desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); } } -- cgit From 2d386769753a71e57a1a38c7fb79013d3ac451e9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Jun 2022 18:02:43 +1000 Subject: powerpc: Update asm-prototypes.h comment This header was recently cleaned up in commit 76222808fc25 ("powerpc: Move C prototypes out of asm-prototypes.h"), update the comment to reflect it's proper purpose. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617080243.2177583-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/asm-prototypes.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 3b2bbc273055..81631e64dbeb 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -2,8 +2,9 @@ #ifndef _ASM_POWERPC_ASM_PROTOTYPES_H #define _ASM_POWERPC_ASM_PROTOTYPES_H /* - * This file is for prototypes of C functions that are only called - * from asm, and any associated variables. + * This file is for C prototypes of asm symbols that are EXPORTed. + * It allows the modversions logic to see their prototype and + * generate proper CRCs for them. * * Copyright 2016, Daniel Axtens, IBM Corporation. */ -- cgit From d7f396461518c766b2436d64b6d3ba6a4c418dcf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 23 Jun 2022 12:08:36 +0200 Subject: powerpc/powermac: Remove empty function note_scsi_host() note_scsi_host() has been an empty function since commit 6ee0d9f744d4 ("[POWERPC] Remove unused old code from powermac setup code"). Remove it. Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/26f8b72a4276c0bd8ed63860c7316f6361c351b4.1655978907.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/setup.h | 1 - arch/powerpc/platforms/powermac/setup.c | 7 ------- drivers/scsi/mesh.c | 5 ----- 3 files changed, 13 deletions(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 14a0b8af738e..d8c28902cf59 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -12,7 +12,6 @@ extern unsigned long long memory_limit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; -extern void note_scsi_host(struct device_node *, void *); /* Used in very early kernel initialization. */ extern unsigned long reloc_offset(void); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index f71735ec449f..04daa7f0a03c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -320,13 +320,6 @@ static void __init pmac_setup_arch(void) #endif /* CONFIG_ADB */ } -#ifdef CONFIG_SCSI -void note_scsi_host(struct device_node *node, void *host) -{ -} -EXPORT_SYMBOL(note_scsi_host); -#endif - static int initializing = 1; static int pmac_late_init(void) diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index a74f0c2c8ba7..84b541a57b7b 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1882,11 +1882,6 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) goto out_release; } - /* Old junk for root discovery, that will die ultimately */ -#if !defined(MODULE) - note_scsi_host(mesh, mesh_host); -#endif - mesh_host->base = macio_resource_start(mdev, 0); mesh_host->irq = macio_irq(mdev, 0); ms = (struct mesh_state *) mesh_host->hostdata; -- cgit From 890005a7d98f7452cfe86dcfb2aeeb7df01132ce Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 22 May 2022 19:52:56 +0530 Subject: powerpc/perf: Optimize clearing the pending PMI and remove WARN_ON for PMI check in power_pmu_disable commit 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") added a new function "pmi_irq_pending" in hw_irq.h. This function is to check if there is a PMI marked as pending in Paca (PACA_IRQ_PMI).This is used in power_pmu_disable in a WARN_ON. The intention here is to provide a warning if there is PMI pending, but no counter is found overflown. During some of the perf runs, below warning is hit: WARNING: CPU: 36 PID: 0 at arch/powerpc/perf/core-book3s.c:1332 power_pmu_disable+0x25c/0x2c0 Modules linked in: ----- NIP [c000000000141c3c] power_pmu_disable+0x25c/0x2c0 LR [c000000000141c8c] power_pmu_disable+0x2ac/0x2c0 Call Trace: [c000000baffcfb90] [c000000000141c8c] power_pmu_disable+0x2ac/0x2c0 (unreliable) [c000000baffcfc10] [c0000000003e2f8c] perf_pmu_disable+0x4c/0x60 [c000000baffcfc30] [c0000000003e3344] group_sched_out.part.124+0x44/0x100 [c000000baffcfc80] [c0000000003e353c] __perf_event_disable+0x13c/0x240 [c000000baffcfcd0] [c0000000003dd334] event_function+0xc4/0x140 [c000000baffcfd20] [c0000000003d855c] remote_function+0x7c/0xa0 [c000000baffcfd50] [c00000000026c394] flush_smp_call_function_queue+0xd4/0x300 [c000000baffcfde0] [c000000000065b24] smp_ipi_demux_relaxed+0xa4/0x100 [c000000baffcfe20] [c0000000000cb2b0] xive_muxed_ipi_action+0x20/0x40 [c000000baffcfe40] [c000000000207c3c] __handle_irq_event_percpu+0x8c/0x250 [c000000baffcfee0] [c000000000207e2c] handle_irq_event_percpu+0x2c/0xa0 [c000000baffcff10] [c000000000210a04] handle_percpu_irq+0x84/0xc0 [c000000baffcff40] [c000000000205f14] generic_handle_irq+0x54/0x80 [c000000baffcff60] [c000000000015740] __do_irq+0x90/0x1d0 [c000000baffcff90] [c000000000016990] __do_IRQ+0xc0/0x140 [c0000009732f3940] [c000000bafceaca8] 0xc000000bafceaca8 [c0000009732f39d0] [c000000000016b78] do_IRQ+0x168/0x1c0 [c0000009732f3a00] [c0000000000090c8] hardware_interrupt_common_virt+0x218/0x220 This means that there is no PMC overflown among the active events in the PMU, but there is a PMU pending in Paca. The function "any_pmc_overflown" checks the PMCs on active events in cpuhw->n_events. Code snippet: <<>> if (any_pmc_overflown(cpuhw)) clear_pmi_irq_pending(); else WARN_ON(pmi_irq_pending()); <<>> Here the PMC overflown is not from active event. Example: When we do perf record, default cycles and instructions will be running on PMC6 and PMC5 respectively. It could happen that overflowed event is currently not active and pending PMI is for the inactive event. Debug logs from trace_printk: <<>> any_pmc_overflown: idx is 5: pmc value is 0xd9a power_pmu_disable: PMC1: 0x0, PMC2: 0x0, PMC3: 0x0, PMC4: 0x0, PMC5: 0xd9a, PMC6: 0x80002011 <<>> Here active PMC (from idx) is PMC5 , but overflown PMC is PMC6(0x80002011). When we handle PMI interrupt for such cases, if the PMC overflown is from inactive event, it will be ignored. Reference commit: commit bc09c219b2e6 ("powerpc/perf: Fix finding overflowed PMC in interrupt") Patch addresses two changes: 1) Fix 1 : Removal of warning ( WARN_ON(pmi_irq_pending()); ) We were printing warning if no PMC is found overflown among active PMU events, but PMI pending in PACA. But this could happen in cases where PMC overflown is not in active PMC. An inactive event could have caused the overflow. Hence the warning is not needed. To know pending PMI is from an inactive event, we need to loop through all PMC's which will cause more SPR reads via mfspr and increase in context switch. Also in existing function: perf_event_interrupt, already we ignore PMI's overflown when it is from an inactive PMC. 2) Fix 2: optimization in clearing pending PMI. Currently we check for any active PMC overflown before clearing PMI pending in Paca. This is causing additional SPR read also. From point 1, we know that if PMI pending in Paca from inactive cases, that is going to be ignored during replay. Hence if there is pending PMI in Paca, just clear it irrespective of PMC overflown or not. In summary, remove the any_pmc_overflown check entirely in power_pmu_disable. ie If there is a pending PMI in Paca, clear it, since we are in pmu_disable. There could be cases where PMI is pending because of inactive PMC ( which later when replayed also will get ignored ), so WARN_ON could give false warning. Hence removing it. Fixes: 2c9ac51b850d ("powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC") Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220522142256.24699-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 140502a7fdf8..03c64a0195df 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1349,27 +1349,22 @@ static void power_pmu_disable(struct pmu *pmu) * a PMI happens during interrupt replay and perf counter * values are cleared by PMU callbacks before replay. * - * If any PMC corresponding to the active PMU events are - * overflown, disable the interrupt by clearing the paca - * bit for PMI since we are disabling the PMU now. - * Otherwise provide a warning if there is PMI pending, but - * no counter is found overflown. + * Disable the interrupt by clearing the paca bit for PMI + * since we are disabling the PMU now. Otherwise provide a + * warning if there is PMI pending, but no counter is found + * overflown. + * + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). */ - if (any_pmc_overflown(cpuhw)) { - /* - * Since power_pmu_disable runs under local_irq_save, it - * could happen that code hits a PMC overflow without PMI - * pending in paca. Hence only clear PMI pending if it was - * set. - * - * If a PMI is pending, then MSR[EE] must be disabled (because - * the masked PMI handler disabling EE). So it is safe to - * call clear_pmi_irq_pending(). - */ - if (pmi_irq_pending()) - clear_pmi_irq_pending(); - } else - WARN_ON(pmi_irq_pending()); + if (pmi_irq_pending()) + clear_pmi_irq_pending(); val = mmcra = cpuhw->mmcr.mmcra; -- cgit From 42e0576eec75479fa7709c41e5c3b9ec556b8f4d Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:39 +0530 Subject: selftests/powerpc/pmu: Add mask/shift bits for extracting threshold compare field In power10, threshold compare field is not part of the raw event code and provided via event attribute config1. Hence add the mask and shift bits based on event attribute config1, to extract the threshold compare value for power10 Also add a new function called get_thresh_cmp_val to compute and return the threshold compare field for a given platform, since incase of power10, threshold compare value provided is decimal. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-2-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/misc.c | 44 ++++++++++++++++++++++ .../selftests/powerpc/pmu/sampling_tests/misc.h | 1 + 2 files changed, 45 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index c01a31d5f4ee..b984d1e162ac 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -60,6 +60,8 @@ static void init_ev_encodes(void) switch (pvr) { case POWER10: + ev_mask_thd_cmp = 0x3ffff; + ev_shift_thd_cmp = 0; ev_mask_rsq = 1; ev_shift_rsq = 9; ev_mask_comb = 3; @@ -410,3 +412,45 @@ u64 get_reg_value(u64 *intr_regs, char *register_name) return *(intr_regs + register_bit_position); } + +int get_thresh_cmp_val(struct event event) +{ + int exp = 0; + u64 result = 0; + u64 value; + + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return EV_CODE_EXTRACT(event.attr.config, thd_cmp); + + value = EV_CODE_EXTRACT(event.attr.config1, thd_cmp); + + if (!value) + return value; + + /* + * Incase of P10, thresh_cmp value is not part of raw event code + * and provided via attr.config1 parameter. To program threshold in MMCRA, + * take a 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = -1; + else + result = (exp << 8) | value; + return result; +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 7675f3177725..078120883fde 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -52,6 +52,7 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); +int get_thresh_cmp_val(struct event event); static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) { -- cgit From a069b5f980e3b65b64b6322b71d5819f90dbb42b Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:40 +0530 Subject: selftests/powerpc: Add support to fetch "platform" and "base platform" from auxv to detect platform. The /proc/self/auxv contains information about "platform" on any system. Also "base platform" which is an indication about platform string corresponding to the real PVR. When systems are booted in compat mode, say, power10 booted in power9 mode, "platform" will point to power9 whereas base platform will point to power10. Incase, if the distro doesn't support platform indicated by real PVR, base platform will have a default value. The mismatch of platform/base platform is an indication of system booted in compat mode. In such cases, distro will have a Generic Compat registered which supports basic features for performance monitoring. Some of the selftest needs to be handled differently ( ex: generic events, alternative events, bhrb filter map) in Generic Compat PMU. Hence selftest framework needs utility functions to identify such cases. One way is make sure of auxv information. Below condition can be used to detect if Generic Compat PMU is registered. ie: if ((AT_PLATFORM != AT_BASE_PLATFORM) && (AT_BASE_PLATFORM != PVR)) this indicates Generic Compat PMU. Add utility function in "include/utils.h" to return: AT_PLATFORM and AT_BASE_PLATFORM from auxv. Also update misc.c in "sampling_tests" folder to add function to use above check to determine presence of generic compat pmu. In other architecture ( like x86 ), pmu_name is exposed via "/sys/bus/event_source/devices/cpu/caps". The same could be used in powerpc in future. Since currently we don't have the "caps" support in powerpc, patch uses auxv information to detect platform type and compat mode. But as placeholder utility function is added considering possiblity of getting "caps" information via sysfs. If that doesn't exist, fallback to using auxv information. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-3-atrajeev@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/include/utils.h | 10 +++++ .../selftests/powerpc/pmu/sampling_tests/misc.c | 50 ++++++++++++++++++++++ .../selftests/powerpc/pmu/sampling_tests/misc.h | 3 ++ 3 files changed, 63 insertions(+) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index b9fa9cd709df..e222a5858450 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -74,6 +74,16 @@ static inline bool have_hwcap2(unsigned long ftr2) } #endif +static inline char *auxv_base_platform(void) +{ + return ((char *)get_auxv_entry(AT_BASE_PLATFORM)); +} + +static inline char *auxv_platform(void) +{ + return ((char *)get_auxv_entry(AT_PLATFORM)); +} + bool is_ppc64le(void); int using_hash_mmu(bool *using_hash); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index b984d1e162ac..6e30b455cbd6 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -454,3 +454,53 @@ int get_thresh_cmp_val(struct event event) result = (exp << 8) | value; return result; } + +/* + * Utility function to check for generic compat PMU + * by comparing base_platform value from auxv and real + * PVR value. + */ +static bool auxv_generic_compat_pmu(void) +{ + int base_pvr = 0; + + if (!strcmp(auxv_base_platform(), "power9")) + base_pvr = POWER9; + else if (!strcmp(auxv_base_platform(), "power10")) + base_pvr = POWER10; + + return (!base_pvr); +} + +/* + * Check for generic compat PMU. + * First check for presence of pmu_name from + * "/sys/bus/event_source/devices/cpu/caps". + * If doesn't exist, fallback to using value + * auxv. + */ +bool check_for_generic_compat_pmu(void) +{ + char pmu_name[256]; + + memset(pmu_name, 0, sizeof(pmu_name)); + if (read_sysfs_file("bus/event_source/devices/cpu/caps/pmu_name", + pmu_name, sizeof(pmu_name)) < 0) + return auxv_generic_compat_pmu(); + + if (!strcmp(pmu_name, "ISAv3")) + return true; + else + return false; +} + +/* + * Check if system is booted in compat mode. + */ +bool check_for_compat_mode(void) +{ + char *platform = auxv_platform(); + char *base_platform = auxv_base_platform(); + + return strcmp(platform, base_platform); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 078120883fde..c0e923f38793 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -5,6 +5,7 @@ * Copyright 2022, Kajol Jain, IBM Corp. */ +#include #include "../event.h" #define POWER10 0x80 @@ -53,6 +54,8 @@ int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); int get_thresh_cmp_val(struct event event); +bool check_for_generic_compat_pmu(void); +bool check_for_compat_mode(void); static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) { -- cgit From 50d9c30a685c14e41e44d48a08a08703c680d861 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:41 +0530 Subject: selftests/powerpc/pmu: Add interface test for mmcra_thresh_cmp fields The testcase uses event code 0x35340401e0 for load only sampling, to verify the settings of thresh compare field in Monitor Mode Control Register A (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). Testcase checks if the thresh compare field is programmed correctly via perf interface to MMCRA register. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-4-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcra_thresh_cmp_test.c | 74 ++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index a785c6a173b9..6508e6074bac 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test mmcra_thresh_marked_sample_test + mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c new file mode 100644 index 000000000000..904362f172c9 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define p9_EventCode 0x13E35340401e0 +#define p10_EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_cmp(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Skip for comapt mode */ + SKIP_IF(check_for_compat_mode()); + + /* Init the event for the sampling test */ + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init_sampling(&event, p9_EventCode); + } else { + event_init_sampling(&event, p10_EventCode); + event.attr.config1 = 1000; + } + + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that thresh cmp match with the corresponding event code fields */ + FAIL_IF(get_thresh_cmp_val(event) != + get_mmcra_thd_cmp(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcra_thresh_cmp, "mmcra_thresh_cmp")); +} -- cgit From 61d89900315aa25f6da0c1bc800ce295d74d69f1 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:42 +0530 Subject: selftests/powerpc/pmu: Add support for branch sampling in get_intr_regs function Add support for sample type as PERF_SAMPLE_BRANCH_STACK in sampling tests. This change is a precursor/helper for sampling testcases, that test branck stack feature in perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-5-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/misc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index 6e30b455cbd6..5a26fc3a9706 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -259,13 +259,32 @@ u64 *get_intr_regs(struct event *event, void *sample_buff) u64 *intr_regs; size_t size = 0; - if ((type ^ PERF_SAMPLE_REGS_INTR)) + if ((type ^ (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_BRANCH_STACK)) && + (type ^ PERF_SAMPLE_REGS_INTR)) return NULL; intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); if (!intr_regs) return NULL; + if (type & PERF_SAMPLE_BRANCH_STACK) { + /* + * PERF_RECORD_SAMPLE and PERF_SAMPLE_BRANCH_STACK: + * struct { + * struct perf_event_header hdr; + * u64 number_of_branches; + * struct perf_branch_entry[number_of_branches]; + * u64 data[]; + * }; + * struct perf_branch_entry { + * u64 from; + * u64 to; + * u64 misc; + * }; + */ + intr_regs += ((*intr_regs) * 3) + 1; + } + /* * First entry in the sample buffer used to specify * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access -- cgit From c55dabc6d577a864cd618107ea6aaa6cad8c987b Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:43 +0530 Subject: selftests/powerpc/pmu: Add interface test for mmcra_ifm field of indirect call type The testcase uses "instructions" event to check if the Instruction filtering mode(IFM) bits are programmed correctly for indirect branch type. Testcase checks if IFM bits are programmed correctly to Monitor Mode Control Register A (MMCRA) via perf interface for ISA v3.1 platform. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-6-atrajeev@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/pmu/branch_loops.S | 28 +++++++++ .../selftests/powerpc/pmu/sampling_tests/Makefile | 5 +- .../pmu/sampling_tests/mmcra_bhrb_ind_call_test.c | 69 ++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/pmu/branch_loops.S create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c diff --git a/tools/testing/selftests/powerpc/pmu/branch_loops.S b/tools/testing/selftests/powerpc/pmu/branch_loops.S new file mode 100644 index 000000000000..de758dd3cecf --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/branch_loops.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include + + .text + +#define ITER_SHIFT 31 + +FUNC_START(indirect_branch_loop) + li r3, 1 + sldi r3, r3, ITER_SHIFT + +1: cmpdi r3, 0 + beqlr + + addi r3, r3, -1 + + ld r4, 2f@got(%r2) + mtctr r4 + bctr + + .balign 32 +2: b 1b + +FUNC_END(indirect_branch_loop) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 6508e6074bac..89def6e706c8 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,9 +4,10 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test + mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ + mmcra_bhrb_ind_call_test top_srcdir = ../../../../../.. include ../../../lib.mk -$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c new file mode 100644 index 000000000000..f0706730c099 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void indirect_branch_loop(void); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for indirect branch mode */ +#define IFM_IND_BRANCH 0x2 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb ind_call. + */ +static int mmcra_bhrb_ind_call_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_IND_CALL; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + indirect_branch_loop(); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_IND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_ind_call_test, "mmcra_bhrb_ind_call_test"); +} -- cgit From faa64ddc1e398131e7eaadc8f03cb7bd3904eff2 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:44 +0530 Subject: selftests/powerpc/pmu: Add interface test for mmcra_ifm field for any branch type The testcase uses "instructions" event to check if the Instruction filtering mode(IFM) bits are programmed correctly for type any branch. Testcase checks if IFM bits is programmed correctly to Monitor Mode Control Register A (MMCRA) via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-7-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcra_bhrb_any_test.c | 65 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 89def6e706c8..63b084f66dbf 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -5,7 +5,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ - mmcra_bhrb_ind_call_test + mmcra_bhrb_ind_call_test mmcra_bhrb_any_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c new file mode 100644 index 000000000000..14854694af62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for any branch mode */ +#define IFM_ANY_BRANCH 0x0 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb any call. + */ +static int mmcra_bhrb_any_test(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_ANY_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_any_test, "mmcra_bhrb_any_test"); +} -- cgit From 014fb4a3ae746276f4320f7010d03157485051cb Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:45 +0530 Subject: selftests/powerpc/pmu: Add interface test for mmcra_ifm field for conditional branch type The testcase uses "instructions" event to check if the Instruction filtering mode(IFM) bits are programmed correctly for conditional branch type. Testcase checks if IFM bits is programmed correctly to Monitor Mode Control Register A (MMCRA) via perf interface for ISA v3.1 platform. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-8-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcra_bhrb_cond_test.c | 69 ++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 63b084f66dbf..53569fbb1cda 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -5,7 +5,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ - mmcra_bhrb_ind_call_test mmcra_bhrb_any_test + mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c new file mode 100644 index 000000000000..3e08176eb7f8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for conditional branch mode */ +#define IFM_COND_BRANCH 0x3 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb cond call. + */ +static int mmcra_bhrb_cond_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_COND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_cond_test, "mmcra_bhrb_cond_test"); +} -- cgit From 84cc4e66d90f6624f821df381073813dd502f657 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:46 +0530 Subject: selftests/powerpc/pmu: Add interface test for bhrb disable field The testcase uses "instructions" event to generate the samples and fetch Monitor Mode Control Register A (MMCRA) when overflow. Branch History Rolling Buffer(bhrb) disable bit is part of MMCRA which need to be verified by perf interface. Testcase checks if the bhrb disable bit of MMCRA register is programmed correctly via perf interface for ISA v3.1 platform Also make get_mmcra_ifm return type as u64. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-9-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 3 +- .../selftests/powerpc/pmu/sampling_tests/misc.h | 2 +- .../pmu/sampling_tests/mmcra_bhrb_disable_test.c | 66 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 53569fbb1cda..f4da49d55d57 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -5,7 +5,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ - mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test + mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ + mmcra_bhrb_disable_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index c0e923f38793..874a1596add8 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -188,7 +188,7 @@ static inline int get_mmcra_sm(u64 mmcra, int pmc) return ((mmcra >> 42) & 0x3); } -static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc) +static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc) { if (pvr == POWER10) return mmcra & BHRB_DISABLE; diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c new file mode 100644 index 000000000000..186a853c0f62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is set in MMCRA */ + FAIL_IF(get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_test, "mmcra_bhrb_disable_test"); +} -- cgit From 9cfd110a36649f9452120a648f15f32d1c82b99d Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:47 +0530 Subject: selftests/powerpc/pmu: Refactor the platform check and add macros to find array size/PVR The platform check for selftest support "check_pvr_for_sampling_tests" is specific to sampling tests which includes PVR check, presence of PMU and extended regs support. Extended regs support is needed for sampling tests which tests whether PMU registers are programmed correctly. There could be other sampling tests which may not need extended regs, example, bhrb filter tests which only needs validity check via event open. Hence refactor the platform check to have a common function "platform_check_for_tests" that checks only for PVR check and presence of PMU. The existing function "check_pvr_for_sampling_tests" will invoke the common function and also will include checks for extended regs specific for sampling. The common function can also be used by tests other than sampling like event code tests. Add macro to find array size ("ARRAY_SIZE") to sampling tests "misc.h" file. This can be used in next tests to find event array size. Also update "include/reg.h" to add macros to find minor and major version from PVR which will be used in testcases. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-10-atrajeev@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/include/reg.h | 4 ++++ .../selftests/powerpc/pmu/sampling_tests/misc.c | 20 ++++++++++++++++---- .../selftests/powerpc/pmu/sampling_tests/misc.h | 3 +++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c422be8a42b2..2ac7a4c7749c 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -55,6 +55,10 @@ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) #define SPRN_PVR 0x11F +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index 5a26fc3a9706..eac6420abdf1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -121,12 +121,10 @@ int check_extended_regs_support(void) return -1; } -int check_pvr_for_sampling_tests(void) +int platform_check_for_tests(void) { pvr = PVR_VER(mfspr(SPRN_PVR)); - platform_extended_mask = perf_get_platform_reg_mask(); - /* * Check for supported platforms * for sampling test @@ -138,19 +136,33 @@ int check_pvr_for_sampling_tests(void) * Check PMU driver registered by looking for * PPC_FEATURE2_EBB bit in AT_HWCAP2 */ - if (!have_hwcap2(PPC_FEATURE2_EBB)) + if (!have_hwcap2(PPC_FEATURE2_EBB) || !have_hwcap2(PPC_FEATURE2_ARCH_3_00)) goto out; + return 0; + +out: + printf("%s: Tests unsupported for this platform\n", __func__); + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + SKIP_IF(platform_check_for_tests()); + + platform_extended_mask = perf_get_platform_reg_mask(); /* check if platform supports extended regs */ if (check_extended_regs_support()) goto out; init_ev_encodes(); return 0; + out: printf("%s: Sampling tests un-supported\n", __func__); return -1; } + /* * Allocate mmap buffer of "mmap_pages" number of * pages. diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 874a1596add8..4181755cf5a0 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -18,6 +18,8 @@ #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ #define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; @@ -36,6 +38,7 @@ extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; extern int pvr; extern u64 platform_extended_mask; extern int check_pvr_for_sampling_tests(void); +extern int platform_check_for_tests(void); /* * Event code field extraction macro. -- cgit From 2ac05f8f2e4b9068e5bbc0836b35abafd70f02c1 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:48 +0530 Subject: selftests/powerpc/pmu: Add selftest to check branch stack enablement will not crash on any platforms While enabling branch stack for an event, BHRB (Branch History Rolling Buffer) filter is set using bhrb_filter_map() callback. This callback is not defined for cases like generic_compat_pmu or in case where there is no PMU registered. A fix was added in kernel to address a crash issue observed while enabling branch stack for environments which doesn't have this callback. commit b460b512417a ("powerpc/perf: Fix crashes with generic_compat_pmu & BHRB"). Add perf sampling test to exercise this code path and make sure enabling branch stack shouldn't crash in any platform. Testcase uses software event cycles since software event is available and can be used even in cases without PMU. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-11-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c | 59 ++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index f4da49d55d57..8d4839cde013 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -6,7 +6,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ - mmcra_bhrb_disable_test + mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..4644c6782974 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * enabling branch stack doesn't crash in any + * environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU + * A fix for bhrb sampling crash was added in kernel + * via commit: b460b512417a ("powerpc/perf: Fix crashes + * with generic_compat_pmu & BHRB") + * + * This testcase exercises this code by doing branch + * stack enable for software event. s/w event is used + * since software event will work even in platform + * without PMU. + */ +static int bhrb_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. + */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* + * Return code of event_open is not + * considered since test just expects no crash from + * using PERF_SAMPLE_BRANCH_STACK. Also for environment + * like generic compat PMU, branch stack is unsupported. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(bhrb_no_crash_wo_pmu_test, "bhrb_no_crash_wo_pmu_test"); +} -- cgit From 11bbc524390572dfe1bd0375c7e7ab8f9ddf4b34 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:49 +0530 Subject: selftests/powerpc/pmu: Add selftest to check PERF_SAMPLE_REGS_INTR option will not crash on any platforms With sampling, --intr-regs option is used for capturing interrupt regs. When --intr-regs option is used, PMU code uses is_sier_available() function which uses PMU flags in the code. In environment where platform specific PMU is not registered, PMU flags is not defined. A fix was added in kernel to address crash while accessing is_sier_available() function when pmu is not set. commit f75e7d73bdf7 ("powerpc/perf: Fix crash with is_sier_available when pmu is not set"). Add perf sampling test to exercise this code and make sure enabling intr_regs shouldn't crash in any platform. Testcase uses software event cycles since software event will work even in cases without PMU. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-12-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../intr_regs_no_crash_wo_pmu_test.c | 57 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 8d4839cde013..8d4566dac440 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -6,7 +6,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ - mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test + mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..839d2d225da0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * sampling with -intr-regs doesn't crash + * in any environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU. + * A fix for crash with intr_regs was + * addressed in commit: f75e7d73bdf7 in kernel. + * + * This testcase exercises this code path by doing + * intr_regs using software event. Software event is + * used since s/w event will work even in platform + * without PMU. + */ +static int intr_regs_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. + */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.disabled = 1; + + /* + * Return code of event_open is not considered + * since test just expects no crash from using + * PERF_SAMPLE_REGS_INTR. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(intr_regs_no_crash_wo_pmu_test, "intr_regs_no_crash_wo_pmu_test"); +} -- cgit From f6380e05aa92b005ac6f38be92afbdd2a0706cff Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:50 +0530 Subject: selftests/powerpc/pmu: Add selftest for checking valid and invalid bhrb filter maps For PERF_SAMPLE_BRANCH_STACK sample type, different branch_sample_type, ie branch filters are supported. All the branch filters are not supported in powerpc. Example, power10 platform supports any, ind_call and cond branch filters. Whereas, it is different in power9. Testcase checks event open for invalid and valid branch sample types. The branch types for testcase are picked from "perf_branch_sample_type" in perf_event.h Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-13-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/bhrb_filter_map_test.c | 105 +++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 8d4566dac440..ed9befc2f836 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -6,7 +6,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ - mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test + mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ + bhrb_filter_map_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c new file mode 100644 index 000000000000..8182647c63c8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test to check bhrb filter + * map. All the branch filters are not supported + * in powerpc. Supported filters in: + * power10: any, any_call, ind_call, cond + * power9: any, any_call + * + * Testcase checks event open for invalid bhrb filter + * types should fail and valid filter types should pass. + * Testcase does validity check for these branch + * sample types. + */ + +/* Invalid types for powerpc */ +/* Valid bhrb filters in power9/power10 */ +int bhrb_filter_map_valid_common[] = { + PERF_SAMPLE_BRANCH_ANY, + PERF_SAMPLE_BRANCH_ANY_CALL, +}; + +/* Valid bhrb filters in power10 */ +int bhrb_filter_map_valid_p10[] = { + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_COND, +}; + +#define EventCode 0x1001e + +static int bhrb_filter_map_test(void) +{ + struct event event; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Skip for Generic compat PMU since + * bhrb filters is not supported + */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for the sampling test */ + event_init(&event, EventCode); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* Invalid filter maps which are expected to fail in event_open */ + for (i = PERF_SAMPLE_BRANCH_USER_SHIFT; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + /* Skip the valid branch sample type */ + if (i == PERF_SAMPLE_BRANCH_ANY_SHIFT || i == PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT \ + || i == PERF_SAMPLE_BRANCH_IND_CALL_SHIFT || i == PERF_SAMPLE_BRANCH_COND_SHIFT) + continue; + event.attr.branch_sample_type = 1U << i; + FAIL_IF(!event_open(&event)); + } + + /* valid filter maps for power9/power10 which are expected to pass in event_open */ + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_common[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + + /* + * filter maps which are valid in power10 and invalid in power9. + * PVR check is used here since PMU specific data like bhrb filter + * alternative tests is handled by respective PMU driver code and + * using PVR will work correctly for all cases including generic + * compat mode. + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + } else { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(bhrb_filter_map_test, "bhrb_filter_map_test"); +} -- cgit From 0321f2d0ae6959f79f5b8a21b31694b54dbaa35d Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:51 +0530 Subject: selftests/powerpc/pmu: Add selftest for mmcr1 pmcxsel/unit/cache fields The testcase uses event code "0x21c040" to verify the settings for different fields in Monitor Mode Control Register 1 (MMCR1). The fields include PMCxSEL, PMCXCOMB PMCxUNIT, cache. Checks if these fields are translated correctly via perf interface to MMCR1 Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-14-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr1_sel_unit_cache_test.c | 77 ++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index ed9befc2f836..f966d3359c6b 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -7,7 +7,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ - bhrb_filter_map_test + bhrb_filter_map_test mmcr1_sel_unit_cache_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c new file mode 100644 index 000000000000..f0c003282630 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */ + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x21c040 + +/* + * A perf sampling test for mmcr1 + * fields : pmcxsel, unit, cache. + */ +static int mmcr1_sel_unit_cache(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_period = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + event_enable(&event); + + /* workload to make the event overflow */ + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + event_disable(&event); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that pmcxsel, unit and cache field of MMCR1 + * match with corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, pmcxsel) != + get_mmcr1_pmcxsel(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, unit) != + get_mmcr1_unit(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, cache) != + get_mmcr1_cache(get_reg_value(intr_regs, "MMCR1"), 1)); + + free(p); + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcr1_sel_unit_cache, "mmcr1_sel_unit_cache")); +} -- cgit From 78cd598af648131d2e9a32825c59b8d1e9ec9357 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:10:52 +0530 Subject: selftests/powerpc/pmu: Add interface test for bhrb disable field for non-branch samples The testcase uses "instructions" event to generate the samples and fetch Monitor Mode Control Register A (MMCRA) when overflow. Branch History Rolling Buffer(bhrb) disable bit is part of MMCRA which need to be verified by perf interface. Incase sample is not of branch type, bhrb disable bit is explicitly set to 1. Testcase checks if the bhrb disable bit is set of MMCRA register via perf interface for ISA v3.1 platform Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-15-atrajeev@linux.vnet.ibm.com --- .../selftests/powerpc/pmu/sampling_tests/Makefile | 2 +- .../mmcra_bhrb_disable_no_branch_test.c | 64 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index f966d3359c6b..9e67351fb252 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -7,7 +7,7 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ - bhrb_filter_map_test mmcr1_sel_unit_cache_test + bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c new file mode 100644 index 000000000000..488c865387e4 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_no_branch_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is set in MMCRA for non-branch samples */ + FAIL_IF(!get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_no_branch_test, "mmcra_bhrb_disable_no_branch_test"); +} -- cgit From 0a110a4b69dacc30ce4f6c10c0396bd2fd097831 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:53 +0530 Subject: selftests/powerpc/pmu: Add support for perf event code tests Add new folder for enabling perf event code tests which includes checking for group constraints, valid/invalid events, also checks for event excludes, alternatives so on. A new folder "event_code_tests", is created under "selftests/powerpc/pmu". Also updates the corresponding Makefiles in "selftests/powerpc" and "event_code_tests" folder. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-16-atrajeev@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/pmu/Makefile | 11 +++++++++-- tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index edbd96d3b2ab..30803353bd7c 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests +all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -27,6 +27,7 @@ override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) @@ -34,6 +35,7 @@ override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) @@ -41,12 +43,14 @@ override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all @@ -54,4 +58,7 @@ ebb: sampling_tests: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb sampling_tests +event_code_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests event_code_tests diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile new file mode 100644 index 000000000000..6377ae205064 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +TEST_GEN_PROGS := + +top_srcdir = ../../../../../.. +include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c -- cgit From 9258c0aa755fac469869dd647a6c3d5299ff7725 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:54 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for PMC5 and PMC6 Events using Performance Monitor Counter 5 (PMC5) and Performance Monitor Counter 6 (PMC6) can't have other fields in event code like cache bits, thresholding or marked bit. PMC5 and PMC6 only supports base events: ie 500fa and 600f4. Other combinations should fail. Testcase tries setting other bits in event code for 500fa and 600f4 to check this scenario. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-17-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../event_code_tests/group_constraint_pmc56_test.c | 63 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 6377ae205064..eb0017233b0b 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := +TEST_GEN_PROGS := group_constraint_pmc56_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c new file mode 100644 index 000000000000..f5ee4796d46c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for checking constraint checks for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). Events using + * PMC5/PMC6 shouldn't have other fields in event + * code like cache bits, thresholding or marked bit. + */ + +static int group_constraint_pmc56(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using PMC5 and PMC6 with cache bit + * set in event code is expected to fail. + */ + event_init(&event, 0x2500fa); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x2600f4); + FAIL_IF(!event_open(&event)); + + /* + * PMC5 and PMC6 only supports base events: + * ie 500fa and 600f4. Other combinations + * should fail. + */ + event_init(&event, 0x501e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x6001e); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x501fa); + FAIL_IF(!event_open(&event)); + + /* + * Events using PMC5 and PMC6 with random + * sampling bits set in event code should fail + * to schedule. + */ + event_init(&event, 0x35340500fa); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc56, "group_constraint_pmc56"); +} -- cgit From 4000c2e5d40a3ee340c3940949d658fc52a56603 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:55 +0530 Subject: selftests/powerpc/pmu: Add selftest to check PMC5/6 is excluded from some constraint checks Events using Performance Monitor Counter 5 (PMC5) and Performance Monitor Counter 6 (PMC6) should be excluded from constraint check when scheduled along with group of events. Example, combination of PMC5, PMC6, and an event with cache bit will succeed to schedule though first two events doesn't have cache bit set. Testcase use three events, ie, 600f4(cycles), 500fa(instructions), 22C040 with cache bit (dc_ic) set to test this constraint check. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-18-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_pmc56_exclude_constraints_test.c | 64 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index eb0017233b0b..c0eb28935e6e 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := group_constraint_pmc56_test +TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c new file mode 100644 index 000000000000..cff9ac170df6 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include +#include +#include "../sampling_tests/misc.h" + +/* + * Testcase for group constraint check for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). + * Test that pmc5/6 is excluded from constraint + * check when scheduled along with group of events. + */ + +static int group_pmc56_exclude_constraints(void) +{ + struct event *e, events[3]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PMC5/6 is excluded from constraint bit + * check along with group of events. Use + * group of events with PMC5, PMC6 and also + * event with cache bit (dc_ic) set. Test expects + * this set of events to go in as a group. + */ + e = &events[0]; + event_init(e, 0x500fa); + + e = &events[1]; + event_init(e, 0x600f4); + + e = &events[2]; + event_init(e, 0x22C040); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail if constraint check fails. + * Since we are asking for events in a group and since + * PMC5/PMC6 is excluded from group constraints, even_open + * should pass. + */ + for (i = 1; i < 3; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 3; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_pmc56_exclude_constraints, "group_pmc56_exclude_constraints"); +} -- cgit From 827765a449dbc41ad19ab3e31757c93cac47b728 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:56 +0530 Subject: selftests/powerpc/pmu: Add selftest to check constraint for number of counters in use. Testcase for group constraint check for number of counters in use. The number of programmable counters is from PMC1 to PMC4. Testcase uses four events with PMC1 to PMC4 and 5th event without any PMC which is expected to fail since it is exceeding the number of counters in use. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-19-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_constraint_pmc_count_test.c | 70 ++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index c0eb28935e6e..6310634c5beb 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test +TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c new file mode 100644 index 000000000000..af7c5c75101c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for number of counters in use. + * The number of programmable counters is from + * performance monitor counter 1 to performance + * monitor counter 4 (PMC1-PMC4). If number of + * counters in use exceeds the limit, next event + * should fail to schedule. + */ + +static int group_constraint_pmc_count(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Test for number of counters in use. + * Use PMC1 to PMC4 for leader and 3 sibling + * events. Trying to open fourth event should + * fail here. + */ + e = &events[0]; + event_init(e, 0x1001a); + + e = &events[1]; + event_init(e, 0x200fc); + + e = &events[2]; + event_init(e, 0x30080); + + e = &events[3]; + event_init(e, 0x40054); + + e = &events[4]; + event_init(e, 0x0002c); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail on event 4 if constraint + * check fails + */ + for (i = 1; i < 5; i++) { + if (i == 4) + FAIL_IF(!event_open_with_group(&events[i], events[0].fd)); + else + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + } + + for (i = 1; i < 4; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc_count, "group_constraint_pmc_count"); +} -- cgit From 38b6da45304e55de11b8b79a0f31a4d61818e63e Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:57 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check when using same PMC Testcase for group constraint check when using events with same PMC. Multiple events in a group asking for same PMC should fail. Testcase uses "0x22C040" on PMC2 as leader and also subling which is expected to fail. Using PMC1 for sibling event should pass the test. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-20-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../group_constraint_repeat_test.c | 56 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 6310634c5beb..ace100e3226e 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test +TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ + group_constraint_repeat_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c new file mode 100644 index 000000000000..371cd05bb3ed --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The processor's L1 data cache was reloaded */ +#define EventCode1 0x21C040 +#define EventCode2 0x22C040 + +/* + * Testcase for group constraint check + * when using events with same PMC. + * Multiple events in a group shouldn't + * ask for same PMC. If so it should fail. + */ + +static int group_constraint_repeat(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Two events in a group using same PMC + * should fail to get scheduled. Usei same PMC2 + * for leader and sibling event which is expected + * to fail. + */ + event_init(&leader, EventCode1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode1); + + /* Expected to fail since sibling event is requesting same PMC as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode2); + + /* Expected to pass since sibling event is requesting different PMC */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_repeat, "group_constraint_repeat"); +} -- cgit From dc431be3b54901744e84f5f94f0f0a2b5d36bb7f Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:58 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for radix_scope_qual field Testcase for group constraint check for radix_scope_qual field which is used to program Monitor Mode Control Register (MMCR1) bit 18. All events in the group should match radix_scope_qual bit, otherwise event_open for the group should fail. Testcase uses "0x14242" (PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2) with radix_scope_qual bit set for power10. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-21-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_constraint_radix_scope_qual_test.c | 56 ++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index ace100e3226e..5b61fb0b9fd6 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ - group_constraint_repeat_test + group_constraint_repeat_test group_constraint_radix_scope_qual_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c new file mode 100644 index 000000000000..9225618b846a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_1 0x14242 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L3 */ +#define EventCode_2 0x24242 + +/* + * Testcase for group constraint check for radix_scope_qual + * field which is used to program Monitor Mode Control + * egister (MMCR1) bit 18. + * All events in the group should match radix_scope_qual, + * bits otherwise event_open for the group should fail. + */ + +static int group_constraint_radix_scope_qual(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is aplicable on power10 only. + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for radix_scope_qual bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x200fc); + + /* Expected to fail as sibling event doesn't request same radix_scope_qual bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_2); + /* Expected to pass as sibling event request same radix_scope_qual bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_radix_scope_qual, + "group_constraint_radix_scope_qual"); +} -- cgit From beebeecb47d3b93198fe46922fd4ba2af2090cdd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:10:59 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint for MMCRA Sampling Mode field Testcase for reserved bits in Monitor Mode Control Register A (MMCRA) Random Sampling Mode (SM) value. As per Instruction Set Architecture (ISA), the values 0x5, 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E are reserved for sampling mode field. Test that having these reserved bit values should cause event_open to fail. Input event code in testcases uses these sampling bits along with 401e0 (PM_MRK_INST_CMPL). Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-22-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../reserved_bits_mmcra_sample_elig_mode_test.c | 77 ++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 5b61fb0b9fd6..5dd482843572 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ - group_constraint_repeat_test group_constraint_radix_scope_qual_test + group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c new file mode 100644 index 000000000000..4c119c821b99 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode Control + * Register A (MMCRA) Random Sampling Mode (SM) value. + * As per Instruction Set Architecture (ISA), the values + * 0x5, 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E are reserved + * for sampling mode field. Test that having these reserved + * bit values should cause event_open to fail. + * Input event code uses these sampling bits along with + * 401e0 (PM_MRK_INST_CMPL). + */ + +static int reserved_bits_mmcra_sample_elig_mode(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA Random Sampling Mode (SM) values: 0x5 + * 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E is reserved. + * Expected to fail when using these reserved values. + */ + event_init(&event, 0x50401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x90401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0xD0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x190401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1D0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1A0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1E0401e0); + FAIL_IF(!event_open(&event)); + + /* + * MMCRA Random Sampling Mode (SM) value 0x10 + * is reserved in power10 and 0xC is reserved in + * power9. + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init(&event, 0x100401e0); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + event_init(&event, 0xC0401e0); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_sample_elig_mode, + "reserved_bits_mmcra_sample_elig_mode"); +} -- cgit From 122b6b9e57006520addd9f3a44f6b7e3ce503044 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:00 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check MMCRA sample bits Events with different "sample" field values which is used to program Monitor Mode Control Register A (MMCRA) in a group will fail to schedule. Testcase uses event with load only sampling mode as group leader and event with store only sampling as sibling event. So that it can check that using different sample bits in event code will fail in event open for group of events Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-23-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../group_constraint_mmcra_sample_test.c | 54 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 5dd482843572..590b642ef900 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -2,7 +2,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ - group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test + group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ + group_constraint_mmcra_sample_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c new file mode 100644 index 000000000000..ff625b5d80eb --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x353c0101ec +#define EventCode_3 0x35340101ec +/* + * Test that using different sample bits in + * event code cause failure in schedule for + * group of events. + */ + +static int group_constraint_mmcra_sample(void) +{ + struct event event, leader; + + SKIP_IF(platform_check_for_tests()); + + /* + * Events with different "sample" field values + * in a group will fail to schedule. + * Use event with load only sampling mode as + * group leader. Use event with store only sampling + * as sibling event. + */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't use same sampling bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_3); + + /* Expected to pass as sibling event use same sampling bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_mmcra_sample, "group_constraint_mmcra_sample"); +} -- cgit From 5196a27978dcc74251eab14cffa8fa96813e0365 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:01 +0530 Subject: selftests/powerpc/pmu: Add selftest for checking invalid bits in event code Some of the bits in the event code is reserved for specific platforms. Event code bits 52-59 are reserved in power9, whereas in power10, these are used for programming Monitor Mode Control Register 3 (MMCR3). Bit 9 in event code is reserved in power9, whereas it is used for programming "radix_scope_qual" bit 18 in Monitor Mode Control Register 1 (MMCR1). Testcase to ensure that using reserved bits in event code should cause event_open to fail. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-24-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../pmu/event_code_tests/invalid_event_code_test.c | 67 ++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 590b642ef900..1ce1ef4586fd 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ - group_constraint_mmcra_sample_test + group_constraint_mmcra_sample_test invalid_event_code_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c new file mode 100644 index 000000000000..f51fcab837fc --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode_1 0x1340000001c040 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_2 0x14242 +/* Event code with IFM, EBB, BHRB bits set in event code */ +#define EventCode_3 0xf00000000000001e + +/* + * Some of the bits in the event code is + * reserved for specific platforms. + * Event code bits 52-59 are reserved in power9, + * whereas in power10, these are used for programming + * Monitor Mode Control Register 3 (MMCR3). + * Bit 9 in event code is reserved in power9, + * whereas it is used for programming "radix_scope_qual" + * bit 18 in Monitor Mode Control Register 1 (MMCR1). + * + * Testcase to ensure that using reserved bits in + * event code should cause event_open to fail. + */ + +static int invalid_event_code(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using MMCR3 bits and radix scope qual bits + * should fail in power9 and should succeed in power10. + * Init the events and check for pass/fail in event open. + */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init(&event, EventCode_1); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init(&event, EventCode_2); + FAIL_IF(event_open(&event)); + event_close(&event); + } else { + event_init(&event, EventCode_1); + FAIL_IF(!event_open(&event)); + + event_init(&event, EventCode_2); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(invalid_event_code, "invalid_event_code"); +} -- cgit From 0c90263339da3e4cdcbf57cfa43d6d866c3ac95e Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:02 +0530 Subject: selftests/powerpc/pmu: Add selftest for reserved bit check for MMCRA thresh_ctl field Testcase for reserved bits in Monitor Mode Control Register A (MMCRA) thresh_ctl bits. For MMCRA[48:51]/[52:55]) Threshold Start/Stop, 0b11110000/0b00001111 is reserved. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-25-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../reserved_bits_mmcra_thresh_ctl_test.c | 44 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 1ce1ef4586fd..e50570794337 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ - group_constraint_mmcra_sample_test invalid_event_code_test + group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c new file mode 100644 index 000000000000..4ea1c2f8913f --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode + * Control Register A (MMCRA) thresh_ctl bits. + * For MMCRA[48:51]/[52:55]) Threshold Start/Stop, + * 0b11110000/0b00001111 is reserved. + */ + +static int reserved_bits_mmcra_thresh_ctl(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA[48:51]/[52:55]) Threshold Start/Stop + * events Selection. 0b11110000/0b00001111 is reserved. + * Expected to fail when using these reserved values. + */ + event_init(&event, 0xf0340401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x0f340401e0); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_thresh_ctl, "reserved_bits_mmcra_thresh_ctl"); +} -- cgit From a77c69766c7d4a213e65a4ecdedda7c22f2deb01 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:03 +0530 Subject: selftests/powerpc/pmu: Add selftest for blacklist events check in power9 Some of the events are blacklisted in power9. The list of blacklisted events are noted in power9-events-list.h When trying to do event open for any of these blacklisted event will cause a failure. Testcase ensures that using blacklisted events will cause event_open to fail in power9. This test is only applicable on power9 DD2.1 and DD2.2 and hence test adds checks to skip on other platforms. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-26-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../pmu/event_code_tests/blacklisted_events_test.c | 132 +++++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index e50570794337..a5916a938154 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -3,7 +3,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ - group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test + group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ + blacklisted_events_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c new file mode 100644 index 000000000000..fafeff19cb34 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_DTLB_MISS_16G 0x1c058 +#define PM_DERAT_MISS_2M 0x1c05a +#define PM_DTLB_MISS_2M 0x1c05c +#define PM_MRK_DTLB_MISS_1G 0x1d15c +#define PM_DTLB_MISS_4K 0x2c056 +#define PM_DERAT_MISS_1G 0x2c05a +#define PM_MRK_DERAT_MISS_2M 0x2d152 +#define PM_MRK_DTLB_MISS_4K 0x2d156 +#define PM_MRK_DTLB_MISS_16G 0x2d15e +#define PM_DTLB_MISS_64K 0x3c056 +#define PM_MRK_DERAT_MISS_1G 0x3d152 +#define PM_MRK_DTLB_MISS_64K 0x3d156 +#define PM_DISP_HELD_SYNC_HOLD 0x4003c +#define PM_DTLB_MISS_16M 0x4c056 +#define PM_DTLB_MISS_1G 0x4c05a +#define PM_MRK_DTLB_MISS_16M 0x4c15e +#define PM_MRK_ST_DONE_L2 0x10134 +#define PM_RADIX_PWC_L1_HIT 0x1f056 +#define PM_FLOP_CMPL 0x100f4 +#define PM_MRK_NTF_FIN 0x20112 +#define PM_RADIX_PWC_L2_HIT 0x2d024 +#define PM_IFETCH_THROTTLE 0x3405e +#define PM_MRK_L2_TM_ST_ABORT_SISTER 0x3e15c +#define PM_RADIX_PWC_L3_HIT 0x3f056 +#define PM_RUN_CYC_SMT2_MODE 0x3006c +#define PM_TM_TX_PASS_RUN_INST 0x4e014 + +#define PVR_POWER9_CUMULUS 0x00002000 + +int blacklist_events_dd21[] = { + PM_MRK_ST_DONE_L2, + PM_RADIX_PWC_L1_HIT, + PM_FLOP_CMPL, + PM_MRK_NTF_FIN, + PM_RADIX_PWC_L2_HIT, + PM_IFETCH_THROTTLE, + PM_MRK_L2_TM_ST_ABORT_SISTER, + PM_RADIX_PWC_L3_HIT, + PM_RUN_CYC_SMT2_MODE, + PM_TM_TX_PASS_RUN_INST, + PM_DISP_HELD_SYNC_HOLD, +}; + +int blacklist_events_dd22[] = { + PM_DTLB_MISS_16G, + PM_DERAT_MISS_2M, + PM_DTLB_MISS_2M, + PM_MRK_DTLB_MISS_1G, + PM_DTLB_MISS_4K, + PM_DERAT_MISS_1G, + PM_MRK_DERAT_MISS_2M, + PM_MRK_DTLB_MISS_4K, + PM_MRK_DTLB_MISS_16G, + PM_DTLB_MISS_64K, + PM_MRK_DERAT_MISS_1G, + PM_MRK_DTLB_MISS_64K, + PM_DISP_HELD_SYNC_HOLD, + PM_DTLB_MISS_16M, + PM_DTLB_MISS_1G, + PM_MRK_DTLB_MISS_16M, +}; + +int pvr_min; + +/* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. + */ +int check_for_power9_version(void) +{ + pvr_min = PVR_MIN(mfspr(SPRN_PVR)); + + SKIP_IF(PVR_VER(pvr) != POWER9); + SKIP_IF(!(pvr & PVR_POWER9_CUMULUS)); + + SKIP_IF(!(3 - pvr_min)); + + return 0; +} + +/* + * Testcase to ensure that using blacklisted bits in + * event code should cause event_open to fail in power9 + */ + +static int blacklisted_events(void) +{ + struct event event; + int i = 0; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. + */ + SKIP_IF(check_for_power9_version()); + + /* Skip for Generic compat mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + if (pvr_min == 1) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd21); i++) { + event_init(&event, blacklist_events_dd21[i]); + FAIL_IF(!event_open(&event)); + } + } else if (pvr_min == 2) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd22); i++) { + event_init(&event, blacklist_events_dd22[i]); + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(blacklisted_events, "blacklisted_events"); +} -- cgit From 5958ad4392b0f437605ade8bab42447b0d97ad8c Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:04 +0530 Subject: selftests/powerpc/pmu: Add selftest for event alternatives for power9 Platform specific PMU supports alternative event for some of the event codes. During perf_event_open, it any event group doesn't match constraint check criteria, further lookup is done to find alternative event. Code checks to see if it is possible to schedule event as group using alternative events. Testcase exercises the alternative event find code for power9. Example, since events in same PMC can't go in as a group, ideally using PM_RUN_CYC_ALT (0x200f4) and PM_BR_TAKEN_CMPL (0x200fa) will fail. But since RUN_CYC (0x600f4) is alternative event for 0x200f4, it is possible to use 0x600f4 and 0x200fa as group. Testcase uses such combination for all events in power9 which has an alternative event. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-27-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../event_code_tests/event_alternatives_tests_p9.c | 116 +++++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index a5916a938154..cf27e612290e 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ - blacklisted_events_test + blacklisted_events_test event_alternatives_tests_p9 top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c new file mode 100644 index 000000000000..f7dcf0e0447c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x200fa +#define EventCode_2 0x200fc +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p9(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER9); + + /* Skip for generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for PM_RUN_CYC_ALT */ + event_init(&leader, PM_RUN_CYC_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_1); + + /* + * Expected to pass since PM_RUN_CYC_ALT in PMC2 has alternative event + * 0x600f4. So it can go in with EventCode_1 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_INST_DISP); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_INST_DISP in PMC2 has alternative event + * 0x300f2 in PMC3. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_BR_2PATH); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_BR_2PATH in PMC2 has alternative event + * 0x40036 in PMC4. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_LD_MISS_L1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_3); + /* + * Expected to pass since PM_LD_MISS_L1 in PMC3 has alternative event + * 0x400f0 in PMC4. So it can go in with EventCode_3 which is using PMC3 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_RUN_INST_CMPL_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_4); + /* + * Expected to pass since PM_RUN_INST_CMPL_ALT in PMC4 has alternative event + * 0x500fa in PMC5. So it can go in with EventCode_4 which is using PMC4 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p9, "event_alternatives_tests_p9"); +} -- cgit From 3f1a87425f8c2f9af745923865a4765e36a2ed3c Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:05 +0530 Subject: selftests/powerpc/pmu: Add selftest for event alternatives for power10 Platform specific PMU supports alternative event for some of the event codes. During perf_event_open, it any event group doesn't match constraint check criteria, further lookup is done to find alternative event. Code checks to see if it is possible to schedule event as group using alternative events. Testcase exercises the alternative event find code for power10. Example, Using PMC1 to PMC4 in a group and again trying to schedule PM_CYC_ALT (0x0001e) will fail since this exceeds number of programmable events in group. But since 0x600f4 is an alternative event for 0x0001e, it is possible to use 0x0001e in the group. Testcase uses such combination all events in power10 which has alternative event. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-28-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../event_alternatives_tests_p10.c | 109 +++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index cf27e612290e..50bcc036dddf 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ - blacklisted_events_test event_alternatives_tests_p9 + blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c new file mode 100644 index 000000000000..8be7aada6523 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x100fc +#define EventCode_2 0x200fa +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p10(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10); + + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Test for event alternative for 0x0001e + * and 0x00002. + */ + e = &events[0]; + event_init(e, 0x0001e); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x0001e has alternative event + * 0x600f4 in PMC6. So it can go in with other events + * in PMC1 to PMC4. + */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + e = &events[0]; + event_init(e, 0x00002); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x00020 has alternative event + * 0x500fa in PMC5. So it can go in with other events + * in PMC1 to PMC4. + */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p10, "event_alternatives_tests_p10"); +} -- cgit From 8efeedf5aac77b58f68e6eb9df62758ba1882bb3 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 10 Jun 2022 19:11:06 +0530 Subject: selftests/powerpc/pmu: Add selftest for PERF_TYPE_HARDWARE events valid check Testcase to ensure that using invalid event in generic event for PERF_TYPE_HARDWARE will fail. Invalid generic events in power10 are: - PERF_COUNT_HW_BUS_CYCLES - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND - PERF_COUNT_HW_STALLED_CYCLES_BACKEND - PERF_COUNT_HW_REF_CPU_CYCLES Invalid generic events in power9 are: - PERF_COUNT_HW_BUS_CYCLES - PERF_COUNT_HW_REF_CPU_CYCLES Testcase does event open for valid and invalid generic events to ensure event open works for all valid events and fails for invalid events. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-29-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../event_code_tests/generic_events_valid_test.c | 130 +++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 50bcc036dddf..0d56f1ef530f 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ - blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 + blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c new file mode 100644 index 000000000000..0d237c15d3f2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include +#include +#include +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase to ensure that using invalid event in generic + * event for PERF_TYPE_HARDWARE should fail + */ + +static int generic_events_valid_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* generic events is different in compat_mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Invalid generic events in power10: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * - PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + /* + * Invalid generic events in power9: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(generic_events_valid_test, "generic_events_valid_test"); +} -- cgit From 20b3073f8727e20332379f145b6eecf580291b2c Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:07 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for MMCR0 l2l3_sel bits In power10, L2L3 select bits in the event code is used to program l2l3_sel field in Monitor Mode Control Register 0 (MMCR0: 56-60). When scheduling events as a group, all events in that group should match value in these bits. Otherwise event open for the sibling events will fail. Testcase uses event code "0x010000046080" as leader and another events "0x26880" and "0x010000026880" as sibling events, and checks for l2l3_sel constraints via perf interface for ISA v3.1 platform. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-30-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../group_constraint_l2l3_sel_test.c | 64 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 0d56f1ef530f..58e1a7a2ed4e 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -4,7 +4,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ - blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test + blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ + group_constraint_l2l3_sel_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c new file mode 100644 index 000000000000..85a636886069 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode_1 0x010000046080 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_2 0x26880 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_3 0x010000026880 + +/* + * Testcase for group constraint check of l2l3_sel bits which is + * used to program l2l3 select field in Monitor Mode Control Register 0 + * (MMCR0: 56-60). + * All events in the group should match l2l3_sel bits otherwise + * event_open for the group should fail. + */ +static int group_constraint_l2l3_sel(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for l2l3_sel bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l2l3_sel bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint l2l3_sel test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l2l3_sel bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_l2l3_sel, "group_constraint_l2l3_sel"); +} -- cgit From 291c01ed207d83c8910e0fb21944e6ef84021956 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:08 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for MMCR1 cache bits Data and instruction cache qualifier bits in the event code is used to program cache select field in Monitor Mode Control Register 1 (MMCR1: 16-17). When scheduling events as a group, all events in that group should match value in these bits. Otherwise event open for the sibling events will fail. Testcase uses event code "0x1100fc" as leader and other events like "0x23e054" and "0x13e054" as sibling events to checks for l1 cache select field constraints via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-31-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../event_code_tests/group_constraint_cache_test.c | 60 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 58e1a7a2ed4e..dc27ca2ffcad 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -5,7 +5,7 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ - group_constraint_l2l3_sel_test + group_constraint_l2l3_sel_test group_constraint_cache_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c new file mode 100644 index 000000000000..f4be05aa3a3d --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All L1 D cache load references counted at finish, gated by reject */ +#define EventCode_1 0x1100fc +/* Load Missed L1 */ +#define EventCode_2 0x23e054 +/* Load Missed L1 */ +#define EventCode_3 0x13e054 + +/* + * Testcase for group constraint check of data and instructions + * cache qualifier bits which is used to program cache select field in + * Monitor Mode Control Register 1 (MMCR1: 16-17) for l1 cache. + * All events in the group should match cache select bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_cache(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint check for l1 cache select bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l1 cache select bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint l1 cache select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l1 cache select bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_cache, "group_constraint_cache"); +} -- cgit From 8eaca8c4b4ed9a2058e4f232d56b5973191fec37 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:09 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for MMCRA thresh_cmp field Thresh compare bits for a event is used to program thresh compare field in Monitor Mode Control Register A (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). When scheduling events as a group, all events in that group should match value in thresh compare bits. Otherwise event open for the sibling events will fail. Testcase uses event code "0x401e0" as leader and another event "0x101ec" as sibling event, and checks for thresh compare constraint via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-32-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_constraint_thresh_cmp_test.c | 96 ++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index dc27ca2ffcad..374044062561 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -5,7 +5,7 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ - group_constraint_l2l3_sel_test group_constraint_cache_test + group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c new file mode 100644 index 000000000000..9f1197104e8c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here is PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load or Store only sampling + */ +#define p9_EventCode_1 0x13e35340401e0 +#define p9_EventCode_2 0x17d34340101ec +#define p9_EventCode_3 0x13e35340101ec +#define p10_EventCode_1 0x35340401e0 +#define p10_EventCode_2 0x35340101ec + +/* + * Testcase for group constraint check of thresh_cmp bits which is + * used to program thresh compare field in Monitor Mode Control Register A + * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). + * All events in the group should match thresh compare bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_cmp(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + /* Init the events for the group contraint check for thresh_cmp bits */ + event_init(&leader, p10_EventCode_1); + + /* Add the thresh_cmp value for leader in config1 */ + leader.attr.config1 = 1000; + FAIL_IF(event_open(&leader)); + + event_init(&event, p10_EventCode_2); + + /* Add the different thresh_cmp value from the leader event in config1 */ + event.attr.config1 = 2000; + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh compare test */ + event_init(&event, p10_EventCode_2); + + /* Add the same thresh_cmp value for leader and sibling event in config1 */ + event.attr.config1 = 1000; + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } else { + /* Init the events for the group contraint check for thresh_cmp bits */ + event_init(&leader, p9_EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, p9_EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh compare test */ + event_init(&event, p9_EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_cmp, "group_constraint_thresh_cmp"); +} -- cgit From 142c9bd1ff215f364a5d683a9dd0b7c413397185 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:10 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint for unit and pmc field in p9 Unit and pmu bits in the event code is used to program unit and pmc fields in Monitor Mode Control Register 1 (MMCR1). For power9 platform, incase unit field value is within 6 to 9, one of the event in the group should use PMC4. Otherwise event_open should fail for that group. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-33-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../event_code_tests/group_constraint_unit_test.c | 74 ++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 374044062561..f72c73b5b79a 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -5,7 +5,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ - group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test + group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ + group_constraint_unit_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c new file mode 100644 index 000000000000..a2c18923dcec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread with PMC 2 */ +#define EventCode_1 0x26080 +/* All successful D-side store dispatches for this thread with PMC 4 */ +#define EventCode_2 0x46080 +/* All successful D-side store dispatches for this thread that were L2 Miss with PMC 3 */ +#define EventCode_3 0x36880 + +/* + * Testcase for group constraint check of unit and pmc bits which is + * used to program corresponding unit and pmc field in Monitor Mode + * Control Register 1 (MMCR1) + * One of the event in the group should use PMC 4 incase units field + * value is within 6 to 9 otherwise event_open for the group will fail. + */ +static int group_constraint_unit(void) +{ + struct event *e, events[3]; + + /* + * Check for platform support for the test. + * Constraint to use PMC4 with one of the event in group, + * when the unit is within 6 to 9 is only applicable on + * power9. + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for unit bits */ + e = &events[0]; + event_init(e, EventCode_1); + + /* Expected to fail as PMC 4 is not used with unit field value 6 to 9 */ + FAIL_IF(!event_open(&events[0])); + + /* Init the events for the group contraint check for unit bits */ + e = &events[1]; + event_init(e, EventCode_2); + + /* Expected to pass as PMC 4 is used with unit field value 6 to 9 */ + FAIL_IF(event_open(&events[1])); + + /* Init the event for the group contraint unit test */ + e = &events[2]; + event_init(e, EventCode_3); + + /* Expected to fail as PMC4 is not being used */ + FAIL_IF(!event_open_with_group(&events[2], events[0].fd)); + + /* Expected to succeed as event using PMC4 */ + FAIL_IF(event_open_with_group(&events[2], events[1].fd)); + + event_close(&events[0]); + event_close(&events[1]); + event_close(&events[2]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_unit, "group_constraint_unit"); +} -- cgit From c178606ab51076d464fe537cd7a6bcbc615939e5 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:11 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for MMCRA thresh_ctl field Thresh control bits in the event code is used to program thresh_ctl field in Monitor Mode Control Register A (MMCRA: 48-55). When scheduling events as a group, all events in that group should match value in these bits. Otherwise event open for the sibling events will fail. Testcase uses event code PM_MRK_INST_CMPL (0x401e0) as leader and another event PM_THRESH_MET (101ec) as sibling event, and checks if group constraint checks for thresh_ctl field added correctly via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-34-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_constraint_thresh_ctl_test.c | 64 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index f72c73b5b79a..16cbb2e52865 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -6,7 +6,7 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ - group_constraint_unit_test + group_constraint_unit_test group_constraint_thresh_ctl_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c new file mode 100644 index 000000000000..e0852ebc1671 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete and issue to + * finished for cycles + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x34340101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_ctl bits which is + * used to program thresh compare field in Monitor Mode Control Register A + * (MMCR0: 48-55). + * All events in the group should match thresh ctl bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_ctl(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint thresh control test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_ctl bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh control test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_ctl bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_ctl, "group_constraint_thresh_ctl"); +} -- cgit From 9ac92fecd1dbfcabd64925571b94151d7a814878 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:12 +0530 Subject: selftests/powerpc/pmu: Add selftest for group constraint check for MMCRA thresh_sel field Thresh select bits in the event code is used to program thresh_sel field in Monitor Mode Control Register A (MMCRA: 45-47). When scheduling events as a group, all events in that group should match value in these bits. Otherwise event open for the sibling events will fail. Testcase uses event code PM_MRK_INST_CMPL (0x401e0) as leader and another event PM_THRESH_MET (0x101ec) as sibling event, and checks if group constraint checks for thresh_sel field added correctly via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-35-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 2 +- .../group_constraint_thresh_sel_test.c | 63 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 16cbb2e52865..755993d210f2 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -6,7 +6,7 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ - group_constraint_unit_test group_constraint_thresh_ctl_test + group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c new file mode 100644 index 000000000000..50a8cd843ce7 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x35540101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_sel bits which is + * used to program thresh select field in Monitor Mode Control Register A + * (MMCRA: 45-57). + * All events in the group should match thresh sel bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_sel(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint thresh select test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_sel bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_sel bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_sel, "group_constraint_thresh_sel"); +} -- cgit From ab8bca92aebcb59d81dc95ddebe241052f2bb411 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 10 Jun 2022 19:11:13 +0530 Subject: selftests/powerpc/pmu: Add test for hardware cache events The testcase checks if the transalation of a generic hardware cache event is done properly via perf interface. The hardware cache events has type as PERF_TYPE_HW_CACHE and each event points to raw event code id. Testcase checks different combination of cache level, cache event operation type and cache event result type and verify for a given event code, whether transalation matches with the current cache event mappings via perf interface. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610134113.62991-36-atrajeev@linux.vnet.ibm.com --- .../powerpc/pmu/event_code_tests/Makefile | 3 +- .../event_code_tests/hw_cache_event_type_test.c | 88 ++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 755993d210f2..4e07d7046457 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -6,7 +6,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ - group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test + group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test \ + hw_cache_event_type_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c new file mode 100644 index 000000000000..a45b1da5b568 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include +#include + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Load Missed L1, for power9 its pointing to PM_LD_MISS_L1_FIN (0x2c04e) and + * for power10 its pointing to PM_LD_MISS_L1 (0x3e054) + * + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_MISS + */ +#define EventCode_1 0x10000 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_2 0x0100 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_DTLB + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_3 0x0103 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : Invalid ( > PERF_COUNT_HW_CACHE_RESULT_MAX) + */ +#define EventCode_4 0x030000 + +/* + * A perf test to check valid hardware cache events. + */ +static int hw_cache_event_type_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_1, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to success as its pointing to L1 load miss */ + FAIL_IF(event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_2, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry have 0 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_3, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry have -1 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_4, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as hardware cache event result type is Invalid */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(hw_cache_event_type_test, "hw_cache_event_type_test"); +} -- cgit From 46d60bdb1283bb0f22d9480e2d6c972623cb4182 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 May 2022 11:14:24 +0200 Subject: powerpc: Include asm/firmware.h in all users of firmware_has_feature() Trying to remove asm/ppc_asm.h from all places that don't need it leads to several failures linked to firmware_has_feature(). To fix it, include asm/firmware.h in all files using firmware_has_feature() All users found with: git grep -L "firmware\.h" ` git grep -l "firmware_has_feature("` Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/11956ec181a034b51a881ac9c059eea72c679a73.1651828453.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 3 +++ arch/powerpc/include/asm/cputime.h | 1 + arch/powerpc/include/asm/interrupt.h | 1 + arch/powerpc/include/asm/mman.h | 1 + arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/dawr.c | 1 + arch/powerpc/kexec/core.c | 1 + arch/powerpc/kvm/book3s_64_mmu_radix.c | 1 + arch/powerpc/kvm/book3s_hv_nested.c | 1 + arch/powerpc/mm/book3s64/hash_pgtable.c | 1 + arch/powerpc/mm/book3s64/pkeys.c | 1 + arch/powerpc/mm/hugetlbpage.c | 1 + arch/powerpc/platforms/pseries/papr_platform_attributes.c | 1 + arch/powerpc/platforms/pseries/vas.c | 1 + 14 files changed, 16 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index b37a28f62cf6..aa1c67c8bfc8 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H #define _ASM_POWERPC_BOOK3S_64_HUGETLB_H + +#include + /* * For radix we want generic code to handle hugetlb. But then if we want * both hash and radix to be enabled together we need to workaround the diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 504f7fe6711a..6d2b27997492 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -19,6 +19,7 @@ #include #include #include +#include typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b14f54d789d2..8069dbc4b8d1 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 1b024e64c8ec..17a77d47ed6d 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -12,6 +12,7 @@ #include #include #include +#include static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 5c80152e8f18..6f109b5cb84e 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -14,6 +14,7 @@ #include #include #include +#include /* These includes should be removed once implicit includes are cleaned up. */ #include diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 30d4eca88d17..909a05cd2809 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -11,6 +11,7 @@ #include #include #include +#include bool dawr_force_enable; EXPORT_SYMBOL_GPL(dawr_force_enable); diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 9773dd0640f3..cf84bfe9e27e 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -20,6 +20,7 @@ #include #include #include +#include void machine_kexec_mask_interrupts(void) { unsigned int i; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 42851c32ff3b..9d4b3feda3b6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Supported radix tree geometry. diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 0644732d1a25..be8249cc6107 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -20,6 +20,7 @@ #include #include #include +#include static struct patb_entry *pseries_partition_tb; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 2e0cad5817ba..ae008b9df0e6 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -13,6 +13,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 753e62ba67af..1d2675ab6711 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b282af39fcf6..bc84a594ca62 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,6 +24,7 @@ #include #include #include +#include bool hugetlb_disabled = false; diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 515150417bb3..526c621b098b 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -22,6 +22,7 @@ #include #include +#include #include "pseries.h" diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 500a1fc4a1d7..91e7eda0606c 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "vas.h" -- cgit From e93dee186fc95f2058b0c9d2317d8b876b8512db Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 May 2022 11:14:25 +0200 Subject: powerpc: Don't include asm/ppc_asm.h in other headers asm/ppc_asm.h is not needed in any of the header it is included. It is only needed by irq.c. Include it there and remove it from other headers. word-at-a-time.h only need ex_table.h, so include it instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e2d7b96547037f852c7ed164e4f79e8918c2607a.1651828453.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/io.h | 1 - arch/powerpc/include/asm/uaccess.h | 1 - arch/powerpc/include/asm/word-at-a-time.h | 2 +- arch/powerpc/kernel/irq.c | 1 + 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index c5a5f7c9b231..9b07133c3bd1 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,7 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev; #include #include #include -#include #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9b82b38ff867..14a08806f8e8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -2,7 +2,6 @@ #ifndef _ARCH_POWERPC_UACCESS_H #define _ARCH_POWERPC_UACCESS_H -#include #include #include #include diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index f3f4710d4ff5..46c31fb8748d 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -7,7 +7,7 @@ #include #include -#include +#include #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index dd09919c3c66..4db27198b002 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,6 +65,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include -- cgit From 7d7b28b302085e1ec2815bc9f5205af28394c5db Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 09:40:15 +0200 Subject: powerpc/irq: Split irq.c More than half of irq.c is dedicated to PPC64. Move PPC64 code out of irq.c into irq_64.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9f1a47de80f78d3dd270a7a72f69f55f581c4054.1652859593.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/irq.c | 421 -------------------------------------- arch/powerpc/kernel/irq_64.c | 476 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+), 422 deletions(-) create mode 100644 arch/powerpc/kernel/irq_64.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f91f0f29a566..317c984ee9d0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -63,7 +63,7 @@ obj-y := cputable.o syscalls.o \ hw_breakpoint_constraints.o interrupt.o \ kdebugfs.o stacktrace.o obj-y += ptrace/ -obj-$(CONFIG_PPC64) += setup_64.o \ +obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4db27198b002..4abfe6bd6622 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,12 +67,6 @@ #include #include -#ifdef CONFIG_PPC64 -#include -#include -#include -#include -#endif #define CREATE_TRACE_POINTS #include #include @@ -89,411 +83,6 @@ u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 - -int distribute_irqs = 1; - -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - -void replay_soft_interrupts(void) -{ - struct pt_regs regs; - - /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * We use local_paca rather than get_paca() to avoid all the - * debug_smp_processor_id() business in this low level function. - */ - - ppc_save_regs(®s); - regs.softe = IRQS_ENABLED; - regs.msr |= MSR_EE; - -again: - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. - */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * Check if an hypervisor Maintenance interrupt happened. - * This is a higher priority interrupt than the others, so - * replay it first. - */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = INTERRUPT_HMI; - handle_hmi_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = INTERRUPT_DECREMENTER; - timer_interrupt(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = INTERRUPT_EXTERNAL; - do_IRQ(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - regs.trap = INTERRUPT_DOORBELL; - doorbell_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = INTERRUPT_PERFMON; - performance_monitor_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - goto again; - } -} - -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) -{ - unsigned long kuap_state = get_kuap(); - - /* - * Check if anything calls local_irq_enable/restore() when KUAP is - * disabled (user access enabled). We handle that case here by saving - * and re-locking AMR but we shouldn't get here in the first place, - * hence the warning. - */ - kuap_assert_locked(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(AMR_KUAP_BLOCKED); - - replay_soft_interrupts(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(kuap_state); -} -#else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() -#endif - -notrace void arch_local_irq_restore(unsigned long mask) -{ - unsigned char irq_happened; - - /* Write the new soft-enabled value if it is a disable */ - if (mask) { - irq_soft_mask_set(mask); - return; - } - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); - - /* - * After the stb, interrupts are unmasked and there are no interrupts - * pending replay. The restart sequence makes this atomic with - * respect to soft-masked interrupts. If this was just a simple code - * sequence, a soft-masked interrupt could become pending right after - * the comparison and before the stb. - * - * This allows interrupts to be unmasked without hard disabling, and - * also without new hard interrupts coming in ahead of pending ones. - */ - asm_volatile_goto( -"1: \n" -" lbz 9,%0(13) \n" -" cmpwi 9,0 \n" -" bne %l[happened] \n" -" stb 9,%1(13) \n" -"2: \n" - RESTART_TABLE(1b, 2b, 1b) - : : "i" (offsetof(struct paca_struct, irq_happened)), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "cr0", "r9" - : happened); - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); - - return; - -happened: - irq_happened = get_irq_happened(); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!irq_happened); - - if (irq_happened == PACA_IRQ_HARD_DIS) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - irq_soft_mask_set(IRQS_ENABLED); - local_paca->irq_happened = 0; - __hard_irq_enable(); - return; - } - - /* Have interrupts to replay, need to hard disable first */ - if (!(irq_happened & PACA_IRQ_HARD_DIS)) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (!(mfmsr() & MSR_EE)) { - /* - * An interrupt could have come in and cleared - * MSR[EE] and set IRQ_HARD_DIS, so check - * IRQ_HARD_DIS again and warn if it is still - * clear. - */ - irq_happened = get_irq_happened(); - WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); - } - } - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - } else { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); - } - } - - /* - * Disable preempt here, so that the below preempt_enable will - * perform resched if required (a replayed interrupt may set - * need_resched). - */ - preempt_disable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - - replay_soft_interrupts_irqrestore(); - local_paca->irq_happened = 0; - - trace_hardirqs_on(); - irq_soft_mask_set(IRQS_ENABLED); - __hard_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(arch_local_irq_restore); - -/* - * This is a helper to use when about to go into idle low-power - * when the latter has the side effect of re-enabling interrupts - * (such as calling H_CEDE under pHyp). - * - * You call this function with interrupts soft-disabled (this is - * already the case when ppc_md.power_save is called). The function - * will return whether to enter power save or just return. - * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard - * disabled and marked as such, so the local_irq_enable() call - * in arch_cpu_idle() will properly re-enable everything. - */ -bool prep_irq_for_idle(void) -{ - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - /* - * Mark interrupts as soft-enabled and clear the - * PACA_IRQ_HARD_DIS from the pending mask since we - * are about to hard enable as well as a side effect - * of entering the low power state. - */ - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - irq_soft_mask_set(IRQS_ENABLED); - - /* Tell the caller to enter the low power state */ - return true; -} - -#ifdef CONFIG_PPC_BOOK3S -/* - * This is for idle sequences that return with IRQs off, but the - * idle state itself wakes on interrupt. Tell the irq tracer that - * IRQs are enabled for the duration of idle so it does not get long - * off times. Must be paired with fini_irq_for_idle_irqsoff. - */ -bool prep_irq_for_idle_irqsoff(void) -{ - WARN_ON(!irqs_disabled()); - - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - return true; -} - -/* - * Take the SRR1 wakeup reason, index into this table to find the - * appropriate irq_happened bit. - * - * Sytem reset exceptions taken in idle state also come through here, - * but they are NMI interrupts so do not need to wait for IRQs to be - * restored, and should be taken as early as practical. These are marked - * with 0xff in the table. The Power ISA specifies 0100b as the system - * reset interrupt reason. - */ -#define IRQ_SYSTEM_RESET 0xff - -static const u8 srr1_to_lazyirq[0x10] = { - 0, 0, 0, - PACA_IRQ_DBELL, - IRQ_SYSTEM_RESET, - PACA_IRQ_DBELL, - PACA_IRQ_DEC, - 0, - PACA_IRQ_EE, - PACA_IRQ_EE, - PACA_IRQ_HMI, - 0, 0, 0, 0, 0 }; - -void replay_system_reset(void) -{ - struct pt_regs regs; - - ppc_save_regs(®s); - regs.trap = 0x100; - get_paca()->in_nmi = 1; - system_reset_exception(®s); - get_paca()->in_nmi = 0; -} -EXPORT_SYMBOL_GPL(replay_system_reset); - -void irq_set_pending_from_srr1(unsigned long srr1) -{ - unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; - u8 reason = srr1_to_lazyirq[idx]; - - /* - * Take the system reset now, which is immediately after registers - * are restored from idle. It's an NMI, so interrupts need not be - * re-enabled before it is taken. - */ - if (unlikely(reason == IRQ_SYSTEM_RESET)) { - replay_system_reset(); - return; - } - - if (reason == PACA_IRQ_DBELL) { - /* - * When doorbell triggers a system reset wakeup, the message - * is not cleared, so if the doorbell interrupt is replayed - * and the IPI handled, the doorbell interrupt would still - * fire when EE is enabled. - * - * To avoid taking the superfluous doorbell interrupt, - * execute a msgclr here before the interrupt is replayed. - */ - ppc_msgclr(PPC_DBELL_MSGTYPE); - } - - /* - * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, - * so this can be called unconditionally with the SRR1 wake - * reason as returned by the idle code, which uses 0 to mean no - * interrupt. - * - * If a future CPU was to designate this as an interrupt reason, - * then a new index for no interrupt must be assigned. - */ - local_paca->irq_happened |= reason; -} -#endif /* CONFIG_PPC_BOOK3S */ - -/* - * Force a replay of the external interrupt handler on this CPU. - */ -void force_external_irq_replay(void) -{ - /* - * This must only be called with interrupts soft-disabled, - * the replay will happen when re-enabling. - */ - WARN_ON(!arch_irqs_disabled()); - - /* - * Interrupts must always be hard disabled before irq_happened is - * modified (to prevent lost update in case of interrupt between - * load and store). - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* Indicate in the PACA that we have an interrupt to replay */ - local_paca->irq_happened |= PACA_IRQ_EE; -} - -#endif /* CONFIG_PPC64 */ - int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -795,13 +384,3 @@ int irq_choose_cpu(const struct cpumask *mask) return hard_smp_processor_id(); } #endif - -#ifdef CONFIG_PPC64 -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c new file mode 100644 index 000000000000..56dd7d94e661 --- /dev/null +++ b/arch/powerpc/kernel/irq_64.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int distribute_irqs = 1; + +static inline notrace unsigned long get_irq_happened(void) +{ + unsigned long happened; + + __asm__ __volatile__("lbz %0,%1(13)" + : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); + + return happened; +} + +void replay_soft_interrupts(void) +{ + struct pt_regs regs; + + /* + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. + */ + + ppc_save_regs(®s); + regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; + +again: + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. + */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { + local_paca->irq_happened &= ~PACA_IRQ_HMI; + regs.trap = INTERRUPT_HMI; + handle_hmi_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_DEC) { + local_paca->irq_happened &= ~PACA_IRQ_DEC; + regs.trap = INTERRUPT_DECREMENTER; + timer_interrupt(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_EE) { + local_paca->irq_happened &= ~PACA_IRQ_EE; + regs.trap = INTERRUPT_EXTERNAL; + do_IRQ(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + regs.trap = INTERRUPT_DOORBELL; + doorbell_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + regs.trap = INTERRUPT_PERFMON; + performance_monitor_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); + goto again; + } +} + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_assert_locked(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(in_nmi() || in_hardirq()); + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. + */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = get_irq_happened(); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = get_irq_happened(); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in arch_cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ENABLED); + + /* Tell the caller to enter the low power state */ + return true; +} + +#ifdef CONFIG_PPC_BOOK3S +/* + * This is for idle sequences that return with IRQs off, but the + * idle state itself wakes on interrupt. Tell the irq tracer that + * IRQs are enabled for the duration of idle so it does not get long + * off times. Must be paired with fini_irq_for_idle_irqsoff. + */ +bool prep_irq_for_idle_irqsoff(void) +{ + WARN_ON(!irqs_disabled()); + + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + return true; +} + +/* + * Take the SRR1 wakeup reason, index into this table to find the + * appropriate irq_happened bit. + * + * Sytem reset exceptions taken in idle state also come through here, + * but they are NMI interrupts so do not need to wait for IRQs to be + * restored, and should be taken as early as practical. These are marked + * with 0xff in the table. The Power ISA specifies 0100b as the system + * reset interrupt reason. + */ +#define IRQ_SYSTEM_RESET 0xff + +static const u8 srr1_to_lazyirq[0x10] = { + 0, 0, 0, + PACA_IRQ_DBELL, + IRQ_SYSTEM_RESET, + PACA_IRQ_DBELL, + PACA_IRQ_DEC, + 0, + PACA_IRQ_EE, + PACA_IRQ_EE, + PACA_IRQ_HMI, + 0, 0, 0, 0, 0 }; + +void replay_system_reset(void) +{ + struct pt_regs regs; + + ppc_save_regs(®s); + regs.trap = 0x100; + get_paca()->in_nmi = 1; + system_reset_exception(®s); + get_paca()->in_nmi = 0; +} +EXPORT_SYMBOL_GPL(replay_system_reset); + +void irq_set_pending_from_srr1(unsigned long srr1) +{ + unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; + u8 reason = srr1_to_lazyirq[idx]; + + /* + * Take the system reset now, which is immediately after registers + * are restored from idle. It's an NMI, so interrupts need not be + * re-enabled before it is taken. + */ + if (unlikely(reason == IRQ_SYSTEM_RESET)) { + replay_system_reset(); + return; + } + + if (reason == PACA_IRQ_DBELL) { + /* + * When doorbell triggers a system reset wakeup, the message + * is not cleared, so if the doorbell interrupt is replayed + * and the IPI handled, the doorbell interrupt would still + * fire when EE is enabled. + * + * To avoid taking the superfluous doorbell interrupt, + * execute a msgclr here before the interrupt is replayed. + */ + ppc_msgclr(PPC_DBELL_MSGTYPE); + } + + /* + * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, + * so this can be called unconditionally with the SRR1 wake + * reason as returned by the idle code, which uses 0 to mean no + * interrupt. + * + * If a future CPU was to designate this as an interrupt reason, + * then a new index for no interrupt must be assigned. + */ + local_paca->irq_happened |= reason; +} +#endif /* CONFIG_PPC_BOOK3S */ + +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); -- cgit From 98552307e3a72d480e72744bf5da2a865822f496 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 09:40:16 +0200 Subject: powerpc/irq64: Remove get_irq_happened() No need to open code the read of local_paca->irq_happened in assembly, we have READ_ONCE() for doing the same. Replace get_irq_happened() by READ_ONCE(local_paca->irq_happened). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/af511b53e4eb51f8fbc51eda7f5597175e68dce6.1652859593.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq_64.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 56dd7d94e661..01645e03e9f0 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -68,16 +68,6 @@ int distribute_irqs = 1; -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - void replay_soft_interrupts(void) { struct pt_regs regs; @@ -234,7 +224,7 @@ notrace void arch_local_irq_restore(unsigned long mask) return; happened: - irq_happened = get_irq_happened(); + irq_happened = READ_ONCE(local_paca->irq_happened); if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!irq_happened); @@ -257,7 +247,7 @@ happened: * IRQ_HARD_DIS again and warn if it is still * clear. */ - irq_happened = get_irq_happened(); + irq_happened = READ_ONCE(local_paca->irq_happened); WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); } } -- cgit From ef5b570d3700fbb8628a58da0487486ceeb713cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 10:32:27 +0200 Subject: powerpc/irq: Don't open code irq_soft_mask helpers Use READ_ONCE() and WRITE_ONCE() instead of open coding read and write of local PACA irq_soft_mask. For the write, add a barrier to keep the memory clobber that was there previously. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e2454434992cc932a5a34b695ae981c0b2f4c28e.1652862729.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 43 +++++++-------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 674e5aaafcbd..edc569481faf 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -113,14 +113,7 @@ static inline void __hard_RI_enable(void) static inline notrace unsigned long irq_soft_mask_return(void) { - unsigned long flags; - - asm volatile( - "lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, irq_soft_mask))); - - return flags; + return READ_ONCE(local_paca->irq_soft_mask); } /* @@ -148,46 +141,24 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) WARN_ON(mask && !(mask & IRQS_DISABLED)); #endif - asm volatile( - "stb %0,%1(13)" - : - : "r" (mask), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "memory"); + WRITE_ONCE(local_paca->irq_soft_mask, mask); + barrier(); } static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { - unsigned long flags; - -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(mask && !(mask & IRQS_DISABLED)); -#endif + unsigned long flags = irq_soft_mask_return(); - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=&r" (flags) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); + irq_soft_mask_set(mask); return flags; } static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags, tmp; - - asm volatile( - "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" - : "=&r" (flags), "=r" (tmp) - : "i" (offsetof(struct paca_struct, irq_soft_mask)), - "r" (mask) - : "memory"); + unsigned long flags = irq_soft_mask_return(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); -#endif + irq_soft_mask_set(flags | mask); return flags; } -- cgit From 78ffe6a7e2a169c4dcbbd08717a0a8d738659d15 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 10:32:28 +0200 Subject: powerpc/irq: Replace #ifdefs by IS_ENABLED() Replace #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG and #ifdef CONFIG_PERF_EVENTS by IS_ENABLED() in hw_irq.h and plpar_wrappers.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c1ded642f8d9002767f8fed48ed6d1e76254ed73.1652862729.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 30 ++++++++++++++---------------- arch/powerpc/include/asm/plpar_wrappers.h | 5 ++--- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index edc569481faf..6efab00aa1c8 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -123,7 +123,6 @@ static inline notrace unsigned long irq_soft_mask_return(void) */ static inline notrace void irq_soft_mask_set(unsigned long mask) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG /* * The irq mask must always include the STD bit if any are set. * @@ -138,8 +137,8 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) * unmasks to be replayed, among other things. For now, take * the simple approach. */ - WARN_ON(mask && !(mask & IRQS_DISABLED)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON(mask && !(mask & IRQS_DISABLED)); WRITE_ONCE(local_paca->irq_soft_mask, mask); barrier(); @@ -324,11 +323,13 @@ bool power_pmu_wants_prompt_pmi(void); */ static inline bool should_hard_irq_enable(void) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); - WARN_ON(mfmsr() & MSR_EE); -#endif -#ifdef CONFIG_PERF_EVENTS + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(mfmsr() & MSR_EE); + } + + if (!IS_ENABLED(CONFIG_PERF_EVENTS)) + return false; /* * If the PMU is not running, there is not much reason to enable * MSR[EE] in irq handlers because any interrupts would just be @@ -343,9 +344,6 @@ static inline bool should_hard_irq_enable(void) return false; return true; -#else - return false; -#endif } /* @@ -353,11 +351,11 @@ static inline bool should_hard_irq_enable(void) */ static inline void do_hard_irq_enable(void) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); - WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); - WARN_ON(mfmsr() & MSR_EE); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); + WARN_ON(mfmsr() & MSR_EE); + } /* * This allows PMI interrupts (and watchdog soft-NMIs) through. * There is no other reason to enable this way. diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 83e0f701ebc6..8239c0af5eb2 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -43,11 +43,10 @@ static inline long extended_cede_processor(unsigned long latency_hint) set_cede_latency_hint(latency_hint); rc = cede_processor(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* Ensure that H_CEDE returns with IRQs on */ - if (WARN_ON(!(mfmsr() & MSR_EE))) + if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & MSR_EE))) __hard_irq_enable(); -#endif set_cede_latency_hint(old_latency_hint); -- cgit From 077fc62b2b66a95af43dbb363fb8e932999812d3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 10:48:55 +0200 Subject: powerpc/irq: remove inline assembly in hard_irq_disable macro Use WRITE_ONCE() instead of opencoding the saving of current stack pointeur. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9f05937d8722ddd2064a7c2362d8f9000e15e1ba.1652863723.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 6efab00aa1c8..26ede09c521d 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -282,9 +282,7 @@ static inline bool pmi_irq_pending(void) flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) { \ - asm ("stdx %%r1, 0, %1 ;" \ - : "=m" (local_paca->saved_r1) \ - : "b" (&local_paca->saved_r1)); \ + WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\ trace_hardirqs_off(); \ } \ } while(0) -- cgit From 41f20d6db2b64677225bb0b97df956241c353ef8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Jun 2022 15:08:14 +0200 Subject: powerpc/irq: Increase stack_overflow detection limit when KASAN is enabled When KASAN is enabled, as shown by the Oops below, the 2k limit is not enough to allow stack dump after a stack overflow detection when CONFIG_DEBUG_STACKOVERFLOW is selected: do_IRQ: stack overflow: 1984 CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 Call Trace: Oops: Kernel stack overflow, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser fuse drm drm_panel_orientation_quirks configfs CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 NIP: c02e5558 LR: c07eb3bc CTR: c07f46a8 REGS: e7fe9f50 TRAP: 0000 Not tainted (5.18.0-gentoo-PMacG4) MSR: 00001032 CR: 44a14824 XER: 20000000 GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 00000008 00000000 c07eb3bc eaa1c010 GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 00000005 00000010 GPR16: 00000025 eaa1c154 eaa1c158 c0dbad64 00000020 fd543810 eaa1c0a0 eaa1c29e GPR24: c0dbad44 c0db8740 05ffffff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 c0c9a3c0 NIP [c02e5558] kasan_check_range+0xc/0x2b4 LR [c07eb3bc] format_decode+0x80/0x604 Call Trace: [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable) [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938 [eaa1c110] [c07f5788] sprintf+0xa0/0xc0 [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198 [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260 [eaa1c430] [c07f46d0] pointer+0x15c/0x710 [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938 [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678 [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378 [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0 [eaa1c750] [c000ca94] show_stack+0x21c/0x260 [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90 [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174 [eaa1c800] [c0009258] do_IRQ+0x20/0x34 [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c ... An investigation shows that on PPC32, calling dump_stack() requires more than 1k when KASAN is not selected and a bit more than 2k bytes when KASAN is selected. On PPC64 the registers are twice the size of PPC32 registers, so the need should be approximately twice the need on PPC32. In the meantime we have THREAD_SIZE which is twice larger on PPC64 than PPC32, and twice larger when KASAN is selected. So we can easily use the value of THREAD_SIZE to set the limit. On PPC32, THREAD_SIZE is 8k without KASAN and 16k with KASAN. On PPC64, THREAD_SIZE is 16k without KASAN. To be on the safe side, leave 2k on PPC32 without KASAN, 4k with KASAN, and 4k on PPC64 without KASAN. It means the limit should be one fourth of THREAD_SIZE. Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e8b4eb82a126c3c6c352173a544fe94609ff660b.1654261687.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4abfe6bd6622..bf7078b8a765 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -194,8 +194,8 @@ static inline void check_stack_overflow(void) sp = current_stack_pointer & (THREAD_SIZE - 1); - /* check for stack overflow: is there less than 2KB free? */ - if (unlikely(sp < 2048)) { + /* check for stack overflow: is there less than 1/4th free? */ + if (unlikely(sp < THREAD_SIZE / 4)) { pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } -- cgit From 051bd351a2ef9c69753dc9cf6bd396986f27778c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:40 +0200 Subject: powerpc/irq: Make __do_irq() static Since commit 48cf12d88969 ("powerpc/irq: Inline call_do_irq() and call_do_softirq()"), __do_irq() is not used outside irq.c Reorder functions and make __do_irq() static and drop the declaration in irq.h. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/adbe1c8315ec2d63259f41468e82e51677bb1eda.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/irq.h | 1 - arch/powerpc/kernel/irq.c | 46 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 13f0409dd617..5c1516a5ba8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -54,7 +54,6 @@ extern void *softirq_ctx[NR_CPUS]; void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); -extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bf7078b8a765..037db84026a1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -220,31 +220,9 @@ static __always_inline void call_do_softirq(const void *sp) ); } -static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) -{ - register unsigned long r3 asm("r3") = (unsigned long)regs; - - /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ - asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" - "mr %%r1, %[sp] ;" - "bl %[callee] ;" - PPC_LL " %%r1, 0(%%r1) ;" - : // Outputs - "+r" (r3) - : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), - [callee] "i" (__do_irq) - : // Clobbers - "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", - "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" - ); -} - DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs) { unsigned int irq; @@ -270,6 +248,28 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); -- cgit From e90855be9e90e4a046d2be817a31fae6637415a4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:41 +0200 Subject: powerpc/irq: Perform stack_overflow detection after switching to IRQ stack When KASAN is enabled, as shown by the Oops below, the 2k limit is not enough to allow stack dump after a stack overflow detection when CONFIG_DEBUG_STACKOVERFLOW is selected: do_IRQ: stack overflow: 1984 CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 Call Trace: Oops: Kernel stack overflow, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser fuse drm drm_panel_orientation_quirks configfs CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 NIP: c02e5558 LR: c07eb3bc CTR: c07f46a8 REGS: e7fe9f50 TRAP: 0000 Not tainted (5.18.0-gentoo-PMacG4) MSR: 00001032 CR: 44a14824 XER: 20000000 GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 00000008 00000000 c07eb3bc eaa1c010 GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 00000005 00000010 GPR16: 00000025 eaa1c154 eaa1c158 c0dbad64 00000020 fd543810 eaa1c0a0 eaa1c29e GPR24: c0dbad44 c0db8740 05ffffff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 c0c9a3c0 NIP [c02e5558] kasan_check_range+0xc/0x2b4 LR [c07eb3bc] format_decode+0x80/0x604 Call Trace: [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable) [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938 [eaa1c110] [c07f5788] sprintf+0xa0/0xc0 [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198 [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260 [eaa1c430] [c07f46d0] pointer+0x15c/0x710 [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938 [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678 [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378 [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0 [eaa1c750] [c000ca94] show_stack+0x21c/0x260 [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90 [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174 [eaa1c800] [c0009258] do_IRQ+0x20/0x34 [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c ... As the detection is asynchronously performed at IRQs, we could be long after the limit has been crossed, so increasing the limit would not solve the problem completely. In order to be sure that there is enough stack space for the stack dump, do it after the switch to the IRQ stack. That way it is sure that the stack is large enough, unless the IRQ stack has been overfilled in which case the end of life is close. Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c215d714329f475b431a6193369035aadfc0d182.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 037db84026a1..56d165c186c7 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -185,14 +185,12 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -static inline void check_stack_overflow(void) +static inline void check_stack_overflow(unsigned long sp) { - long sp; - if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) return; - sp = current_stack_pointer & (THREAD_SIZE - 1); + sp &= THREAD_SIZE - 1; /* check for stack overflow: is there less than 1/4th free? */ if (unlikely(sp < THREAD_SIZE / 4)) { @@ -222,12 +220,14 @@ static __always_inline void call_do_softirq(const void *sp) DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -static void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs, unsigned long oldsp) { unsigned int irq; trace_irq_entry(regs); + check_stack_overflow(oldsp); + /* * Query the platform PIC for the interrupt & ack it. * @@ -255,6 +255,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ asm volatile ( PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r4, %%r1 ;" "mr %%r1, %[sp] ;" "bl %[callee] ;" PPC_LL " %%r1, 0(%%r1) ;" @@ -280,11 +281,9 @@ void __do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; - check_stack_overflow(); - /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { - __do_irq(regs); + __do_irq(regs, current_stack_pointer); set_irq_regs(old_regs); return; } -- cgit From 78f1c24abd16952d383f34eefbb0af7bb53f9b0b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:42 +0200 Subject: powerpc/irq: Simplify __do_irq() Remove duplicated code by implementing a proper if/else. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5a3b21311191f1240850db6ab29b19ac7885fe03.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 56d165c186c7..d50a18888bd9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -281,14 +281,11 @@ void __do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; - /* Already there ? */ - if (unlikely(cursp == irqsp || cursp == sirqsp)) { + /* Already there ? If not switch stack and call */ + if (unlikely(cursp == irqsp || cursp == sirqsp)) __do_irq(regs, current_stack_pointer); - set_irq_regs(old_regs); - return; - } - /* Switch stack and call */ - call_do_irq(regs, irqsp); + else + call_do_irq(regs, irqsp); set_irq_regs(old_regs); } -- cgit From 92f89ec1b534b6eca2b81bae97d30a786932f51a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 11 Jun 2022 08:51:57 +0200 Subject: powerpc: Restore CONFIG_DEBUG_INFO in defconfigs Commit f9b3cd245784 ("Kconfig.debug: make DEBUG_INFO selectable from a choice") broke the selection of CONFIG_DEBUG_INFO by powerpc defconfigs. It is now necessary to select one of the three DEBUG_INFO_DWARF* options to get DEBUG_INFO enabled. Replace DEBUG_INFO=y by DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y in all defconfigs using the following command: sed -i s/DEBUG_INFO=y/DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y/g `git grep -l DEBUG_INFO arch/powerpc/configs/` Fixes: f9b3cd245784 ("Kconfig.debug: make DEBUG_INFO selectable from a choice") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/98a4c2603bf9e4b776e219f5b8541d23aa24e854.1654930308.git.christophe.leroy@csgroup.eu --- arch/powerpc/configs/44x/akebono_defconfig | 2 +- arch/powerpc/configs/44x/currituck_defconfig | 2 +- arch/powerpc/configs/44x/fsp2_defconfig | 2 +- arch/powerpc/configs/44x/iss476-smp_defconfig | 2 +- arch/powerpc/configs/44x/warp_defconfig | 2 +- arch/powerpc/configs/52xx/lite5200b_defconfig | 2 +- arch/powerpc/configs/52xx/motionpro_defconfig | 2 +- arch/powerpc/configs/52xx/tqm5200_defconfig | 2 +- arch/powerpc/configs/adder875_defconfig | 2 +- arch/powerpc/configs/ep8248e_defconfig | 2 +- arch/powerpc/configs/ep88xc_defconfig | 2 +- arch/powerpc/configs/fsl-emb-nonhw.config | 2 +- arch/powerpc/configs/mgcoge_defconfig | 2 +- arch/powerpc/configs/mpc5200_defconfig | 2 +- arch/powerpc/configs/mpc8272_ads_defconfig | 2 +- arch/powerpc/configs/mpc885_ads_defconfig | 2 +- arch/powerpc/configs/ppc6xx_defconfig | 2 +- arch/powerpc/configs/pq2fads_defconfig | 2 +- arch/powerpc/configs/ps3_defconfig | 2 +- arch/powerpc/configs/tqm8xx_defconfig | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 4bc549c6edc5..fde4824f235e 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -118,7 +118,7 @@ CONFIG_CRAMFS=y CONFIG_NLS_DEFAULT="n" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 717827219921..7283b7d4a1a5 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -73,7 +73,7 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NLS_DEFAULT="n" -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index 8da316e61a08..3fdfbb29b854 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -110,7 +110,7 @@ CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3 CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_CBC=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index c11e777b2f3d..0f6380e1e612 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -56,7 +56,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PPC_EARLY_DEBUG=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 47252c2d7669..20891c413149 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -88,7 +88,7 @@ CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 63368e677506..7db479dcbc0c 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -58,6 +58,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 72762da94846..6186ead1e105 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -84,7 +84,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index a3c8ca74032c..e6735b945327 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -85,7 +85,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 5326bc739279..7f35d5bc1229 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -45,7 +45,7 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 00d69965f898..8df6d3a293e3 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -59,7 +59,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index f5c3e72da719..a98ef6a4abef 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -48,6 +48,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index df37efed0aec..f14c6dbd7346 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -24,7 +24,7 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index dcc8dccf54f3..498d35db7833 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -73,7 +73,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 83d801307178..c0fe5e76604a 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -122,6 +122,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index 00a4d2bf43b2..4145ef5689ca 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -67,7 +67,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index c74dc76b1d0d..700115d85d6f 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -71,7 +71,7 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_DEV_TALITOS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_VM_PGTABLE=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index b622ecd73286..91967824272e 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1065,7 +1065,7 @@ CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_HEADERS_INSTALL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 9d8a76857c6f..9d63e2e65211 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -68,7 +68,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 7c95fab4b920..2d9ac233da68 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -153,7 +153,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 77857d513022..083c2e57520a 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -55,6 +55,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -- cgit From 1ce844973bb516e95d3f2bcb001a3992548def9d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:23 +0200 Subject: powerpc/32: Remove the 'nobats' kernel parameter Mapping without BATs doesn't bring any added value to the user. Remove that option. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6977314c823cfb728bc0273cea634b41807bfb64.1655202721.git.christophe.leroy@csgroup.eu --- Documentation/admin-guide/kernel-parameters.txt | 3 --- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/init_32.c | 11 ----------- arch/powerpc/mm/mmu_decl.h | 1 - arch/powerpc/platforms/83xx/misc.c | 14 ++++++-------- 5 files changed, 7 insertions(+), 24 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8090130b544b..96de3f1ece00 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3495,9 +3495,6 @@ noautogroup Disable scheduler automatic task group creation. - nobats [PPC] Do not use BATs for mapping kernel lowmem - on "Classic" PPC cores. - nocache [ARM] nodsp [SH] Disable hardware DSP at boot time. diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 49a737fbbd18..1794132db31e 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 693a3a7a9463..321794747ea1 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,12 +69,6 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* - * this tells the system to map all of ram with the segregs - * (i.e. page tables) instead of the bats. - * -- Cort - */ -int __map_without_bats; int __map_without_ltlbs; /* max amount of low RAM to map in */ @@ -85,11 +79,6 @@ unsigned long __max_low_memory = MAX_LOW_MEM; */ static void __init MMU_setup(void) { - /* Check for nobats option (used in mapin_ram). */ - if (strstr(boot_command_line, "nobats")) { - __map_without_bats = 1; - } - if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 63c4b1a4d435..229c72e49198 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -92,7 +92,6 @@ extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); -extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 3285dabcf923..2fb2a85d131f 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -121,17 +121,15 @@ void __init mpc83xx_setup_pci(void) void __init mpc83xx_setup_arch(void) { + phys_addr_t immrbase = get_immrbase(); + int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M; + unsigned long va = fix_to_virt(FIX_IMMR_BASE); + if (ppc_md.progress) ppc_md.progress("mpc83xx_setup_arch()", 0); - if (!__map_without_bats) { - phys_addr_t immrbase = get_immrbase(); - int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M; - unsigned long va = fix_to_virt(FIX_IMMR_BASE); - - setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); - update_bats(); - } + setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); + update_bats(); } int machine_check_83xx(struct pt_regs *regs) -- cgit From 56e54b4e6c477b2a7df43f9a320ae5f9a5bfb16c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:24 +0200 Subject: powerpc/32: Remove 'noltlbs' kernel parameter Mapping without large TLBs has no added value on the 8xx. Mapping without large TLBs is still necessary on 40x when selecting CONFIG_KFENCE or CONFIG_DEBUG_PAGEALLOC or CONFIG_STRICT_KERNEL_RWX, but this is done automatically and doesn't require user selection. Remove 'noltlbs' kernel parameter, the user has no reason to use it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/80ca17bd39cf608a8ebd0764d7064a498e131199.1655202721.git.christophe.leroy@csgroup.eu --- Documentation/admin-guide/kernel-parameters.txt | 3 --- arch/powerpc/mm/init_32.c | 3 --- arch/powerpc/mm/nohash/8xx.c | 9 --------- 3 files changed, 15 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 96de3f1ece00..2322e429150d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3661,9 +3661,6 @@ nolapic_timer [X86-32,APIC] Do not use the local APIC timer. - noltlbs [PPC] Do not use large page/tlb entries for kernel - lowmem mapping on PPC40x and PPC8xx - nomca [IA-64] Disable machine check abort handling nomce [X86-32] Disable Machine Check Exception diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 321794747ea1..6f2e6210c273 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -79,9 +79,6 @@ unsigned long __max_low_memory = MAX_LOW_MEM; */ static void __init MMU_setup(void) { - if (strstr(boot_command_line, "noltlbs")) { - __map_without_ltlbs = 1; - } if (IS_ENABLED(CONFIG_PPC_8xx)) return; diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 27f9186ae374..6b668ccef836 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -14,8 +14,6 @@ #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) -extern int __map_without_ltlbs; - static unsigned long block_mapped_ram; /* @@ -28,8 +26,6 @@ phys_addr_t v_block_mapped(unsigned long va) if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) return p + va - VIRT_IMMR_BASE; - if (__map_without_ltlbs) - return 0; if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) return __pa(va); return 0; @@ -45,8 +41,6 @@ unsigned long p_block_mapped(phys_addr_t pa) if (pa >= p && pa < p + IMMR_SIZE) return VIRT_IMMR_BASE + pa - p; - if (__map_without_ltlbs) - return 0; if (pa < block_mapped_ram) return (unsigned long)__va(pa); return 0; @@ -153,9 +147,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) mmu_mapin_immr(); - if (__map_without_ltlbs) - return 0; - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); if (debug_pagealloc_enabled_or_kfence()) { top = boundary; -- cgit From 513f5bbac7b9ca7046bc350dd6eb39b957e338a7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:25 +0200 Subject: powerpc/32: Remove __map_without_ltlbs __map_without_ltlbs is used only for 40x, and only when STRICT_KERNEL_RWX, KFENCE or DEBUG_PAGEALLOC is active. Do the verification directly in 40x version of mmu_mapin_ram() and remove __map_without_ltlbs from core ppc32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3422094db965d218c4c3d8580f526963a9ac897f.1655202721.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/init_32.c | 23 ----------------------- arch/powerpc/mm/nohash/40x.c | 9 +++++++-- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6f2e6210c273..62d9af6606cd 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,29 +69,9 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -int __map_without_ltlbs; - /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * Check for command-line options that affect what MMU_init will do. - */ -static void __init MMU_setup(void) -{ - if (IS_ENABLED(CONFIG_PPC_8xx)) - return; - - if (IS_ENABLED(CONFIG_KFENCE)) - __map_without_ltlbs = 1; - - if (debug_pagealloc_enabled()) - __map_without_ltlbs = 1; - - if (strict_kernel_rwx_enabled()) - __map_without_ltlbs = 1; -} - /* * MMU_init sets up the basic memory mappings for the kernel, * including both RAM and possibly some I/O regions, @@ -102,9 +82,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* parse args from command line */ - MMU_setup(); - /* * Reserve gigantic pages for hugetlb. This MUST occur before * lowmem_end_addr is initialized below. diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index b32e465a3d52..3684d6e570fb 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -43,7 +43,6 @@ #include -extern int __map_without_ltlbs; /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ @@ -94,7 +93,13 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) p = 0; s = total_lowmem; - if (__map_without_ltlbs) + if (IS_ENABLED(CONFIG_KFENCE)) + return 0; + + if (debug_pagealloc_enabled()) + return 0; + + if (strict_kernel_rwx_enabled()) return 0; while (s >= LARGE_PAGE_SIZE_16M) { -- cgit From 12a9eddd239e14ccbf0ea50b3504a21bda002ba9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Jun 2022 08:21:22 +0200 Subject: powerpc: Remove _PAGE_SAO stub for book3e/64 Since commit 634093c59a12 ("powerpc/mm: enable ARCH_HAS_VM_GET_PAGE_PROT"), _PAGE_SAO is used only in arch/powerpc/mm/book3s64/pgtable.c The _PAGE_SAO stub defined as 0 for book3e/64 can be removed. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/715e644fb3c7d992c0b71f6165ab6cf8c682055a.1655706069.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 57083f95e82b..25b72b2b30ce 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -83,8 +83,6 @@ */ #include -#define _PAGE_SAO 0 - #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* -- cgit From 2db2008e636327a3caa648ef1e34a9d501d20e2e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 23 Jun 2022 10:56:57 +0200 Subject: powerpc/64e: Rewrite p4d_populate() as a static inline function Rewrite p4d_populate() as a static inline function instead of a macro. This change allows typechecking and would have helped detecting a recently found bug in map_kernel_page(). Signed-off-by: Christophe Leroy Acked-by: Mike Rapoport Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1b416f8a8fe1bc3f4e01175680ce310b7eb3a1e4.1655974565.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 668aee6017e7..e50b211becb3 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -15,7 +15,10 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + p4d_set(p4d, (unsigned long)pud); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { -- cgit From dd8de84b57b02ba9c1fe530a6d916c0853f136bd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:43:35 +0200 Subject: powerpc/ptdump: Fix display of RW pages on FSL_BOOK3E On FSL_BOOK3E, _PAGE_RW is defined with two bits, one for user and one for supervisor. As soon as one of the two bits is set, the page has to be display as RW. But the way it is implemented today requires both bits to be set in order to display it as RW. Instead of display RW when _PAGE_RW bits are set and R otherwise, reverse the logic and display R when _PAGE_RW bits are all 0 and RW otherwise. This change has no impact on other platforms as _PAGE_RW is a single bit on all of them. Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c33b96317811edf691e81698aaee8fa45ec3449.1656427391.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/shared.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index 03607ab90c66..f884760ca5cf 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { .mask = _PAGE_RW, - .val = _PAGE_RW, - .set = "rw", - .clear = "r ", + .val = 0, + .set = "r ", + .clear = "rw", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, -- cgit From 09317643117ade87c03158341e87466413fa8f1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:54 +0200 Subject: powerpc/64e: Fix early TLB miss with KUAP With KUAP, the TLB miss handler bails out when an access to user memory is performed with a nul TID. But the normal TLB miss routine which is only used early during boot does the check regardless for all memory areas, not only user memory. By chance there is no early IO or vmalloc access, but when KASAN come we will start having early TLB misses. Fix it by creating a special branch for user accesses similar to the one in the 'bolted' TLB miss handlers. Unfortunately SPRN_MAS1 is now read too early and there are no registers available to preserve it so it will be read a second time. Fixes: 57bc963837f5 ("powerpc/kuap: Wire-up KUAP on book3e/64") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8d6c5859a45935d6e1a336da4dc20be421e8cea7.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb_low_64e.S | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 8b97c4acfebf..9e9ab3803fb2 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -583,7 +583,7 @@ itlb_miss_fault_e6500: */ rlwimi r11,r14,32-19,27,27 rlwimi r11,r14,32-16,19,19 - beq normal_tlb_miss + beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 cmpldi cr0,r15,8 /* Check for vmalloc region */ @@ -626,7 +626,7 @@ itlb_miss_fault_e6500: cmpldi cr0,r15,0 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ - beq normal_tlb_miss + beq normal_tlb_miss_user li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h @@ -653,6 +653,12 @@ itlb_miss_fault_e6500: * r11 = PTE permission mask * r10 = crap (free to use) */ +normal_tlb_miss_user: +#ifdef CONFIG_PPC_KUAP + mfspr r14,SPRN_MAS1 + rlwinm. r14,r14,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif normal_tlb_miss: /* So we first construct the page table address. We do that by * shifting the bottom of the address (not the region ID) by @@ -683,11 +689,6 @@ finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault -#ifdef CONFIG_PPC_KUAP - mfspr r11,SPRN_MAS1 - rlwinm. r10,r11,0,0x3fff0000 - beq- normal_tlb_miss_access_fault /* KUAP fault */ -#endif /* Now we build the MAS: * @@ -709,9 +710,7 @@ finish_normal_tlb_miss: rldicl r10,r14,64-8,64-8 cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f -#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 -#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 -- cgit From 3adfb457b84bd6de4e78a99814038fbd7205f253 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:55 +0200 Subject: powerpc/64e: Remove MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS Commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated pieces") removed the last CPU having features MMU_FTRS_A2 and commit cd68098bcedd ("powerpc: Clean up MMU_FTRS_A2 and MMU_FTR_TYPE_3E") removed MMU_FTRS_A2 which was the last user of MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. Remove all code that relies on MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. With this change done, TLB miss can happen before the mmu feature fixups. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cfd5a0ecdb1598da968832e1bddf7431ec267200.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 12 ------ arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/nohash/book3e_hugetlbpage.c | 30 ++++--------- arch/powerpc/mm/nohash/tlb_low_64e.S | 66 ----------------------------- 4 files changed, 8 insertions(+), 101 deletions(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 5f41565a1e5d..860d0290ca4d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -96,15 +96,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* Enable use of TLB reservation. Processor should support tlbsrx. - * instruction and MAS0[WQ]. - */ -#define MMU_FTR_USE_TLBRSRV ASM_CONST(0x00800000) - -/* Use paired MAS registers (MAS7||MAS3, etc.) - */ -#define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) - /* Doesn't support the B bit (1T segment) in SLBIE */ #define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000) @@ -180,9 +171,6 @@ enum { #ifdef CONFIG_PPC_83xx MMU_FTR_NEED_DTLB_SW_LRU | #endif -#ifdef CONFIG_PPC_BOOK3E_64 - MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | -#endif #ifdef CONFIG_PPC_BOOK3S_64 MMU_FTR_KERNEL_RO | #ifdef CONFIG_PPC_64S_HASH_MMU diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5761f08dae95..2b2d0b0fbb30 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -113,7 +113,6 @@ void __init setup_tlb_core_data(void) * Should we panic instead? */ WARN_ONCE(smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && book3e_htw_mode != PPC_HTW_E6500, "%s: unsupported MMU configuration\n", __func__); } diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index 307ca919d393..c7d4b317a823 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -103,21 +103,11 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) int found = 0; mtspr(SPRN_MAS6, pid << 16); - if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { - asm volatile( - "li %0,0\n" - "tlbsx. 0,%1\n" - "bne 1f\n" - "li %0,1\n" - "1:\n" - : "=&r"(found) : "r"(ea)); - } else { - asm volatile( - "tlbsx 0,%1\n" - "mfspr %0,0x271\n" - "srwi %0,%0,31\n" - : "=&r"(found) : "r"(ea)); - } + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); return found; } @@ -169,13 +159,9 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) mtspr(SPRN_MAS1, mas1); mtspr(SPRN_MAS2, mas2); - if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { - mtspr(SPRN_MAS7_MAS3, mas7_3); - } else { - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); - mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); - } + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); asm volatile ("tlbwe"); diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9e9ab3803fb2..a59485c549a7 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -152,16 +152,7 @@ tlb_miss_common_bolted: clrrdi r15,r15,3 beq tlb_miss_fault_bolted /* No PGDIR, bail */ -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) - ldx r14,r14,r15 /* grab pgd entry */ - beq tlb_miss_done_bolted /* tlb exists already, bail */ -MMU_FTR_SECTION_ELSE ldx r14,r14,r15 /* grab pgd entry */ -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -674,16 +665,7 @@ normal_tlb_miss: clrrdi r14,r14,3 or r10,r15,r14 -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) ld r14,0(r10) - beq normal_tlb_miss_done -MMU_FTR_SECTION_ELSE - ld r14,0(r10) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) finish_normal_tlb_miss: /* Check if required permissions are met */ @@ -727,13 +709,9 @@ finish_normal_tlb_miss: li r11,MAS3_SW|MAS3_UW andc r15,r15,r11 1: -BEGIN_MMU_FTR_SECTION srdi r16,r15,32 mtspr SPRN_MAS3,r15 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r15 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -809,13 +787,6 @@ virt_page_table_tlb_miss: #else 1: #endif -BEGIN_MMU_FTR_SECTION - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - */ - PPC_TLBSRX_DOT(0,R16) - beq virt_page_table_tlb_miss_done -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Now, we need to walk the page tables. First check if we are in * range. @@ -866,41 +837,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) clrldi r11,r15,4 /* remove region ID from RPN */ ori r10,r11,1 /* Or-in SR */ -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -BEGIN_MMU_FTR_SECTION -virt_page_table_tlb_miss_done: - - /* We have overridden MAS2:EPN but currently our primary TLB miss - * handler will always restore it so that should not be an issue, - * if we ever optimize the primary handler to not write MAS2 on - * some cases, we'll have to restore MAS2:EPN here based on the - * original fault's DEAR. If we do that we have to modify the - * ITLB miss handler to also store SRR0 in the exception frame - * as DEAR. - * - * However, one nasty thing we did is we cleared the reservation - * (well, potentially we did). We do a trick here thus if we - * are not a level 0 exception (we interrupted the TLB miss) we - * offset the return address by -4 in order to replay the tlbsrx - * instruction there - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- 1f - ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) - addi r10,r11,-4 - std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) -1: -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ TLB_MISS_EPILOG_SUCCESS rfi @@ -1115,13 +1057,9 @@ htw_tlb_miss: */ ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -1202,13 +1140,9 @@ tlb_load_linear: clrldi r10,r10,4 /* clear region bits */ ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -- cgit From b646c1f7f43c13510d519e3044c87aa32352fc1f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:56 +0200 Subject: powerpc/64e: Remove unused REGION related macros Those macros are not used anywhere. Remove them as they are soon going to be wrong and are not worth modifying as they are not used. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f0efde8cee0924c3991790042b176ac77ad35e1f.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 25b72b2b30ce..d29f1d65ef05 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -57,18 +57,6 @@ #define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M - -/* - * Region IDs - */ -#define REGION_SHIFT 60UL -#define REGION_MASK (0xfUL << REGION_SHIFT) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) -#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define USER_REGION_ID (0UL) - /* * Defines the address of the vmemap area, in its own region on * after the vmalloc space on Book3E -- cgit From 128c1ea2f838d3031a1c475607860e4271a8e9dc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:57 +0200 Subject: powerpc/64e: Move virtual memory closer to linear memory Today nohash/64 have linear memory based at 0xc000000000000000 and virtual memory based at 0x8000000000000000. In order to implement KASAN, we need to regroup both areas. Move virtual memmory at 0xc000100000000000. This complicates a bit TLB miss handlers. Until now, memory region was easily identified with the 4 higher bits of address: - 0 ==> User - c ==> Linear Memory - 8 ==> Virtual Memory Now we need to rely on the 20 higher bits, with: - 0xxxx ==> User - c0000 ==> Linear Memory - c0001 ==> Virtual Memory Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4b225168031449fc34fc7132f3923cc8dc54af60.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- arch/powerpc/mm/nohash/tlb_low_64e.S | 64 ++++++++++++++++------------ 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index d29f1d65ef05..9104cc4121b2 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -25,7 +25,7 @@ /* * Define the address range of the kernel non-linear virtual area */ -#define KERN_VIRT_START ASM_CONST(0x8000000000000000) +#define KERN_VIRT_START ASM_CONST(0xc000100000000000) #define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index a59485c549a7..68ffbfdba894 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -213,10 +213,11 @@ itlb_miss_kernel_bolted: tlb_miss_kernel_bolted: mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + andi. r15,r15,1 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_bolted + bne+ tlb_miss_common_bolted tlb_miss_fault_bolted: /* We need to check if it was an instruction miss */ @@ -498,7 +499,9 @@ tlb_miss_huge_e6500: tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) - cmpldi cr1,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + xoris r15,r15,0xc /* Check for vmalloc region */ + cmplwi cr1,r15,1 beq+ cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: @@ -532,16 +535,18 @@ itlb_miss_fault_e6500: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* The page tables are mapped virtually linear. At this point, though, * we don't know whether we are trying to fault in a first level * virtual address or a virtual page table address. We can get that * from bit 0x1 of the region ID which we have set for a page table */ - andi. r10,r15,0x1 + andis. r10,r15,0x1 bne- virt_page_table_tlb_miss std r14,EX_TLB_ESR(r12); /* save ESR */ @@ -553,7 +558,7 @@ itlb_miss_fault_e6500: /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r15,0 /* Check for user region */ + srdi. r15,r16,60 /* Check for user region */ /* We pre-test some combination of permissions to avoid double * faults: @@ -577,10 +582,9 @@ itlb_miss_fault_e6500: beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -606,16 +610,18 @@ itlb_miss_fault_e6500: * * Faulting address is SRR0 which is already in r16 */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h - cmpldi cr0,r15,0 /* Check for user region */ + srdi. r15,r16,60 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ beq normal_tlb_miss_user @@ -623,10 +629,9 @@ itlb_miss_fault_e6500: oris r11,r11,_PAGE_ACCESSED@h /* XXX replace the RMW cycles with immediate loads + writes */ mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault */ TLB_MISS_EPILOG_ERROR @@ -659,10 +664,11 @@ normal_tlb_miss: * NOTE: For 64K pages, we do things slightly differently in * order to handle the weird page table format used by linux */ - ori r10,r15,0x1 + srdi r15,r16,44 + oris r10,r15,0x1 rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 - sldi r15,r10,60 - clrrdi r14,r14,3 + sldi r15,r10,44 + clrrdi r14,r14,19 or r10,r15,r14 ld r14,0(r10) @@ -763,6 +769,7 @@ normal_tlb_miss_access_fault: */ virt_page_table_tlb_miss: /* Are we hitting a kernel page table ? */ + srdi r15,r16,60 andi. r10,r15,0x8 /* The cool thing now is that r10 contains 0 for user and 8 for kernel, @@ -791,7 +798,8 @@ virt_page_table_tlb_miss: /* Now, we need to walk the page tables. First check if we are in * range. */ - rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + rldicl r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + cmpldi r10,0x80 bne- virt_page_table_tlb_miss_fault /* Get the PGD pointer */ @@ -910,23 +918,24 @@ virt_page_table_tlb_miss_whacko_fault: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss + beq+ cr1,htw_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -952,19 +961,20 @@ virt_page_table_tlb_miss_whacko_fault: * * Faulting address is SRR0 which is already in r16 */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ -- cgit From 059c189389ebe9c4909d849d1a5f65c53115ca19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:58 +0200 Subject: powerpc/64e: Reorganise virtual memory Reduce the size of IO map in order to leave the last quarter of virtual MAP for KASAN shadow mapping. This gives the following layout. +------------------------+ Kernel virtual map end (0xc000200000000000) | | | 16TB (unused) | | | +------------------------+ Kernel IO map end | | | 16TB of IO map | | | +------------------------+ Kernel IO map start | | | 16TB of vmemmap | | | +------------------------+ Kernel vmemmap start | | | 16TB of vmap | | | +------------------------+ Kernel virt start (0xc000100000000000) | | | 64TB of linear mem | | | +------------------------+ Kernel linear (0xc.....) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/54ef01673bf14228106afd629f795c83acb9a00c.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9104cc4121b2..599921cc257e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -38,15 +38,16 @@ #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* - * The second half of the kernel virtual space is used for IO mappings, + * The third quarter of the kernel virtual space is used for IO mappings, * it's itself carved into the PIO region (ISA and PHB IO space) and * the ioremap space * * ISA_IO_BASE = KERN_IO_START, 64K reserved area * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces - * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE + * IOREMAP_BASE = ISA_IO_BASE + 2G to KERN_IO_START + KERN_IO_SIZE */ #define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) +#define KERN_IO_SIZE (KERN_VIRT_SIZE >> 2) #define FULL_IO_SIZE 0x80000000ul #define ISA_IO_BASE (KERN_IO_START) #define ISA_IO_END (KERN_IO_START + 0x10000ul) @@ -54,7 +55,7 @@ #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) +#define IOREMAP_END (KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M /* -- cgit From c7b9ed7c34a9f5dbf8222d63e3e313cef9f3150b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:59 +0200 Subject: powerpc/64e: KASAN Full support for BOOK3E/64 We now have memory organised in a way that allows implementing KASAN. Unlike book3s/64, book3e always has translation active so the only thing needed to use KASAN is to setup an early zero shadow mapping just after setting a stack pointer and before calling early_setup(). The memory layout is now as follows +------------------------+ Kernel virtual map end (0xc000200000000000) | | | 16TB of KASAN map | | | +------------------------+ Kernel KASAN shadow map start | | | 16TB of IO map | | | +------------------------+ Kernel IO map start | | | 16TB of vmemmap | | | +------------------------+ Kernel vmemmap start | | | 16TB of vmap | | | +------------------------+ Kernel virt start (0xc000100000000000) | | | 64TB of linear mem | | | +------------------------+ Kernel linear (0xc.....) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0bef8beda27baf71e3b9e8b13e620fba6e19499b.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 + arch/powerpc/Kconfig.debug | 3 +- arch/powerpc/include/asm/kasan.h | 13 +++- arch/powerpc/kernel/head_64.S | 3 + arch/powerpc/mm/kasan/Makefile | 1 + arch/powerpc/mm/kasan/init_book3e_64.c | 133 +++++++++++++++++++++++++++++++++ arch/powerpc/mm/kasan/init_book3s_64.c | 2 + arch/powerpc/platforms/Kconfig.cputype | 1 - 8 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/mm/kasan/init_book3e_64.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c2ce2e60c8f0..92e0cad7752d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -193,6 +193,7 @@ config PPC select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN if PPC_RADIX_MMU + select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x select HAVE_ARCH_KGDB @@ -254,6 +255,7 @@ config PPC select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN && MODULES select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 9f363c143d86..6a8855d45af7 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -375,4 +375,5 @@ config KASAN_SHADOW_OFFSET hex depends on KASAN default 0xe0000000 if PPC32 - default 0xa80e000000000000 if PPC64 + default 0xa80e000000000000 if PPC_BOOK3S_64 + default 0xa8001c0000000000 if PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index a6be4025cba2..92a968202ba7 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32) #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET @@ -39,6 +39,17 @@ * c00e000000000000 << 3 + a80e000000000000 = c00fc00000000000 */ #define KASAN_SHADOW_END 0xc00fc00000000000UL + +#else + +/* + * The shadow ends before the highest accessible address + * because we don't need a shadow for the shadow. + * But it doesn't hurt to have a shadow for the shadow, + * keep shadow end aligned eases things. + */ +#define KASAN_SHADOW_END 0xc000200000000000UL + #endif #ifdef CONFIG_KASAN diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index d3eea633d11a..cf2c08902c05 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -965,6 +965,9 @@ start_here_multiplatform: * and SLB setup before we turn on relocation. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 LOAD_REG_ADDR(r12, DOTSYM(early_setup)) diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 4999aadb1867..699eeffd9f55 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PPC32) += init_32.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c new file mode 100644 index 000000000000..11519e88dc6b --- /dev/null +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KASAN for 64-bit Book3e powerpc + * + * Copyright 2022, Christophe Leroy, CS GROUP France + */ + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +#include + +static inline bool kasan_pud_table(p4d_t p4d) +{ + return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud)); +} + +static inline bool kasan_pmd_table(pud_t pud) +{ + return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd)); +} + +static inline bool kasan_pte_table(pmd_t pmd) +{ + return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte)); +} + +static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(ea); + p4dp = p4d_offset(pgdp, ea); + if (kasan_pud_table(*p4dp)) { + pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); + } + pudp = pud_offset(p4dp, ea); + if (kasan_pmd_table(*pudp)) { + pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (kasan_pte_table(*pmdp)) { + ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + + __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0); + + return 0; +} + +static void __init kasan_init_phys_region(void *start, void *end) +{ + unsigned long k_start, k_end, k_cur; + void *va; + + if (start >= end) + return; + + k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE); + k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE); + + va = memblock_alloc(k_end - k_start, PAGE_SIZE); + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE) + kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL); +} + +void __init kasan_early_init(void) +{ + int i; + unsigned long addr; + pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START); + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i], + kasan_early_shadow_pte); + + for (i = 0; i < PTRS_PER_PUD; i++) + pud_populate(&init_mm, &kasan_early_shadow_pud[i], + kasan_early_shadow_pmd); + + for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE) + p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud); +} + +void __init kasan_init(void) +{ + phys_addr_t start, end; + u64 i; + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); + + for_each_mem_range(i, &start, &end) + kasan_init_phys_region((void *)start, (void *)end); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + + /* Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +void __init kasan_late_init(void) { } diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 0da5566d6b84..9300d641cf9a 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -99,4 +99,6 @@ void __init kasan_init(void) pr_info("KASAN init done\n"); } +void __init kasan_early_init(void) { } + void __init kasan_late_init(void) { } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9e2df4b66478..383ed4fe6013 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -2,7 +2,6 @@ config PPC32 bool default y if !PPC64 - select KASAN_VMALLOC if KASAN && MODULES config PPC64 bool "64-bit kernel" -- cgit From 9981bace85d816ed8724ac46e49285e8488d29e6 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:50 -0300 Subject: KVM: PPC: Book3S HV: Fix "rm_exit" entry in debugfs timings At debugfs/kvm//vcpu0/timings we show how long each part of the code takes to run: $ cat /sys/kernel/debug/kvm/*-*/vcpu0/timings rm_entry: 123785 49398892 118 4898 rm_intr: 123780 6075890 22 390 rm_exit: 0 0 0 0 <-- NOK guest: 123780 46732919988 402 9997638 cede: 0 0 0 0 <-- OK, no cede napping in P9 The "rm_exit" is always showing zero because it is the last one and end_timing does not increment the counter of the previous entry. We can fix it by calling accumulate_time again instead of end_timing. That way the counter gets incremented. The rest of the arithmetic can be ignored because there are no timing points after this and the accumulators are reset before the next round. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv_p9_entry.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 112a09b33328..7f88be386b27 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -438,15 +438,6 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING -static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 tb = mftb() - vc->tb_offset_applied; - - vcpu->arch.cur_activity = next; - vcpu->arch.cur_tb_start = tb; -} - static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -478,8 +469,8 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator curr->seqcount = seq + 2; } -#define start_timing(vcpu, next) __start_timing(vcpu, next) -#define end_timing(vcpu) __start_timing(vcpu, NULL) +#define start_timing(vcpu, next) __accumulate_time(vcpu, next) +#define end_timing(vcpu) __accumulate_time(vcpu, NULL) #define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) #else #define start_timing(vcpu, next) do {} while (0) -- cgit From 3f8ed993be3cf154a91d9ab5e80470c6c755adbe Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:51 -0300 Subject: KVM: PPC: Book3S HV: Add a new config for P8 debug timing Turn the existing Kconfig KVM_BOOK3S_HV_EXIT_TIMING into KVM_BOOK3S_HV_P8_TIMING in preparation for the addition of a new config for P9 timings. This applies only to P8 code, the generic timing code is still kept under KVM_BOOK3S_HV_EXIT_TIMING. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-3-farosas@linux.ibm.com --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/Kconfig | 6 +++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index eec536aef83a..8c10f536e478 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -379,7 +379,7 @@ int main(void) OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ddd88179110a..73f8277df7d1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -128,7 +128,11 @@ config KVM_BOOK3S_64_PR and system calls on the host. config KVM_BOOK3S_HV_EXIT_TIMING - bool "Detailed timing for hypervisor real-mode code" + bool + +config KVM_BOOK3S_HV_P8_TIMING + bool "Detailed timing for hypervisor real-mode code (for POWER8)" + select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS help Calculate time taken for each vcpu in the real-mode guest entry, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0fc0e68d20d0..7ded202bf995 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -237,14 +237,14 @@ kvm_novcpu_wakeup: cmpdi r4, 0 beq kvmppc_primary_no_guest -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMENTRY bl kvmhv_start_timing #endif b kvmppc_got_guest kvm_novcpu_exit: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 beq 13f @@ -523,7 +523,7 @@ kvmppc_hv_entry: li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Store initial timestamp */ cmpdi r4, 0 beq 1f @@ -894,7 +894,7 @@ fast_guest_return: li r9, KVM_GUEST_MODE_GUEST_HV stb r9, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Accumulate timing */ addi r3, r4, VCPU_TB_GUEST bl kvmhv_accumulate_time @@ -945,7 +945,7 @@ secondary_too_late: cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -959,7 +959,7 @@ hdec_soon: li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 12: stw r12, VCPU_TRAP(r4) mr r9, r4 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -1056,7 +1056,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) li r0, MSR_RI mtmsrd r0, 1 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMINTR mr r4, r9 bl kvmhv_accumulate_time @@ -1135,7 +1135,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMEXIT mr r4, r9 bl kvmhv_accumulate_time @@ -1495,7 +1495,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_LPCR,r8 isync -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 @@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r4, HSTATE_KVM_VCPU(r13) std r3, VCPU_DEC_EXPIRES(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) addi r3, r4, VCPU_TB_CEDE bl kvmhv_accumulate_time @@ -2221,7 +2221,7 @@ kvm_end_cede: /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time #endif @@ -2961,7 +2961,7 @@ kvmppc_fix_pmao: isync blr -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* * Start timing an activity * r3 = pointer to time accumulation struct, r4 = vcpu -- cgit From c3fa64c99c61d99631a8e06a6cf991c35c98ec7d Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:52 -0300 Subject: KVM: PPC: Book3S HV: Decouple the debug timing from the P8 entry path We are currently doing the timing for debug purposes of the P9 entry path using the accumulators and terminology defined by the old entry path for P8 machines. Not only the "real-mode" and "napping" mentions are out of place for the P9 Radix entry path but also we cannot change them because the timing code is coupled to the structures defined in struct kvm_vcpu_arch. Add a new CONFIG_KVM_BOOK3S_HV_P9_TIMING to enable the timing code for the P9 entry path. For now, just add the new CONFIG and duplicate the structures. A subsequent patch will add the P9 changes. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-4-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 8 ++++++++ arch/powerpc/kvm/Kconfig | 14 +++++++++++++- arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++-- arch/powerpc/kvm/book3s_hv_p9_entry.c | 2 +- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2909a88acd16..a0f44762a069 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -830,11 +830,19 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ u64 cur_tb_start; /* when it started */ +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ +#else + struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ + struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ + struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ + struct kvmhv_tb_accumulator guest_time; /* guest execution */ + struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ +#endif #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 73f8277df7d1..191347f44731 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -130,10 +130,22 @@ config KVM_BOOK3S_64_PR config KVM_BOOK3S_HV_EXIT_TIMING bool +config KVM_BOOK3S_HV_P9_TIMING + bool "Detailed timing for the P9 entry point" + select KVM_BOOK3S_HV_EXIT_TIMING + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS + help + Calculate time taken for each vcpu in various parts of the + code. The total, minimum and maximum times in nanoseconds + together with the number of executions are reported in debugfs in + kvm/vm#/vcpu#/timings. + + If unsure, say N. + config KVM_BOOK3S_HV_P8_TIMING bool "Detailed timing for hypervisor real-mode code (for POWER8)" select KVM_BOOK3S_HV_EXIT_TIMING - depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS && !KVM_BOOK3S_HV_P9_TIMING help Calculate time taken for each vcpu in the real-mode guest entry, exit, and interrupt handling code, plus time spent in the guest diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e08fb3124dca..108ee563f72a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2660,11 +2660,19 @@ static struct debugfs_timings_element { const char *name; size_t offset; } timings[] = { +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, +#else + {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, + {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, + {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, + {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, + {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, +#endif }; #define N_TIMINGS (ARRAY_SIZE(timings)) @@ -2783,8 +2791,9 @@ static const struct file_operations debugfs_timings_ops = { /* Create a debugfs directory for the vcpu */ static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, - &debugfs_timings_ops); + if (cpu_has_feature(CPU_FTR_ARCH_300) == IS_ENABLED(CONFIG_KVM_BOOK3S_HV_P9_TIMING)) + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, + &debugfs_timings_ops); return 0; } diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 7f88be386b27..8d8c0cf39bdf 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -437,7 +437,7 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; -- cgit From 2861c827286fb6646f6b6caee418efd99992097c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:53 -0300 Subject: KVM: PPC: Book3S HV: Expose timing functions to module code The next patch adds new timing points to the P9 entry path, some of which are in the module code, so we need to export the timing functions. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-5-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.h | 10 ++++++++++ arch/powerpc/kvm/book3s_hv_p9_entry.c | 11 ++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h index 6b7f07d9026b..2f2e59d7d433 100644 --- a/arch/powerpc/kvm/book3s_hv.h +++ b/arch/powerpc/kvm/book3s_hv.h @@ -40,3 +40,13 @@ void switch_pmu_to_guest(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); void switch_pmu_to_host(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); + +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next); +#define start_timing(vcpu, next) accumulate_time(vcpu, next) +#define end_timing(vcpu) accumulate_time(vcpu, NULL) +#else +#define accumulate_time(vcpu, next) do {} while (0) +#define start_timing(vcpu, next) do {} while (0) +#define end_timing(vcpu) do {} while (0) +#endif diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 8d8c0cf39bdf..32e078db8da2 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -438,7 +438,7 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING -static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; struct kvmhv_tb_accumulator *curr; @@ -468,14 +468,7 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator smp_wmb(); curr->seqcount = seq + 2; } - -#define start_timing(vcpu, next) __accumulate_time(vcpu, next) -#define end_timing(vcpu) __accumulate_time(vcpu, NULL) -#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) -#else -#define start_timing(vcpu, next) do {} while (0) -#define end_timing(vcpu) do {} while (0) -#define accumulate_time(vcpu, next) do {} while (0) +EXPORT_SYMBOL_GPL(accumulate_time); #endif static inline u64 mfslbv(unsigned int idx) -- cgit From b44bb1b7cbbae66ec73868f5fbd57c54f0612d1c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:54 -0300 Subject: KVM: PPC: Book3S HV: Provide more detailed timings for P9 entry path Alter the data collection points for the debug timing code in the P9 path to be more in line with what the code does. The points where we accumulate time are now the following: vcpu_entry: From vcpu_run_hv entry until the start of the inner loop; guest_entry: From the start of the inner loop until the guest entry in asm; in_guest: From the guest entry in asm until the return to KVM C code; guest_exit: From the return into KVM C code until the corresponding hypercall/page fault handling or re-entry into the guest; hypercall: Time spent handling hcalls in the kernel (hcalls can go to QEMU, not accounted here); page_fault: Time spent handling page faults; vcpu_exit: vcpu_run_hv exit (almost no code here currently). Like before, these are exposed in debugfs in a file called "timings". There are four values: - number of occurrences of the accumulation point; - total time the vcpu spent in the phase in ns; - shortest time the vcpu spent in the phase in ns; - longest time the vcpu spent in the phase in ns; === Before: rm_entry: 53132 16793518 256 4060 rm_intr: 53132 2125914 22 340 rm_exit: 53132 24108344 374 2180 guest: 53132 40980507996 404 9997650 cede: 0 0 0 0 After: vcpu_entry: 34637 7716108 178 4416 guest_entry: 52414 49365608 324 747542 in_guest: 52411 40828715840 258 9997480 guest_exit: 52410 19681717182 826 102496674 vcpu_exit: 34636 1744462 38 182 hypercall: 45712 22878288 38 1307962 page_fault: 992 111104034 568 168688 With just one instruction (hcall): vcpu_entry: 1 942 942 942 guest_entry: 1 4044 4044 4044 in_guest: 1 1540 1540 1540 guest_exit: 1 3542 3542 3542 vcpu_exit: 1 80 80 80 hypercall: 0 0 0 0 page_fault: 0 0 0 0 === Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-6-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 12 +++++++----- arch/powerpc/kvm/Kconfig | 9 +++++---- arch/powerpc/kvm/book3s_hv.c | 23 ++++++++++++++++++----- arch/powerpc/kvm/book3s_hv_p9_entry.c | 14 ++++---------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a0f44762a069..eeba679802cf 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -831,11 +831,13 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ u64 cur_tb_start; /* when it started */ #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING - struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ - struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ - struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ - struct kvmhv_tb_accumulator guest_time; /* guest execution */ - struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ + struct kvmhv_tb_accumulator vcpu_entry; + struct kvmhv_tb_accumulator vcpu_exit; + struct kvmhv_tb_accumulator in_guest; + struct kvmhv_tb_accumulator hcall; + struct kvmhv_tb_accumulator pg_fault; + struct kvmhv_tb_accumulator guest_entry; + struct kvmhv_tb_accumulator guest_exit; #else struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 191347f44731..cedf1e0f50e1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -135,10 +135,11 @@ config KVM_BOOK3S_HV_P9_TIMING select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS help - Calculate time taken for each vcpu in various parts of the - code. The total, minimum and maximum times in nanoseconds - together with the number of executions are reported in debugfs in - kvm/vm#/vcpu#/timings. + Calculate time taken for each vcpu during vcpu entry and + exit, time spent inside the guest and time spent handling + hypercalls and page faults. The total, minimum and maximum + times in nanoseconds together with the number of executions + are reported in debugfs in kvm/vm#/vcpu#/timings. If unsure, say N. diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 108ee563f72a..c68883170a82 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2661,11 +2661,13 @@ static struct debugfs_timings_element { size_t offset; } timings[] = { #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING - {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, - {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, - {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, - {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, - {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, + {"vcpu_entry", offsetof(struct kvm_vcpu, arch.vcpu_entry)}, + {"guest_entry", offsetof(struct kvm_vcpu, arch.guest_entry)}, + {"in_guest", offsetof(struct kvm_vcpu, arch.in_guest)}, + {"guest_exit", offsetof(struct kvm_vcpu, arch.guest_exit)}, + {"vcpu_exit", offsetof(struct kvm_vcpu, arch.vcpu_exit)}, + {"hypercall", offsetof(struct kvm_vcpu, arch.hcall)}, + {"page_fault", offsetof(struct kvm_vcpu, arch.pg_fault)}, #else {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, @@ -4014,8 +4016,10 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); switch_pmu_to_guest(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), __pa(&vcpu->arch.regs)); + accumulate_time(vcpu, &vcpu->arch.guest_exit); kvmhv_restore_hv_return_state(vcpu, &hvregs); switch_pmu_to_host(vcpu, &host_os_sprs); vcpu->arch.shregs.msr = vcpu->arch.regs.msr; @@ -4703,6 +4707,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm *kvm; unsigned long msr; + start_timing(vcpu, &vcpu->arch.vcpu_entry); + if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; @@ -4768,6 +4774,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { + accumulate_time(vcpu, &vcpu->arch.guest_entry); if (cpu_has_feature(CPU_FTR_ARCH_300)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); @@ -4775,6 +4782,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_run_vcpu(vcpu); if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { + accumulate_time(vcpu, &vcpu->arch.hcall); + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) { /* * These should have been caught reflected @@ -4790,6 +4799,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { + accumulate_time(vcpu, &vcpu->arch.pg_fault); srcu_idx = srcu_read_lock(&kvm->srcu); r = kvmppc_book3s_hv_page_fault(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); @@ -4801,12 +4811,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_xics_rm_complete(vcpu, 0); } } while (is_kvmppc_resume_guest(r)); + accumulate_time(vcpu, &vcpu->arch.vcpu_exit); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); srr_regs_clobbered(); + end_timing(vcpu); + return r; } diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 32e078db8da2..e740eca45862 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -779,8 +779,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV); WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME)); - start_timing(vcpu, &vcpu->arch.rm_entry); - vcpu->arch.ceded = 0; /* Save MSR for restore, with EE clear. */ @@ -941,13 +939,13 @@ tm_return_to_guest: mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); - accumulate_time(vcpu, &vcpu->arch.guest_time); - switch_pmu_to_guest(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); + kvmppc_p9_enter_guest(vcpu); - switch_pmu_to_host(vcpu, &host_os_sprs); - accumulate_time(vcpu, &vcpu->arch.rm_intr); + accumulate_time(vcpu, &vcpu->arch.guest_exit); + switch_pmu_to_host(vcpu, &host_os_sprs); /* XXX: Could get these from r11/12 and paca exsave instead */ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0); @@ -1042,8 +1040,6 @@ tm_return_to_guest: #endif } - accumulate_time(vcpu, &vcpu->arch.rm_exit); - /* Advance host PURR/SPURR by the amount used by guest */ purr = mfspr(SPRN_PURR); spurr = mfspr(SPRN_SPURR); @@ -1150,8 +1146,6 @@ tm_return_to_guest: asm volatile(PPC_CP_ABORT); out: - end_timing(vcpu); - return trap; } EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9); -- cgit From 0df01238b8aa300cbc736e7ec433d201a76036f3 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 14 Jun 2022 13:52:04 -0300 Subject: KVM: PPC: Book3S HV: tracing: Add missing hcall names The kvm_trace_symbol_hcall macro is missing several of the hypercalls defined in hvcall.h. Add the most common ones that are issued during guest lifetime, including the ones that are only used by QEMU and SLOF. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220614165204.549229-1-farosas@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 8 ++++++++ arch/powerpc/kvm/trace_hv.h | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d92a20a85395..1d454c70e7c6 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -350,6 +350,14 @@ /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 +/* + * Platform specific hcalls, used by QEMU/SLOF. These are ignored by + * KVM and only kept here so we can identify them during tracing. + */ +#define H_LOGICAL_MEMOP 0xF001 +#define H_CAS 0XF002 +#define H_UPDATE_DT 0XF003 + /* "Platform specific hcalls", provided by PHYP */ #define H_GET_24X7_CATALOG_PAGE 0xF078 #define H_GET_24X7_DATA 0xF07C diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 32e2cb5811cc..8d57c8428531 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -94,6 +94,7 @@ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_GET_CPU_CHARACTERISTICS, "H_GET_CPU_CHARACTERISTICS"}, \ {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ {H_POLL_PENDING, "H_POLL_PENDING"}, \ @@ -125,7 +126,25 @@ {H_COP, "H_COP"}, \ {H_GET_MPP_X, "H_GET_MPP_X"}, \ {H_SET_MODE, "H_SET_MODE"}, \ - {H_RTAS, "H_RTAS"} + {H_REGISTER_PROC_TBL, "H_REGISTER_PROC_TBL"}, \ + {H_QUERY_VAS_CAPABILITIES, "H_QUERY_VAS_CAPABILITIES"}, \ + {H_INT_GET_SOURCE_INFO, "H_INT_GET_SOURCE_INFO"}, \ + {H_INT_SET_SOURCE_CONFIG, "H_INT_SET_SOURCE_CONFIG"}, \ + {H_INT_GET_QUEUE_INFO, "H_INT_GET_QUEUE_INFO"}, \ + {H_INT_SET_QUEUE_CONFIG, "H_INT_SET_QUEUE_CONFIG"}, \ + {H_INT_ESB, "H_INT_ESB"}, \ + {H_INT_RESET, "H_INT_RESET"}, \ + {H_RPT_INVALIDATE, "H_RPT_INVALIDATE"}, \ + {H_RTAS, "H_RTAS"}, \ + {H_LOGICAL_MEMOP, "H_LOGICAL_MEMOP"}, \ + {H_CAS, "H_CAS"}, \ + {H_UPDATE_DT, "H_UPDATE_DT"}, \ + {H_GET_PERF_COUNTER_INFO, "H_GET_PERF_COUNTER_INFO"}, \ + {H_SET_PARTITION_TABLE, "H_SET_PARTITION_TABLE"}, \ + {H_ENTER_NESTED, "H_ENTER_NESTED"}, \ + {H_TLB_INVALIDATE, "H_TLB_INVALIDATE"}, \ + {H_COPY_TOFROM_GUEST, "H_COPY_TOFROM_GUEST"} + #define kvm_trace_symbol_kvmret \ {RESUME_GUEST, "RESUME_GUEST"}, \ -- cgit From f5c847ea19d323974d6f7c7e9fa4858ce0727096 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 24 Jun 2022 11:27:12 -0300 Subject: KVM: PPC: Align pt_regs in kvm_vcpu_arch structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The H_ENTER_NESTED hypercall receives as second parameter the address of a region of memory containing the values for the nested guest privileged registers. We currently use the pt_regs structure contained within kvm_vcpu_arch for that end. Most hypercalls that receive a memory address expect that region to not cross a 4K page boundary. We would want H_ENTER_NESTED to follow the same pattern so this patch ensures the pt_regs structure sits within a page. Note: the pt_regs structure is currently 384 bytes in size, so aligning to 512 is sufficient to ensure it will not cross a 4K page and avoids punching too big a hole in struct kvm_vcpu_arch. Signed-off-by: Fabiano Rosas Signed-off-by: Murilo Opsfelder Araújo Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220624142712.790491-1-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index eeba679802cf..c2b003550dc9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -523,7 +523,11 @@ struct kvm_vcpu_arch { struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif - struct pt_regs regs; + /* + * This is passed along to the HV via H_ENTER_NESTED. Align to + * prevent it crossing a real 4K page. + */ + struct pt_regs regs __aligned(512); struct thread_fp_state fp; -- cgit From dd3549c5032d588bf444bc8b8eae5108d0aa055a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 16 Jun 2022 17:07:05 +1000 Subject: selftests/powerpc: Add missing files to .gitignores These were missed when the respective tests were added, add them now. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220616070705.1941829-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/.gitignore | 1 + tools/testing/selftests/powerpc/mce/.gitignore | 1 + tools/testing/selftests/powerpc/pmu/ebb/.gitignore | 1 + tools/testing/selftests/powerpc/security/.gitignore | 1 + 4 files changed, 4 insertions(+) create mode 100644 tools/testing/selftests/powerpc/mce/.gitignore diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index d0c23b2e4b60..07b4893ef7af 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -7,3 +7,4 @@ fpu_signal vmx_signal vsx_preempt fpu_denormal +mma diff --git a/tools/testing/selftests/powerpc/mce/.gitignore b/tools/testing/selftests/powerpc/mce/.gitignore new file mode 100644 index 000000000000..f5921462a495 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/.gitignore @@ -0,0 +1 @@ +inject-ra-err diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore index 2920fb39439b..64d8dfdac74a 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -21,3 +21,4 @@ back_to_back_ebbs_test lost_exception_test no_handler_test cycles_with_mmcr2_test +regs_access_pmccext_test diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 93614b125ded..9357b186b13c 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -2,3 +2,4 @@ rfi_flush entry_flush spectre_v2 +uaccess_flush -- cgit From 2a83afe72a2b5760155c2dd840c776aee292dc90 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 May 2022 16:59:36 +1000 Subject: powerpc/64: Drop ppc_inst_as_str() The ppc_inst_as_str() macro tries to make printing variable length, aka "prefixed", instructions convenient. It mostly succeeds, but it does hide an on-stack buffer, which triggers stack protector. More problematically it doesn't compile at all with GCC 12, with -Wdangling-pointer, due to the fact that it returns the char buffer declared inside the macro: arch/powerpc/kernel/trace/ftrace.c: In function '__ftrace_modify_call': ./include/linux/printk.h:475:44: error: using a dangling pointer to '__str' [-Werror=dangling-pointer=] 475 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) ... arch/powerpc/kernel/trace/ftrace.c:567:17: note: in expansion of macro 'pr_err' 567 | pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); | ^~~~~~ ./arch/powerpc/include/asm/inst.h:156:14: note: '__str' declared here 156 | char __str[PPC_INST_STR_LEN]; \ | ^~~~~ This could be fixed by having the caller declare the buffer, but in some places there'd need to be two buffers. In all cases where ppc_inst_as_str() is used the output is not really meant for user consumption, it's almost always indicative of a kernel bug. A simpler solution is to just print the value as an unsigned long. For normal instructions the output is identical. For prefixed instructions the value is printed as a single 64-bit quantity, whereas previously the low half was printed first. But that is good enough for debug output, especially as prefixed instructions will be rare in kernel code in practice. Old: c000000000111170 60420000 ori r2,r2,0 c000000000111174 04100001 e580fb00 .long 0xe580fb0004100001 New: c00000000010f90c 60420000 ori r2,r2,0 c00000000010f910 e580fb0004100001 .long 0xe580fb0004100001 Reported-by: Bagas Sanjaya Reported-by: Petr Mladek Signed-off-by: Michael Ellerman Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20220531065936.3674348-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/inst.h | 19 ------------------- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 24 +++++++++++++----------- arch/powerpc/lib/test_emulate_step.c | 6 +++--- arch/powerpc/xmon/xmon.c | 2 +- 5 files changed, 18 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b49aae9f6f27..684d3f453282 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -139,25 +139,6 @@ static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) *(u64 *)ptr = ppc_inst_as_ulong(x); } -#define PPC_INST_STR_LEN sizeof("00000000 00000000") - -static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) -{ - if (ppc_inst_prefixed(x)) - sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); - else - sprintf(str, "%08x", ppc_inst_val(x)); - - return str; -} - -#define ppc_inst_as_str(x) \ -({ \ - char __str[PPC_INST_STR_LEN]; \ - __ppc_inst_as_str(__str, x); \ - __str; \ -}) - static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) { unsigned int val, suffix; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 1c97c0f177ae..912d4f8a13be 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -269,7 +269,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); + printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2a893e06e4f1..cab67b5120b9 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -69,8 +69,8 @@ ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%s) != old (%s)", - (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); return -EINVAL; } @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -159,8 +159,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %s around bl _mcount\n", - ppc_inst_as_str(op)); + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); return -EINVAL; } } else if (IS_ENABLED(CONFIG_PPC64)) { @@ -174,7 +174,8 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); return -EINVAL; } } @@ -312,7 +313,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -416,8 +417,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %s %s\n", - ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); return -EINVAL; } @@ -486,7 +487,8 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); return -EINVAL; } @@ -564,7 +566,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 4f141daafcff..f2e47be05e8c 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -1616,11 +1616,11 @@ static int __init emulate_compute_instr(struct pt_regs *regs, if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { if (negative) return -EFAULT; - pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } if (analysed == 1 && negative) - pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); if (!negative) emulate_update_regs(regs, &op); return 0; @@ -1637,7 +1637,7 @@ static int __init execute_compute_instr(struct pt_regs *regs, /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3d9782ea3fa7..f80c714f1d49 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3047,7 +3047,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %s", adr, ppc_inst_as_str(inst)); + printf(REG" %08lx", adr, ppc_inst_as_ulong(inst)); printf("\t"); if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); -- cgit From d9abe36df74976231baa5abc4d399d11b504ace2 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Tue, 31 May 2022 17:19:50 +0800 Subject: powerpc/papr_scm: use dev_get_drvdata Eliminate direct accesses to the driver_data field. Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1653988790-19999-1-git-send-email-baihaowen@meizu.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 82cae08976bc..69f21d30394b 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -354,7 +354,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, { struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; - struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; + struct papr_scm_priv *p = dev_get_drvdata(dev); int rc, size; /* Allocate request buffer enough to hold single performance stat */ -- cgit From 61bdbca855024997fd8c82dc190f458aa81beca8 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 23 May 2022 18:43:53 +0200 Subject: powerpc/64s: Don't read H_BLOCK_REMOVE characteristics in radix mode There is no need to read the H_BLOCK_REMOVE characteristics when running in Radix mode because this hcall is never called. Furthermore since the commit 387e220a2e5e ("powerpc/64s: Move hash MMU support code under CONFIG_PPC_64S_HASH_MMU") define pseries_lpar_read_hblkrm_characteristics as un empty function if CONFIG_PPC_64S_HASH_MMU is not set, the #ifdef block can be removed. Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220523164353.26441-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e86a749173e6..4f08161f09b9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -803,9 +803,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_security_mitigations(); -#ifdef CONFIG_PPC_64S_HASH_MMU - pseries_lpar_read_hblkrm_characteristics(); -#endif + if (!radix_enabled()) + pseries_lpar_read_hblkrm_characteristics(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); -- cgit From 65112709115f48f16d7082bcabf173d08622e69f Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 10 Jun 2022 21:25:48 +0530 Subject: powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations Adding instructions for ppc64 for atomic[64]_and atomic[64]_or atomic[64]_xor Signed-off-by: Hari Bathini Tested-by: Naveen N. Rao (ppc64le) Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610155552.25892-2-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 594c54931e20..c421bedd0e98 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -777,41 +777,42 @@ emit_clear: * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited( - "eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } - - /* *(u32 *)(dst + off) += src */ - - /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); + case BPF_STX | BPF_ATOMIC | BPF_DW: + /* Get offset into TMP_REG_1 */ + EMIT(PPC_RAW_LI(tmp1_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); - /* store result back */ - EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); - /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, tmp_idx); - break; - case BPF_STX | BPF_ATOMIC | BPF_DW: - if (imm != BPF_ADD) { + if (size == BPF_DW) + EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + else + EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + + switch (imm) { + case BPF_ADD: + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_AND: + EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_OR: + EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_XOR: + EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg)); + break; + default: pr_err_ratelimited( "eBPF filter atomic op code %02x (@%d) unsupported\n", code, i); - return -ENOTSUPP; + return -EOPNOTSUPP; } - /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); - tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); - EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); - EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); + /* store result back */ + if (size == BPF_DW) + EMIT(PPC_RAW_STDCX(tmp2_reg, tmp1_reg, dst_reg)); + else + EMIT(PPC_RAW_STWCX(tmp2_reg, tmp1_reg, dst_reg)); + /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; -- cgit From dbe6e2456fb0263a5a961a92836d2cebdbca979c Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 10 Jun 2022 21:25:49 +0530 Subject: powerpc/bpf/64: add support for atomic fetch operations Adding instructions for ppc64 for atomic[64]_fetch_add atomic[64]_fetch_and atomic[64]_fetch_or atomic[64]_fetch_xor Signed-off-by: Hari Bathini Tested-by: Naveen N. Rao (ppc64le) Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610155552.25892-3-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index c421bedd0e98..c53236b3a8b1 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -787,17 +787,25 @@ emit_clear: else EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + /* Save old value in _R0 */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(_R0, tmp2_reg)); + switch (imm) { case BPF_ADD: + case BPF_ADD | BPF_FETCH: EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); break; case BPF_AND: + case BPF_AND | BPF_FETCH: EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg)); break; case BPF_OR: + case BPF_OR | BPF_FETCH: EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg)); break; case BPF_XOR: + case BPF_XOR | BPF_FETCH: EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg)); break; default: @@ -807,13 +815,17 @@ emit_clear: return -EOPNOTSUPP; } - /* store result back */ + /* store new value */ if (size == BPF_DW) EMIT(PPC_RAW_STDCX(tmp2_reg, tmp1_reg, dst_reg)); else EMIT(PPC_RAW_STWCX(tmp2_reg, tmp1_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); + + /* For the BPF_FETCH variant, get old value into src_reg */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(src_reg, _R0)); break; /* -- cgit From 1e82dfaa7819f03f0b0022be7ca15bbc83090da1 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 10 Jun 2022 21:25:50 +0530 Subject: powerpc/bpf/64: Add instructions for atomic_[cmp]xchg This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc64, both of which include the BPF_FETCH flag. The kernel's atomic_cmpxchg operation fundamentally has 3 operands, but we only have two register fields. Therefore the operand we compare against (the kernel's API calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's atomic_cmpxchg returns the previous value at dst_reg + off. JIT the same for BPF too with return value put in BPF_REG_0. BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg); Signed-off-by: Hari Bathini Tested-by: Naveen N. Rao (ppc64le) Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610155552.25892-4-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index c53236b3a8b1..29ee306d6302 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -360,6 +360,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 size = BPF_SIZE(code); u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -778,6 +779,9 @@ emit_clear: */ case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + save_reg = tmp2_reg; + ret_reg = src_reg; + /* Get offset into TMP_REG_1 */ EMIT(PPC_RAW_LI(tmp1_reg, off)); tmp_idx = ctx->idx * 4; @@ -808,6 +812,24 @@ emit_clear: case BPF_XOR | BPF_FETCH: EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg)); break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. + */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_R0 */ + if (size == BPF_DW) + EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + else + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; default: pr_err_ratelimited( "eBPF filter atomic op code %02x (@%d) unsupported\n", @@ -817,15 +839,22 @@ emit_clear: /* store new value */ if (size == BPF_DW) - EMIT(PPC_RAW_STDCX(tmp2_reg, tmp1_reg, dst_reg)); + EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg)); else - EMIT(PPC_RAW_STWCX(tmp2_reg, tmp1_reg, dst_reg)); + EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); - /* For the BPF_FETCH variant, get old value into src_reg */ - if (imm & BPF_FETCH) - EMIT(PPC_RAW_MR(src_reg, _R0)); + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(ret_reg, _R0)); + /* + * Skip unnecessary zero-extension for 32-bit cmpxchg. + * For context, see commit 39491867ace5. + */ + if (size != BPF_DW && imm == BPF_CMPXCHG && + insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + } break; /* -- cgit From aea7ef8a82c0ea13ff20b65ff2edf8a38a17eda8 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 10 Jun 2022 21:25:51 +0530 Subject: powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations Adding instructions for ppc32 for atomic_and atomic_or atomic_xor atomic_fetch_add atomic_fetch_and atomic_fetch_or atomic_fetch_xor Signed-off-by: Hari Bathini Tested-by: Naveen N. Rao (ppc64le) Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610155552.25892-5-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index e46ed1e8c6ca..28dc6a1a8f2f 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -294,6 +294,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 dst_reg_h = dst_reg - 1; u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; + u32 ax_reg = bpf_to_ppc(BPF_REG_AX); u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); s16 off = insn[i].off; @@ -798,25 +799,53 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } - - /* *(u32 *)(dst + off) += src */ - bpf_set_seen_register(ctx, tmp_reg); + bpf_set_seen_register(ctx, ax_reg); + /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); + tmp_idx = ctx->idx * 4; /* load value from memory into r0 */ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); - /* store result back */ + + /* Save old value in BPF_REG_AX */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(ax_reg, _R0)); + + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + EMIT(PPC_RAW_AND(_R0, _R0, src_reg)); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + EMIT(PPC_RAW_OR(_R0, _R0, src_reg)); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + EMIT(PPC_RAW_XOR(_R0, _R0, src_reg)); + break; + default: + pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -EOPNOTSUPP; + } + + /* store new value */ EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + PPC_BCC_SHORT(COND_NE, tmp_idx); + + /* For the BPF_FETCH variant, get old data into src_reg */ + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(src_reg, ax_reg)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(src_reg_h, 0)); + } break; case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */ -- cgit From 2d9206b227434912582049c49af1085660fa1e50 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 10 Jun 2022 21:25:52 +0530 Subject: powerpc/bpf/32: Add instructions for atomic_[cmp]xchg This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both of which include the BPF_FETCH flag. The kernel's atomic_cmpxchg operation fundamentally has 3 operands, but we only have two register fields. Therefore the operand we compare against (the kernel's API calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's atomic_cmpxchg returns the previous value at dst_reg + off. JIT the same for BPF too with return value put in BPF_REG_0. BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg); Signed-off-by: Hari Bathini Tested-by: Naveen N. Rao (ppc64le) Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610155552.25892-6-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 28dc6a1a8f2f..43f1c76d48ce 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -297,6 +297,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 ax_reg = bpf_to_ppc(BPF_REG_AX); u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -799,6 +800,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: + save_reg = _R0; + ret_reg = src_reg; + bpf_set_seen_register(ctx, tmp_reg); bpf_set_seen_register(ctx, ax_reg); @@ -829,6 +833,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_XOR | BPF_FETCH: EMIT(PPC_RAW_XOR(_R0, _R0, src_reg)); break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. + */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_REG_0 */ + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; default: pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", code, i); @@ -836,15 +855,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } /* store new value */ - EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); + EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); /* For the BPF_FETCH variant, get old data into src_reg */ if (imm & BPF_FETCH) { - EMIT(PPC_RAW_MR(src_reg, ax_reg)); + EMIT(PPC_RAW_MR(ret_reg, ax_reg)); if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(src_reg_h, 0)); + EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */ } break; -- cgit From 4dee21e0f2520f5032b0abce0ecae593a71bd19d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 28 Jun 2022 18:02:28 +1000 Subject: KVM: PPC: Do not warn when userspace asked for too big TCE table KVM manages emulated TCE tables for guest LIOBNs by a two level table which maps up to 128TiB with 16MB IOMMU pages (enabled in QEMU by default) and MAX_ORDER=11 (the kernel's default). Note that the last level of the table is allocated when actual TCE is updated. However these tables are created via ioctl() on kvmfd and the userspace can trigger WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp) in mm/page_alloc.c and flood dmesg. This adds __GFP_NOWARN. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220628080228.1508847-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d6589c4fe889..40864373ef87 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -307,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return ret; ret = -ENOMEM; - stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL); + stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN); if (!stt) goto fail_acct; -- cgit From a28a2eff1e0ff684f51b3dc6371ff5b651a063d4 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 1 Jun 2022 14:01:17 +1000 Subject: powerpc/pseries/iommu: Print ibm,query-pe-dma-windows parameters PowerVM has a stricter policy about allocating TCEs for LPARs and often there is not enough TCEs for 1:1 mapping, this adds the supported numbers into dev_info() to help analyzing bugreports. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220601040117.1467710-1-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fba64304e859..2fb10ef6dee8 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1021,9 +1021,6 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, cfg_addr, BUID_HI(buid), BUID_LO(buid)); - dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n", - ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), - BUID_LO(buid), ret); switch (out_sz) { case 5: @@ -1041,6 +1038,11 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, break; } + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n", + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret, query->largest_available_block, + query->page_size, query->windows_available); + return ret; } -- cgit From 5969e0c1c7e2132d8b2cf80168072b1195ddce46 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 29 Apr 2021 10:32:08 +0530 Subject: powerpc/perf: Update MMCR2 to support event exclude_idle struct perf_event_attr supports exclude counting of idle task. This is sent to kernel via perf_event_attr.exclude_idle and in perf tool, user can use ":I" event modifier to enable this for specific event. Monitor Mode Control Register 2 (MMCR2) SPR has control bits for each PMCs to freeze counting based on the Control Register CTRL[RUN] state. CTRL[RUN] is not set when idle task is running. Patch adds a check for event attr.exclude_idle to set MMCR2[FCnWAIT] bit. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210429050208.266619-1-maddy@linux.ibm.com --- arch/powerpc/perf/isa207-common.c | 3 +++ arch/powerpc/perf/isa207-common.h | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 42abbcfc73da..56301b2bc8ae 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -686,6 +686,9 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (pevents[i]->attr.exclude_idle) + mmcr2 |= MMCR2_FCWAIT(pmc); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { if (pmc <= 4) { val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index ff122603989b..f594fa6580d1 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -249,6 +249,7 @@ /* Bits in MMCR2 for PowerISA v2.07 */ #define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) #define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) +#define MMCR2_FCWAIT(pmc) (1ull << (58 - (((pmc) - 1) * 9))) #define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) #define MAX_ALT 2 -- cgit From cea9d62b64c981b2a72e7166b21ba413fea16c83 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Thu, 26 May 2022 08:57:36 +0200 Subject: powerpc: Kconfig: Replace tabs with whitespaces Replace tabs after keywords with whitespaces to be consistent. Signed-off-by: Juerg Haefliger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220526065737.86370-2-juerg.haefliger@canonical.com --- arch/powerpc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 92e0cad7752d..02d040d1af85 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -11,7 +11,7 @@ config 64BIT config LIVEPATCH_64 def_bool PPC64 - depends on LIVEPATCH + depends on LIVEPATCH config MMU bool @@ -447,7 +447,7 @@ choice default MATH_EMULATION_FULL depends on MATH_EMULATION -config MATH_EMULATION_FULL +config MATH_EMULATION_FULL bool "Emulate all the floating point instructions" help Select this option will enable the kernel to support to emulate @@ -1250,7 +1250,7 @@ config PHYSICAL_START default "0x00000000" endif -config ARCH_RANDOM +config ARCH_RANDOM def_bool n config PPC_LIB_RHEAP -- cgit From d60cb5010cafc7889384ce49292a320f5bcd56ff Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Thu, 26 May 2022 08:57:37 +0200 Subject: powerpc: Kconfig.debug: Remove extra empty line Remove a stray extra empty line. Signed-off-by: Juerg Haefliger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220526065737.86370-3-juerg.haefliger@canonical.com --- arch/powerpc/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 6a8855d45af7..ae727d4218b9 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -305,7 +305,6 @@ config PPC_EARLY_DEBUG_OPAL def_bool y depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI - config PPC_EARLY_DEBUG_HVSI_VTERMNO hex "vterm number to use with early debug HVSI" depends on PPC_EARLY_DEBUG_LPAR_HVSI -- cgit From 1e2e5e82748aaf1fcc0e2a91e309793cd6cc5076 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Fri, 20 May 2022 13:52:29 +0200 Subject: powerpc/powernv: Kconfig: Replace single quotes Replace single quotes with double quotes which seems to be the convention for strings. Signed-off-by: Juerg Haefliger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520115229.147368-1-juergh@canonical.com --- arch/powerpc/platforms/powernv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 161dfe024085..ecc4a5806b42 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -20,7 +20,7 @@ config PPC_POWERNV default y config OPAL_PRD - tristate 'OPAL PRD driver' + tristate "OPAL PRD driver" depends on PPC_POWERNV help This enables the opal-prd driver, a facility to run processor -- cgit From 81e9685dd41384a39adda823df8b4f6e16ec2898 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Fri, 20 May 2022 13:54:31 +0200 Subject: KVM: PPC: Kconfig: Fix indentation The convention for indentation seems to be a single tab. Help text is further indented by an additional two whitespaces. Fix the lines that violate these rules. Signed-off-by: Juerg Haefliger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520115431.147593-1-juergh@canonical.com --- arch/powerpc/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index cedf1e0f50e1..dcb398d5e009 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -166,7 +166,7 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND Old nested HV capable Linux guests have a bug where they don't reflect the PMU in-use status of their L2 guest to the L0 host while the L2 PMU registers are live. This can result in loss - of L2 PMU register state, causing perf to not work correctly in + of L2 PMU register state, causing perf to not work correctly in L2 guests. Selecting this option for the L0 host implements a workaround for -- cgit From 54c15ec3b738c6086f2be001dae962ec412640e5 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Fri, 24 Jun 2022 10:55:50 +0200 Subject: powerpc: dts: Add DTS file for CZ.NIC Turris 1.x routers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CZ.NIC Turris 1.0 and 1.1 are open source routers, they have dual-core PowerPC Freescale P2020 CPU and are based on Freescale P2020RDB-PC-A board. Hardware design is fully open source, all firmware and hardware design files are available at Turris project website: https://docs.turris.cz/hw/turris-1x/turris-1x/ https://project.turris.cz/en/hardware.html Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220624085550.20570-1-pali@kernel.org --- arch/powerpc/boot/dts/turris1x.dts | 475 +++++++++++++++++++++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100644 arch/powerpc/boot/dts/turris1x.dts diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts new file mode 100644 index 000000000000..c76b628cf026 --- /dev/null +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Turris 1.x Device Tree Source + * + * Copyright 2013 - 2022 CZ.NIC z.s.p.o. (http://www.nic.cz/) + * + * Pinout, Schematics and Altium hardware design files are open source + * and available at: https://docs.turris.cz/hw/turris-1x/turris-1x/ + */ + +#include +#include +#include +/include/ "fsl/p2020si-pre.dtsi" + +/ { + model = "Turris 1.x"; + compatible = "cznic,turris1x", "fsl,P2020RDB-PC"; /* fsl,P2020RDB-PC is required for booting Linux */ + + aliases { + ethernet0 = &enet0; + ethernet1 = &enet1; + ethernet2 = &enet2; + serial0 = &serial0; + serial1 = &serial1; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + spi0 = &spi0; + }; + + memory { + device_type = "memory"; + }; + + soc: soc@ffe00000 { + ranges = <0x0 0x0 0xffe00000 0x00100000>; + + i2c@3000 { + /* PCA9557PW GPIO controller for boot config */ + gpio-controller@18 { + compatible = "nxp,pca9557"; + label = "bootcfg"; + reg = <0x18>; + #gpio-cells = <2>; + gpio-controller; + polarity = <0x00>; + }; + + /* STM32F030R8T6 MCU for power control */ + power-control@2a { + /* + * Turris Power Control firmware runs on STM32F0 MCU. + * This firmware is open source and available at: + * https://gitlab.nic.cz/turris/hw/turris_power_control + */ + reg = <0x2a>; + }; + + /* DDR3 SPD/EEPROM PSWP instruction */ + eeprom@32 { + reg = <0x32>; + }; + + /* SA56004ED temperature control */ + temperature-sensor@4c { + compatible = "nxp,sa56004"; + reg = <0x4c>; + interrupt-parent = <&gpio>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */ + <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */ + }; + + /* DDR3 SPD/EEPROM */ + eeprom@52 { + compatible = "atmel,spd"; + reg = <0x52>; + }; + + /* MCP79402-I/ST Protected EEPROM */ + eeprom@57 { + reg = <0x57>; + }; + + /* ATSHA204-TH-DA-T crypto module */ + crypto@64 { + compatible = "atmel,atsha204"; + reg = <0x64>; + }; + + /* IDT6V49205BNLGI clock generator */ + clock-generator@69 { + compatible = "idt,6v49205b"; + reg = <0x69>; + }; + + /* MCP79402-I/ST RTC */ + rtc@6f { + compatible = "microchip,mcp7940x"; + reg = <0x6f>; + interrupt-parent = <&gpio>; + interrupts = <14 0>; /* GPIO14 - MFP pin */ + }; + }; + + /* SPI on connector P1 */ + spi0: spi@7000 { + }; + + gpio: gpio-controller@fc00 { + #interrupt-cells = <2>; + interrupt-controller; + }; + + /* Connected to SMSC USB2412-DZK 2-Port USB 2.0 Hub Controller */ + usb@22000 { + phy_type = "ulpi"; + dr_mode = "host"; + }; + + enet0: ethernet@24000 { + /* Connected to port 6 of QCA8337N-AL3C switch */ + phy-connection-type = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + mdio@24520 { + /* KSZ9031RNXCA ethernet phy for WAN port */ + phy: ethernet-phy@7 { + interrupts = <3 1 0 0>; + reg = <0x7>; + }; + + /* QCA8337N-AL3C switch with integrated ethernet PHYs for LAN ports */ + switch@10 { + compatible = "qca,qca8337"; + interrupts = <2 1 0 0>; + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu1"; + ethernet = <&enet1>; + phy-mode = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@1 { + reg = <1>; + label = "lan5"; + }; + + port@2 { + reg = <2>; + label = "lan4"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan2"; + }; + + port@5 { + reg = <5>; + label = "lan1"; + }; + + port@6 { + reg = <6>; + label = "cpu0"; + ethernet = <&enet0>; + phy-mode = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; + + ptp_clock@24e00 { + fsl,tclk-period = <5>; + fsl,tmr-prsc = <200>; + fsl,tmr-add = <0xcccccccd>; + fsl,tmr-fiper1 = <0x3b9ac9fb>; + fsl,tmr-fiper2 = <0x0001869b>; + fsl,max-adj = <249999999>; + }; + + enet1: ethernet@25000 { + /* Connected to port 0 of QCA8337N-AL3C switch */ + phy-connection-type = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + mdio@25520 { + status = "disabled"; + }; + + enet2: ethernet@26000 { + /* Connected to KSZ9031RNXCA ethernet phy (WAN port) */ + label = "wan"; + phy-handle = <&phy>; + phy-connection-type = "rgmii-id"; + }; + + mdio@26520 { + status = "disabled"; + }; + + sdhc@2e000 { + bus-width = <4>; + cd-gpios = <&gpio 8 GPIO_ACTIVE_LOW>; + }; + }; + + lbc: localbus@ffe05000 { + reg = <0 0xffe05000 0 0x1000>; + + ranges = <0x0 0x0 0x0 0xef000000 0x01000000>, /* NOR */ + <0x1 0x0 0x0 0xff800000 0x00040000>, /* NAND */ + <0x3 0x0 0x0 0xffa00000 0x00020000>; /* CPLD */ + + /* S29GL128P90TFIR10 NOR */ + nor@0,0 { + compatible = "cfi-flash"; + reg = <0x0 0x0 0x01000000>; + bank-width = <2>; + device-width = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + /* 128 kB for Device Tree Blob */ + reg = <0x00000000 0x00020000>; + label = "dtb"; + }; + + partition@20000 { + /* 1.7 MB for Rescue Linux Kernel Image */ + reg = <0x00020000 0x001a0000>; + label = "rescue-kernel"; + }; + + partition@1c0000 { + /* 1.5 MB for Rescue JFFS2 Root File System */ + reg = <0x001c0000 0x00180000>; + label = "rescue-rootfs"; + }; + + partition@340000 { + /* 11 MB for TAR.XZ Backup with content of NAND Root File System */ + reg = <0x00340000 0x00b00000>; + label = "backup-rootfs"; + }; + + partition@e40000 { + /* 768 kB for Certificates JFFS2 File System */ + reg = <0x00e40000 0x000c0000>; + label = "certificates"; + }; + + /* free unused space 0x00f00000-0x00f20000 */ + + partition@f20000 { + /* 128 kB for U-Boot Environment Variables */ + reg = <0x00f20000 0x00020000>; + label = "u-boot-env"; + }; + + partition@f40000 { + /* 768 kB for U-Boot Bootloader Image */ + reg = <0x00f40000 0x000c0000>; + label = "u-boot"; + }; + }; + }; + + /* MT29F2G08ABAEAWP:E NAND */ + nand@1,0 { + compatible = "fsl,p2020-fcm-nand", "fsl,elbc-fcm-nand"; + reg = <0x1 0x0 0x00040000>; + nand-ecc-mode = "soft"; + nand-ecc-algo = "bch"; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + /* 256 MB for UBI with one volume: UBIFS Root File System */ + reg = <0x00000000 0x10000000>; + label = "rootfs"; + }; + }; + }; + + /* LCMXO1200C-3FTN256C FPGA */ + cpld@3,0 { + /* + * Turris CPLD firmware which runs on this Lattice FPGA, + * is extended version of P1021RDB-PC CPLD v4.1 firmware. + * It is backward compatible with its original version + * and the only extension is support for Turris LEDs. + * Turris CPLD firmware is open source and available at: + * https://gitlab.nic.cz/turris/hw/turris_cpld/-/blob/master/CZ_NIC_Router_CPLD.v + */ + compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus"; + reg = <0x3 0x0 0x30>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x3 0x0 0x00020000>; + + /* MAX6370KA+T watchdog */ + watchdog@2 { + /* + * CPLD firmware maps SET0, SET1 and SET2 + * input logic of MAX6370KA+T chip to CPLD + * memory space at byte offset 0x2. WDI + * input logic is outside of the CPLD and + * connected via external GPIO. + */ + compatible = "maxim,max6370"; + reg = <0x02 0x01>; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + }; + + led-controller@13 { + /* + * LEDs are controlled by CPLD firmware. + * All five LAN LEDs share common RGB settings + * and so it is not possible to set different + * colors on different LAN ports. + */ + compatible = "cznic,turris1x-leds"; + reg = <0x13 0x1d>; + #address-cells = <1>; + #size-cells = <0>; + + multi-led@0 { + reg = <0x0>; + color = ; + function = LED_FUNCTION_WAN; + }; + + multi-led@1 { + reg = <0x1>; + color = ; + function = LED_FUNCTION_LAN; + function-enumerator = <5>; + }; + + multi-led@2 { + reg = <0x2>; + color = ; + function = LED_FUNCTION_LAN; + function-enumerator = <4>; + }; + + multi-led@3 { + reg = <0x3>; + color = ; + function = LED_FUNCTION_LAN; + function-enumerator = <3>; + }; + + multi-led@4 { + reg = <0x4>; + color = ; + function = LED_FUNCTION_LAN; + function-enumerator = <2>; + }; + + multi-led@5 { + reg = <0x5>; + color = ; + function = LED_FUNCTION_LAN; + function-enumerator = <1>; + }; + + multi-led@6 { + reg = <0x6>; + color = ; + function = LED_FUNCTION_WLAN; + }; + + multi-led@7 { + reg = <0x7>; + color = ; + function = LED_FUNCTION_POWER; + }; + }; + }; + }; + + pci2: pcie@ffe08000 { + /* + * PCIe bus for on-board TUSB7340RKM USB 3.0 xHCI controller. + * This xHCI controller is available only on Turris 1.1 boards. + * Turris 1.0 boards have nothing connected to this PCIe bus, + * so system would see only PCIe Root Port of this PCIe Root + * Complex. TUSB7340RKM xHCI controller has four SuperSpeed + * channels. Channel 0 is connected to the front USB 3.0 port, + * channel 1 (but only USB 2.0 subset) to USB 2.0 pins on mPCIe + * slot 1 (CN5), channels 2 and 3 to connector P600. + * + * P2020 PCIe Root Port uses 1MB of PCIe MEM and xHCI controller + * uses 64kB + 8kB of PCIe MEM. No PCIe IO is used or required. + * So allocate 2MB of PCIe MEM for this PCIe bus. + */ + reg = <0 0xffe08000 0 0x1000>; + ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00200000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; + + pci1: pcie@ffe09000 { + /* PCIe bus on mPCIe slot 2 (CN6) for expansion mPCIe card */ + reg = <0 0xffe09000 0 0x1000>; + ranges = <0x02000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; + + pci0: pcie@ffe0a000 { + /* + * PCIe bus on mPCIe slot 1 (CN5) for expansion mPCIe card. + * Turris 1.1 boards have in this mPCIe slot additional USB 2.0 + * pins via channel 1 of TUSB7340RKM xHCI controller and also + * additional SIM card slot, both for USB-based WWAN cards. + */ + reg = <0 0xffe0a000 0 0x1000>; + ranges = <0x02000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000>, /* MEM */ + <0x01000000 0x0 0x00000000 0 0xffc00000 0x0 0x00010000>; /* IO */ + + pcie@0 { + ranges; + }; + }; +}; + +/include/ "fsl/p2020si-post.dtsi" -- cgit From 78988b273d592ce74c8aecdd5d748906c38a9e9d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 10 Jun 2022 14:10:06 +0930 Subject: powerpc/perf: Give generic PMU a nice name When booting on a machine that uses the compat pmu driver we see this: [ 0.071192] GENERIC_COMPAT performance monitor hardware support registered Which is a bit shouty. Give it a nicer name. Signed-off-by: Joel Stanley Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220610044006.2095806-1-joel@jms.id.au --- arch/powerpc/perf/generic-compat-pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index f3db88aee4dd..16392962c511 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -292,7 +292,7 @@ static int generic_compute_mmcr(u64 event[], int n_ev, } static struct power_pmu generic_compat_pmu = { - .name = "GENERIC_COMPAT", + .name = "ISAv3", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, .test_adder = ISA207_TEST_ADDER, -- cgit From 6320e693d98c7430653866b7ca6679338134cb79 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 20 May 2022 14:16:29 +0530 Subject: powerpc/perf: Add support for caps under sysfs in powerpc Add caps support under "/sys/bus/event_source/devices//" for powerpc. This directory can be used to expose some of the specific features that powerpc PMU supports to the user. Example: pmu_name. The name of PMU registered will depend on platform, say power9 or power10 or it could be Generic Compat PMU. Currently the only way to know which is the registered PMU is from the dmesg logs. But clearing the dmesg will make it difficult to know exact PMU backend used. And even extracting from dmesg will be complicated, as we need to parse the dmesg logs and add filters for pmu name. Whereas by exposing it via caps will make it easy as we just need to directly read it from the sysfs. Add a caps directory to /sys/bus/event_source/devices/cpu/ for power8, power9, power10 and generic compat PMU in respective PMU driver code. Update the pmu_name file under caps folder in core-book3s using "attr_update". The information exposed currently: - pmu_name : Underlying PMU name from the driver Example result with power9 pmu: # ls /sys/bus/event_source/devices/cpu/caps pmu_name # cat /sys/bus/event_source/devices/cpu/caps/pmu_name POWER9 Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520084630.15181-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 31 +++++++++++++++++++++++++++++++ arch/powerpc/perf/generic-compat-pmu.c | 10 ++++++++++ arch/powerpc/perf/power10-pmu.c | 10 ++++++++++ arch/powerpc/perf/power8-pmu.c | 10 ++++++++++ arch/powerpc/perf/power9-pmu.c | 10 ++++++++++ 5 files changed, 71 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 03c64a0195df..13919eb96931 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2483,6 +2483,33 @@ static int power_pmu_prepare_cpu(unsigned int cpu) return 0; } +static ssize_t pmu_name_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + if (ppmu) + return sysfs_emit(buf, "%s", ppmu->name); + + return 0; +} + +static DEVICE_ATTR_RO(pmu_name); + +static struct attribute *pmu_caps_attrs[] = { + &dev_attr_pmu_name.attr, + NULL +}; + +static const struct attribute_group pmu_caps_group = { + .name = "caps", + .attrs = pmu_caps_attrs, +}; + +static const struct attribute_group *pmu_caps_groups[] = { + &pmu_caps_group, + NULL, +}; + int __init register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -2493,6 +2520,10 @@ int __init register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + + if (ppmu->flags & PPMU_ARCH_207S) + power_pmu.attr_update = pmu_caps_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 16392962c511..b5c414876ed5 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -151,9 +151,19 @@ static const struct attribute_group generic_compat_pmu_format_group = { .attrs = generic_compat_pmu_format_attr, }; +static struct attribute *generic_compat_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group generic_compat_pmu_caps_group = { + .name = "caps", + .attrs = generic_compat_pmu_caps_attrs, +}; + static const struct attribute_group *generic_compat_pmu_attr_groups[] = { &generic_compat_pmu_format_group, &generic_compat_pmu_events_group, + &generic_compat_pmu_caps_group, NULL, }; diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index c6d51e7093cf..d1adcd9f52e2 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -258,6 +258,15 @@ static const struct attribute_group power10_pmu_format_group = { .attrs = power10_pmu_format_attr, }; +static struct attribute *power10_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power10_pmu_caps_group = { + .name = "caps", + .attrs = power10_pmu_caps_attrs, +}; + static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { &power10_pmu_format_group, &power10_pmu_events_group_dd1, @@ -267,6 +276,7 @@ static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { static const struct attribute_group *power10_pmu_attr_groups[] = { &power10_pmu_format_group, &power10_pmu_events_group, + &power10_pmu_caps_group, NULL, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index e37b1e714d2b..2518f5375d4a 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -187,9 +187,19 @@ static const struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; +static struct attribute *power8_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power8_pmu_caps_group = { + .name = "caps", + .attrs = power8_pmu_caps_attrs, +}; + static const struct attribute_group *power8_pmu_attr_groups[] = { &isa207_pmu_format_group, &power8_pmu_events_group, + &power8_pmu_caps_group, NULL, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 3ad40ffb9256..bc6d76bce97b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -258,9 +258,19 @@ static const struct attribute_group power9_pmu_format_group = { .attrs = power9_pmu_format_attr, }; +static struct attribute *power9_pmu_caps_attrs[] = { + NULL +}; + +static struct attribute_group power9_pmu_caps_group = { + .name = "caps", + .attrs = power9_pmu_caps_attrs, +}; + static const struct attribute_group *power9_pmu_attr_groups[] = { &power9_pmu_format_group, &power9_pmu_events_group, + &power9_pmu_caps_group, NULL, }; -- cgit From 999d7c47a0f737157608b9b0d2d37bdd1afec58c Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 20 May 2022 14:16:30 +0530 Subject: docs: ABI: sysfs-bus-event_source-devices: Document sysfs caps entry for PMU Details is added about "caps" attribute group in the ABI documentation. This is used to expose some of the PMU attributes in "caps" directory under : /sys/bus/event_source/devices//. The dev/caps will contain information about features that platform specific PMU supports. Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520084630.15181-2-atrajeev@linux.vnet.ibm.com --- .../ABI/testing/sysfs-bus-event_source-devices-caps | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-caps diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps new file mode 100644 index 000000000000..8757dcf41c08 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps @@ -0,0 +1,18 @@ +What: /sys/bus/event_source/devices//caps +Date: May 2022 +KernelVersion: 5.19 +Contact: Linux kernel mailing list +Description: + Attribute group to describe the capabilities exposed + for a particular pmu. Each attribute of this group can + expose information specific to a PMU, say pmu_name, so that + userspace can understand some of the feature which the + platform specific PMU supports. + + One of the example available capability in supported platform + like Intel is pmu_name, which exposes underlying CPU name known + to the PMU driver. + + Example output in powerpc: + grep . /sys/bus/event_source/devices/cpu/caps/* + /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9 -- cgit From 1978c48495171165453a7c90d1345c1d8d776d02 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sun, 12 Jun 2022 23:34:00 +0200 Subject: powerpc/52xx: Mark gpt driver as not removable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error code (here -EBUSY) from a remove callback doesn't prevent the driver from being unloaded. The only effect is that an error message is emitted and the driver is removed anyhow. So instead drop the remove function (which is equivalent to returning zero) and set the suppress_bind_attrs property to make it impossible to unload the driver via sysfs. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220612213400.159257-1-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 74baded88f47..e43e08d991ea 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -753,11 +753,6 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) return 0; } -static int mpc52xx_gpt_remove(struct platform_device *ofdev) -{ - return -EBUSY; -} - static const struct of_device_id mpc52xx_gpt_match[] = { { .compatible = "fsl,mpc5200-gpt", }, @@ -770,10 +765,10 @@ static const struct of_device_id mpc52xx_gpt_match[] = { static struct platform_driver mpc52xx_gpt_driver = { .driver = { .name = "mpc52xx-gpt", + .suppress_bind_attrs = true, .of_match_table = mpc52xx_gpt_match, }, .probe = mpc52xx_gpt_probe, - .remove = mpc52xx_gpt_remove, }; static int __init mpc52xx_gpt_init(void) -- cgit From c6b2bd262b33aa2451f52aec2190131d1762945a Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 13 Jul 2022 15:23:32 -0500 Subject: powerpc/pseries: hvcall.h: add H_WATCHDOG opcode, H_NOOP return code PAPR v2.12 defines a new hypercall, H_WATCHDOG. The hypercall permits guest control of one or more virtual watchdog timers. Add the opcode for the H_WATCHDOG hypercall to hvcall.h. While here, add a definition for H_NOOP, a possible return code for H_WATCHDOG. Signed-off-by: Scott Cheloha Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713202335.1217647-2-cheloha@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 1d454c70e7c6..4457abe31e6e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -87,6 +87,7 @@ #define H_P7 -60 #define H_P8 -61 #define H_P9 -62 +#define H_NOOP -63 #define H_TOO_BIG -64 #define H_UNSUPPORTED -67 #define H_OVERLAP -68 @@ -324,7 +325,8 @@ #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C #define H_GET_ENERGY_SCALE_INFO 0x450 -#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO +#define H_WATCHDOG 0x45C +#define MAX_HCALL_OPCODE H_WATCHDOG /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) -- cgit From 1621563ec62ff143c7b817dd5eab0884cdfaf89d Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 13 Jul 2022 15:23:33 -0500 Subject: powerpc/pseries: add FW_FEATURE_WATCHDOG flag PAPR v2.12 specifies a new optional function set, "hcall-watchdog", for the /rtas/ibm,hypertas-functions property. The presence of this function set indicates support for the H_WATCHDOG hypercall. Check for this function set and, if present, set the new FW_FEATURE_WATCHDOG flag. Signed-off-by: Scott Cheloha Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713202335.1217647-3-cheloha@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 3 ++- arch/powerpc/platforms/pseries/firmware.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 8dddd34b8ecf..398e0b5e485f 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -55,6 +55,7 @@ #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) #define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) +#define FW_FEATURE_WATCHDOG ASM_CONST(0x0000080000000000) #ifndef __ASSEMBLY__ @@ -76,7 +77,7 @@ enum { FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | - FW_FEATURE_ENERGY_SCALE_INFO, + FW_FEATURE_ENERGY_SCALE_INFO | FW_FEATURE_WATCHDOG, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 09c119b2f623..080108d129ed 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -67,6 +67,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, + {FW_FEATURE_WATCHDOG, "hcall-watchdog"}, }; /* Build up the firmware features bitmask using the contents of -- cgit From 578030bfe117060bf86c81aaa7b3faead4589810 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 13 Jul 2022 15:23:34 -0500 Subject: powerpc/pseries: register pseries-wdt device with platform bus PAPR v2.12 defines a new hypercall, H_WATCHDOG. The hypercall permits guest control of one or more virtual watchdog timers. These timers do not conform to PowerPC device conventions. They are not affixed to any extant bus, nor do they have full representation in the device tree. As a workaround we represent them as platform devices. This patch registers a single platform device, "pseries-wdt", with the platform bus if the FW_FEATURE_WATCHDOG flag is set. A driver for this device, "pseries-wdt", will be introduced in a subsequent patch. Signed-off-by: Scott Cheloha Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713202335.1217647-4-cheloha@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a74c425be624..489f4c4df468 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -170,6 +171,18 @@ static void __init fwnmi_init(void) #endif } +/* + * Affix a device for the first timer to the platform bus if + * we have firmware support for the H_WATCHDOG hypercall. + */ +static __init int pseries_wdt_init(void) +{ + if (firmware_has_feature(FW_FEATURE_WATCHDOG)) + platform_device_register_simple("pseries-wdt", 0, NULL, 0); + return 0; +} +machine_subsys_initcall(pseries, pseries_wdt_init); + static void pseries_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); -- cgit From 69472ffa6575e3a1c1e3324dd06395af0f63eb71 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Wed, 13 Jul 2022 15:23:35 -0500 Subject: watchdog/pseries-wdt: initial support for H_WATCHDOG-based watchdog timers PAPR v2.12 defines a new hypercall, H_WATCHDOG. The hypercall permits guest control of one or more virtual watchdog timers. The timers have millisecond granularity. The guest is terminated when a timer expires. This patch adds a watchdog driver for these timers, "pseries-wdt". pseries_wdt_probe() currently assumes the existence of only one platform device and always assigns it watchdogNumber 1. If we ever expose more than one timer to userspace we will need to devise a way to assign a distinct watchdogNumber to each platform device at device registration time. Signed-off-by: Scott Cheloha Acked-by: Guenter Roeck Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713202335.1217647-5-cheloha@linux.ibm.com --- Documentation/watchdog/watchdog-parameters.rst | 12 ++ drivers/watchdog/Kconfig | 8 + drivers/watchdog/Makefile | 1 + drivers/watchdog/pseries-wdt.c | 239 +++++++++++++++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 drivers/watchdog/pseries-wdt.c diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index 223c99361a30..29153eed6689 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -425,6 +425,18 @@ pnx833x_wdt: ------------------------------------------------- +pseries-wdt: + action: + Action taken when watchdog expires: 0 (power off), 1 (restart), + 2 (dump and restart). (default=1) + timeout: + Initial watchdog timeout in seconds. (default=60) + nowayout: + Watchdog cannot be stopped once started. + (default=kernel config parameter) + +------------------------------------------------- + rc32434_wdt: timeout: Watchdog timeout value, in seconds (default=20) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 32fd37698932..a2429604a4ab 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1962,6 +1962,14 @@ config MEN_A21_WDT # PPC64 Architecture +config PSERIES_WDT + tristate "POWER Architecture Platform Watchdog Timer" + depends on PPC_PSERIES + select WATCHDOG_CORE + help + Driver for virtual watchdog timers provided by PAPR + hypervisors (e.g. PowerVM, KVM). + config WATCHDOG_RTAS tristate "RTAS watchdog" depends on PPC_RTAS diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index c324e9d820e9..cdeb119e6e61 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -187,6 +187,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o # PPC64 Architecture +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o # S390 Architecture diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c new file mode 100644 index 000000000000..7f53b5293409 --- /dev/null +++ b/drivers/watchdog/pseries-wdt.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 International Business Machines, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pseries-wdt" + +/* + * H_WATCHDOG Input + * + * R4: "flags": + * + * Bits 48-55: "operation" + */ +#define PSERIES_WDTF_OP_START 0x100UL /* start timer */ +#define PSERIES_WDTF_OP_STOP 0x200UL /* stop timer */ +#define PSERIES_WDTF_OP_QUERY 0x300UL /* query timer capabilities */ + +/* + * Bits 56-63: "timeoutAction" (for "Start Watchdog" only) + */ +#define PSERIES_WDTF_ACTION_HARD_POWEROFF 0x1UL /* poweroff */ +#define PSERIES_WDTF_ACTION_HARD_RESTART 0x2UL /* restart */ +#define PSERIES_WDTF_ACTION_DUMP_RESTART 0x3UL /* dump + restart */ + +/* + * H_WATCHDOG Output + * + * R3: Return code + * + * H_SUCCESS The operation completed. + * + * H_BUSY The hypervisor is too busy; retry the operation. + * + * H_PARAMETER The given "flags" are somehow invalid. Either the + * "operation" or "timeoutAction" is invalid, or a + * reserved bit is set. + * + * H_P2 The given "watchdogNumber" is zero or exceeds the + * supported maximum value. + * + * H_P3 The given "timeoutInMs" is below the supported + * minimum value. + * + * H_NOOP The given "watchdogNumber" is already stopped. + * + * H_HARDWARE The operation failed for ineffable reasons. + * + * H_FUNCTION The H_WATCHDOG hypercall is not supported by this + * hypervisor. + * + * R4: + * + * - For the "Query Watchdog Capabilities" operation, a 64-bit + * structure: + */ +#define PSERIES_WDTQ_MIN_TIMEOUT(cap) (((cap) >> 48) & 0xffff) +#define PSERIES_WDTQ_MAX_NUMBER(cap) (((cap) >> 32) & 0xffff) + +static const unsigned long pseries_wdt_action[] = { + [0] = PSERIES_WDTF_ACTION_HARD_POWEROFF, + [1] = PSERIES_WDTF_ACTION_HARD_RESTART, + [2] = PSERIES_WDTF_ACTION_DUMP_RESTART, +}; + +#define WATCHDOG_ACTION 1 +static unsigned int action = WATCHDOG_ACTION; +module_param(action, uint, 0444); +MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=" + __MODULE_STRING(WATCHDOG_ACTION) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0444); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define WATCHDOG_TIMEOUT 60 +static unsigned int timeout = WATCHDOG_TIMEOUT; +module_param(timeout, uint, 0444); +MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default=" + __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); + +struct pseries_wdt { + struct watchdog_device wd; + unsigned long action; + unsigned long num; /* Watchdog numbers are 1-based */ +}; + +static int pseries_wdt_start(struct watchdog_device *wdd) +{ + struct pseries_wdt *pw = watchdog_get_drvdata(wdd); + struct device *dev = wdd->parent; + unsigned long flags, msecs; + long rc; + + flags = pw->action | PSERIES_WDTF_OP_START; + msecs = wdd->timeout * MSEC_PER_SEC; + rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs); + if (rc != H_SUCCESS) { + dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu", + rc, pw->num); + return -EIO; + } + return 0; +} + +static int pseries_wdt_stop(struct watchdog_device *wdd) +{ + struct pseries_wdt *pw = watchdog_get_drvdata(wdd); + struct device *dev = wdd->parent; + long rc; + + rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num); + if (rc != H_SUCCESS && rc != H_NOOP) { + dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu", + rc, pw->num); + return -EIO; + } + return 0; +} + +static struct watchdog_info pseries_wdt_info = { + .identity = DRV_NAME, + .options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT + | WDIOF_PRETIMEOUT, +}; + +static const struct watchdog_ops pseries_wdt_ops = { + .owner = THIS_MODULE, + .start = pseries_wdt_start, + .stop = pseries_wdt_stop, +}; + +static int pseries_wdt_probe(struct platform_device *pdev) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct pseries_wdt *pw; + unsigned long cap; + long msecs, rc; + int err; + + rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY); + if (rc == H_FUNCTION) + return -ENODEV; + if (rc != H_SUCCESS) + return -EIO; + cap = ret[0]; + + pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL); + if (!pw) + return -ENOMEM; + + /* + * Assume watchdogNumber 1 for now. If we ever support + * multiple timers we will need to devise a way to choose a + * distinct watchdogNumber for each platform device at device + * registration time. + */ + pw->num = 1; + if (PSERIES_WDTQ_MAX_NUMBER(cap) < pw->num) + return -ENODEV; + + if (action >= ARRAY_SIZE(pseries_wdt_action)) + return -EINVAL; + pw->action = pseries_wdt_action[action]; + + pw->wd.parent = &pdev->dev; + pw->wd.info = &pseries_wdt_info; + pw->wd.ops = &pseries_wdt_ops; + msecs = PSERIES_WDTQ_MIN_TIMEOUT(cap); + pw->wd.min_timeout = DIV_ROUND_UP(msecs, MSEC_PER_SEC); + pw->wd.max_timeout = UINT_MAX / 1000; /* from linux/watchdog.h */ + pw->wd.timeout = timeout; + if (watchdog_init_timeout(&pw->wd, 0, NULL)) + return -EINVAL; + watchdog_set_nowayout(&pw->wd, nowayout); + watchdog_stop_on_reboot(&pw->wd); + watchdog_stop_on_unregister(&pw->wd); + watchdog_set_drvdata(&pw->wd, pw); + + err = devm_watchdog_register_device(&pdev->dev, &pw->wd); + if (err) + return err; + + platform_set_drvdata(pdev, &pw->wd); + + return 0; +} + +static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct watchdog_device *wd = platform_get_drvdata(pdev); + + if (watchdog_active(wd)) + return pseries_wdt_stop(wd); + return 0; +} + +static int pseries_wdt_resume(struct platform_device *pdev) +{ + struct watchdog_device *wd = platform_get_drvdata(pdev); + + if (watchdog_active(wd)) + return pseries_wdt_start(wd); + return 0; +} + +static const struct platform_device_id pseries_wdt_id[] = { + { .name = "pseries-wdt" }, + {} +}; +MODULE_DEVICE_TABLE(platform, pseries_wdt_id); + +static struct platform_driver pseries_wdt_driver = { + .driver = { + .name = DRV_NAME, + }, + .id_table = pseries_wdt_id, + .probe = pseries_wdt_probe, + .resume = pseries_wdt_resume, + .suspend = pseries_wdt_suspend, +}; +module_platform_driver(pseries_wdt_driver); + +MODULE_AUTHOR("Alexey Kardashevskiy"); +MODULE_AUTHOR("Scott Cheloha"); +MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Driver"); +MODULE_LICENSE("GPL"); -- cgit From fd97e4ad6d3b0c9fce3bca8ea8e6969d9ce7423b Mon Sep 17 00:00:00 2001 From: Ning Qiang Date: Wed, 13 Jul 2022 23:37:34 +0800 Subject: macintosh/adb: fix oob read in do_adb_query() function In do_adb_query() function of drivers/macintosh/adb.c, req->data is copied form userland. The parameter "req->data[2]" is missing check, the array size of adb_handler[] is 16, so adb_handler[req->data[2]].original_address and adb_handler[req->data[2]].handler_id will lead to oob read. Cc: stable Signed-off-by: Ning Qiang Reviewed-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713153734.2248-1-sohu0106@126.com --- drivers/macintosh/adb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 439fab4eaa85..1bbb9ca08d40 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) switch(req->data[1]) { case ADB_QUERY_GETDEVINFO: - if (req->nbytes < 3) + if (req->nbytes < 3 || req->data[2] >= 16) break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; -- cgit From 4c9da83011c455c0791b1f5e4e84d454d4f4ae3c Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 11 Jul 2022 19:36:16 -0300 Subject: KVM: PPC: Book3S HV: Remove kvmhv_p9_[set,restore]_lpcr declarations The commit b1b1697ae0cc ("KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support") removed the last references to these functions. Fixes: b1b1697ae0cc ("KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support") Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711223617.63625-2-muriloo@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 91c9f937edcd..ff1336ab4c47 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -281,8 +281,6 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); long kvmppc_read_intr(void); void kvmppc_bad_interrupt(struct pt_regs *regs); -void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip); -void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip); void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); -- cgit From b8c7ee79b1a37442a910b8a313045fb9aa639911 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 11 Jul 2022 19:36:17 -0300 Subject: KVM: PPC: Book3s HV: Remove unused function kvmppc_bad_interrupt The commit fae5c9f3664b ("KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path") removed the last reference to the function. Fixes: fae5c9f3664b ("KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path") Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711223617.63625-3-muriloo@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s.h | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index ff1336ab4c47..bbf5e2c5fe09 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -280,7 +280,6 @@ extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); long kvmppc_read_intr(void); -void kvmppc_bad_interrupt(struct pt_regs *regs); void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 88a8f6473c4e..c15c6faedce5 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -489,24 +489,6 @@ static long kvmppc_read_one_intr(bool *again) return kvmppc_check_passthru(xisr, xirr, again); } -void kvmppc_bad_interrupt(struct pt_regs *regs) -{ - /* - * 100 could happen at any time, 200 can happen due to invalid real - * address access for example (or any time due to a hardware problem). - */ - if (TRAP(regs) == 0x100) { - get_paca()->in_nmi++; - system_reset_exception(regs); - get_paca()->in_nmi--; - } else if (TRAP(regs) == 0x200) { - machine_check_exception(regs); - } else { - die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); - } - panic("Bad KVM trap"); -} - static void kvmppc_end_cede(struct kvm_vcpu *vcpu) { vcpu->arch.ceded = 0; -- cgit From db5360840f09eded71009a981084ab10a93a08d8 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 11 Jul 2022 09:19:26 +0530 Subject: powerpc/kvm: Move pmu code in kvm folder to separate file for power9 and later platforms File book3s_hv_p9_entry.c in powerpc/kvm folder consists of functions like freeze_pmu, switch_pmu_to_guest and switch_pmu_to_host which are specific to Performance Monitoring Unit(PMU) for power9 and later platforms. For better maintenance, moving pmu related code from book3s_hv_p9_entry.c to a new file called book3s_hv_p9_perf.c, without any logic change. Also make corresponding changes in the Makefile to include book3s_hv_p9_perf.c during compilation. Signed-off-by: Kajol Jain Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711034927.213192-1-kjain@linux.ibm.com --- arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s_hv_p9_entry.c | 221 --------------------------------- arch/powerpc/kvm/book3s_hv_p9_perf.c | 225 ++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 221 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_hv_p9_perf.c diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 0cd23ce07d68..5319d889b184 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -86,6 +86,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + book3s_hv_p9_perf.o \ $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) endif diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index e740eca45862..34f1db212824 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -3,231 +3,10 @@ #include #include #include -#include -#include #include #include "book3s_hv.h" -static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) -{ - if (!(mmcr0 & MMCR0_FC)) - goto do_freeze; - if (mmcra & MMCRA_SAMPLE_ENABLE) - goto do_freeze; - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (!(mmcr0 & MMCR0_PMCCEXT)) - goto do_freeze; - if (!(mmcra & MMCRA_BHRB_DISABLE)) - goto do_freeze; - } - return; - -do_freeze: - mmcr0 = MMCR0_FC; - mmcra = 0; - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mmcr0 |= MMCR0_PMCCEXT; - mmcra = MMCRA_BHRB_DISABLE; - } - - mtspr(SPRN_MMCR0, mmcr0); - mtspr(SPRN_MMCRA, mmcra); - isync(); -} - -void switch_pmu_to_guest(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - struct lppaca *lp; - int load_pmu = 1; - - lp = vcpu->arch.vpa.pinned_addr; - if (lp) - load_pmu = lp->pmcregs_in_use; - - /* Save host */ - if (ppc_get_pmu_inuse()) { - /* - * It might be better to put PMU handling (at least for the - * host) in the perf subsystem because it knows more about what - * is being used. - */ - - /* POWER9, POWER10 do not implement HPMC or SPMC */ - - host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); - host_os_sprs->mmcra = mfspr(SPRN_MMCRA); - - freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); - - host_os_sprs->pmc1 = mfspr(SPRN_PMC1); - host_os_sprs->pmc2 = mfspr(SPRN_PMC2); - host_os_sprs->pmc3 = mfspr(SPRN_PMC3); - host_os_sprs->pmc4 = mfspr(SPRN_PMC4); - host_os_sprs->pmc5 = mfspr(SPRN_PMC5); - host_os_sprs->pmc6 = mfspr(SPRN_PMC6); - host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); - host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); - host_os_sprs->sdar = mfspr(SPRN_SDAR); - host_os_sprs->siar = mfspr(SPRN_SIAR); - host_os_sprs->sier1 = mfspr(SPRN_SIER); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); - host_os_sprs->sier2 = mfspr(SPRN_SIER2); - host_os_sprs->sier3 = mfspr(SPRN_SIER3); - } - } - -#ifdef CONFIG_PPC_PSERIES - /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = load_pmu; - barrier(); - } -#endif - - /* - * Load guest. If the VPA said the PMCs are not in use but the guest - * tried to access them anyway, HFSCR[PM] will be set by the HFAC - * fault so we can make forward progress. - */ - if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { - mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); - mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); - mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); - mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); - mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); - mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); - mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); - mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); - mtspr(SPRN_SDAR, vcpu->arch.sdar); - mtspr(SPRN_SIAR, vcpu->arch.siar); - mtspr(SPRN_SIER, vcpu->arch.sier[0]); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); - mtspr(SPRN_SIER2, vcpu->arch.sier[1]); - mtspr(SPRN_SIER3, vcpu->arch.sier[2]); - } - - /* Set MMCRA then MMCR0 last */ - mtspr(SPRN_MMCRA, vcpu->arch.mmcra); - mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); - /* No isync necessary because we're starting counters */ - - if (!vcpu->arch.nested && - (vcpu->arch.hfscr_permitted & HFSCR_PM)) - vcpu->arch.hfscr |= HFSCR_PM; - } -} -EXPORT_SYMBOL_GPL(switch_pmu_to_guest); - -void switch_pmu_to_host(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - struct lppaca *lp; - int save_pmu = 1; - - lp = vcpu->arch.vpa.pinned_addr; - if (lp) - save_pmu = lp->pmcregs_in_use; - if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { - /* - * Save pmu if this guest is capable of running nested guests. - * This is option is for old L1s that do not set their - * lppaca->pmcregs_in_use properly when entering their L2. - */ - save_pmu |= nesting_enabled(vcpu->kvm); - } - - if (save_pmu) { - vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); - vcpu->arch.mmcra = mfspr(SPRN_MMCRA); - - freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); - - vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); - vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); - vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); - vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); - vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); - vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); - vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); - vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); - vcpu->arch.sdar = mfspr(SPRN_SDAR); - vcpu->arch.siar = mfspr(SPRN_SIAR); - vcpu->arch.sier[0] = mfspr(SPRN_SIER); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); - vcpu->arch.sier[1] = mfspr(SPRN_SIER2); - vcpu->arch.sier[2] = mfspr(SPRN_SIER3); - } - - } else if (vcpu->arch.hfscr & HFSCR_PM) { - /* - * The guest accessed PMC SPRs without specifying they should - * be preserved, or it cleared pmcregs_in_use after the last - * access. Just ensure they are frozen. - */ - freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); - - /* - * Demand-fault PMU register access in the guest. - * - * This is used to grab the guest's VPA pmcregs_in_use value - * and reflect it into the host's VPA in the case of a nested - * hypervisor. - * - * It also avoids having to zero-out SPRs after each guest - * exit to avoid side-channels when. - * - * This is cleared here when we exit the guest, so later HFSCR - * interrupt handling can add it back to run the guest with - * PM enabled next time. - */ - if (!vcpu->arch.nested) - vcpu->arch.hfscr &= ~HFSCR_PM; - } /* otherwise the PMU should still be frozen */ - -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); - barrier(); - } -#endif - - if (ppc_get_pmu_inuse()) { - mtspr(SPRN_PMC1, host_os_sprs->pmc1); - mtspr(SPRN_PMC2, host_os_sprs->pmc2); - mtspr(SPRN_PMC3, host_os_sprs->pmc3); - mtspr(SPRN_PMC4, host_os_sprs->pmc4); - mtspr(SPRN_PMC5, host_os_sprs->pmc5); - mtspr(SPRN_PMC6, host_os_sprs->pmc6); - mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); - mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); - mtspr(SPRN_SDAR, host_os_sprs->sdar); - mtspr(SPRN_SIAR, host_os_sprs->siar); - mtspr(SPRN_SIER, host_os_sprs->sier1); - - if (cpu_has_feature(CPU_FTR_ARCH_31)) { - mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); - mtspr(SPRN_SIER2, host_os_sprs->sier2); - mtspr(SPRN_SIER3, host_os_sprs->sier3); - } - - /* Set MMCRA then MMCR0 last */ - mtspr(SPRN_MMCRA, host_os_sprs->mmcra); - mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); - isync(); - } -} -EXPORT_SYMBOL_GPL(switch_pmu_to_host); - static void load_spr_state(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs) { diff --git a/arch/powerpc/kvm/book3s_hv_p9_perf.c b/arch/powerpc/kvm/book3s_hv_p9_perf.c new file mode 100644 index 000000000000..da3135cab9ea --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_p9_perf.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "book3s_hv.h" + +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) +{ + if (!(mmcr0 & MMCR0_FC)) + goto do_freeze; + if (mmcra & MMCRA_SAMPLE_ENABLE) + goto do_freeze; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (!(mmcr0 & MMCR0_PMCCEXT)) + goto do_freeze; + if (!(mmcra & MMCRA_BHRB_DISABLE)) + goto do_freeze; + } + return; + +do_freeze: + mmcr0 = MMCR0_FC; + mmcra = 0; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mmcr0 |= MMCR0_PMCCEXT; + mmcra = MMCRA_BHRB_DISABLE; + } + + mtspr(SPRN_MMCR0, mmcr0); + mtspr(SPRN_MMCRA, mmcra); + isync(); +} + +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int load_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + load_pmu = lp->pmcregs_in_use; + + /* Save host */ + if (ppc_get_pmu_inuse()) { + /* + * It might be better to put PMU handling (at least for the + * host) in the perf subsystem because it knows more about what + * is being used. + */ + + /* POWER9, POWER10 do not implement HPMC or SPMC */ + + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); + host_os_sprs->mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); + + host_os_sprs->pmc1 = mfspr(SPRN_PMC1); + host_os_sprs->pmc2 = mfspr(SPRN_PMC2); + host_os_sprs->pmc3 = mfspr(SPRN_PMC3); + host_os_sprs->pmc4 = mfspr(SPRN_PMC4); + host_os_sprs->pmc5 = mfspr(SPRN_PMC5); + host_os_sprs->pmc6 = mfspr(SPRN_PMC6); + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); + host_os_sprs->sdar = mfspr(SPRN_SDAR); + host_os_sprs->siar = mfspr(SPRN_SIAR); + host_os_sprs->sier1 = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); + host_os_sprs->sier2 = mfspr(SPRN_SIER2); + host_os_sprs->sier3 = mfspr(SPRN_SIER3); + } + } + +#ifdef CONFIG_PPC_PSERIES + /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = load_pmu; + barrier(); + } +#endif + + /* + * Load guest. If the VPA said the PMCs are not in use but the guest + * tried to access them anyway, HFSCR[PM] will be set by the HFAC + * fault so we can make forward progress. + */ + if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); + mtspr(SPRN_SDAR, vcpu->arch.sdar); + mtspr(SPRN_SIAR, vcpu->arch.siar); + mtspr(SPRN_SIER, vcpu->arch.sier[0]); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); + mtspr(SPRN_SIER2, vcpu->arch.sier[1]); + mtspr(SPRN_SIER3, vcpu->arch.sier[2]); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, vcpu->arch.mmcra); + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); + /* No isync necessary because we're starting counters */ + + if (!vcpu->arch.nested && + (vcpu->arch.hfscr_permitted & HFSCR_PM)) + vcpu->arch.hfscr |= HFSCR_PM; + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_guest); + +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int save_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + save_pmu = lp->pmcregs_in_use; + if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { + /* + * Save pmu if this guest is capable of running nested guests. + * This is option is for old L1s that do not set their + * lppaca->pmcregs_in_use properly when entering their L2. + */ + save_pmu |= nesting_enabled(vcpu->kvm); + } + + if (save_pmu) { + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); + vcpu->arch.mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); + + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); + vcpu->arch.sdar = mfspr(SPRN_SDAR); + vcpu->arch.siar = mfspr(SPRN_SIAR); + vcpu->arch.sier[0] = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); + vcpu->arch.sier[1] = mfspr(SPRN_SIER2); + vcpu->arch.sier[2] = mfspr(SPRN_SIER3); + } + + } else if (vcpu->arch.hfscr & HFSCR_PM) { + /* + * The guest accessed PMC SPRs without specifying they should + * be preserved, or it cleared pmcregs_in_use after the last + * access. Just ensure they are frozen. + */ + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); + + /* + * Demand-fault PMU register access in the guest. + * + * This is used to grab the guest's VPA pmcregs_in_use value + * and reflect it into the host's VPA in the case of a nested + * hypervisor. + * + * It also avoids having to zero-out SPRs after each guest + * exit to avoid side-channels when. + * + * This is cleared here when we exit the guest, so later HFSCR + * interrupt handling can add it back to run the guest with + * PM enabled next time. + */ + if (!vcpu->arch.nested) + vcpu->arch.hfscr &= ~HFSCR_PM; + } /* otherwise the PMU should still be frozen */ + +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif + + if (ppc_get_pmu_inuse()) { + mtspr(SPRN_PMC1, host_os_sprs->pmc1); + mtspr(SPRN_PMC2, host_os_sprs->pmc2); + mtspr(SPRN_PMC3, host_os_sprs->pmc3); + mtspr(SPRN_PMC4, host_os_sprs->pmc4); + mtspr(SPRN_PMC5, host_os_sprs->pmc5); + mtspr(SPRN_PMC6, host_os_sprs->pmc6); + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); + mtspr(SPRN_SDAR, host_os_sprs->sdar); + mtspr(SPRN_SIAR, host_os_sprs->siar); + mtspr(SPRN_SIER, host_os_sprs->sier1); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); + mtspr(SPRN_SIER2, host_os_sprs->sier2); + mtspr(SPRN_SIER3, host_os_sprs->sier3); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, host_os_sprs->mmcra); + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); + isync(); + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_host); -- cgit From 4008d54e29531813e800580f8309133b9b14a921 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 11 Jul 2022 09:19:27 +0530 Subject: powerpc/kvm: Remove comment related to moving PMU code to perf subsystem Commit aabcaf6ae2a0 ("KVM: PPC: Book3S HV P9: Move host OS save/restore functions to built-in") added a comment in switch_pmu_to_guest function, indicating possibility of moving PMU handling code to perf subsystem. But perf subsystem code compilation depends upon the enablement of CONFIG_PERF_EVENTS whereas, kvm code don't have any dependency on this config. Patch remove this comment as switch_pmu_to_guest functionality is needed even if perf subsystem is disabled. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711034927.213192-2-kjain@linux.ibm.com --- arch/powerpc/kvm/book3s_hv_p9_perf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_p9_perf.c b/arch/powerpc/kvm/book3s_hv_p9_perf.c index da3135cab9ea..44d24cca3df1 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_perf.c +++ b/arch/powerpc/kvm/book3s_hv_p9_perf.c @@ -44,12 +44,6 @@ void switch_pmu_to_guest(struct kvm_vcpu *vcpu, /* Save host */ if (ppc_get_pmu_inuse()) { - /* - * It might be better to put PMU handling (at least for the - * host) in the perf subsystem because it knows more about what - * is being used. - */ - /* POWER9, POWER10 do not implement HPMC or SPMC */ host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); -- cgit From 2b461880c20777d317b4ad24ef040918860133ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Jul 2022 19:51:58 +1000 Subject: powerpc: Fix all occurences of duplicate words Since commit 87c78b612f4f ("powerpc: Fix all occurences of "the the"") fixed "the the", there's now a steady stream of patches fixing other duplicate words. Just fix them all at once, to save the overhead of dealing with individual patches for each case. This leaves a few cases of "that that", which in some contexts is correct. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220718095158.326606-1-mpe@ellerman.id.au --- arch/powerpc/crypto/aes-spe-glue.c | 2 +- arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/pci_dn.c | 2 +- arch/powerpc/kernel/ptrace/ptrace-vsx.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 6 +++--- arch/powerpc/kernel/watchdog.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/platforms/4xx/cpm.c | 2 +- arch/powerpc/platforms/powernv/vas-fault.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index e8dfe9fb0266..efab78a3a8f6 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -28,7 +28,7 @@ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32 * bit unit (SU2). One of these can be a memory access that is executed via * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per - * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data + * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data * will need an estimated maximum of 20,000 cycles. Headroom for cache misses * included. Even with the low end model clocked at 667 MHz this equals to a * critical time window of less than 30us. The value has been chosen to diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 8f69bb07e500..2769889219bf 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -73,7 +73,7 @@ static inline void rmci_maybe_off(void) * the display during identify_machine() and MMU_Init() * * The display is mapped to virtual address 0xD0000000, rather - * than 1:1, because some some CHRP machines put the frame buffer + * than 1:1, because some CHRP machines put the frame buffer * in the region starting at 0xC0000000 (PAGE_OFFSET). * This mapping is temporary and will disappear as soon as the * setup done by MMU_Init() is applied. diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 260273e56431..f279295179bd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -750,7 +750,7 @@ static void eeh_pe_cleanup(struct eeh_pe *pe) * @pdev: pci_dev to check * * This function may return a false positive if we can't determine the slot's - * presence state. This might happen for for PCIe slots if the PE containing + * presence state. This might happen for PCIe slots if the PE containing * the upstream bridge is also frozen, or the bridge is part of the same PE * as the device. * diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b66dd6f775a4..3d0dc133a9ae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2779,7 +2779,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a decrementer interrupt, we bump the dec to max and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c87999289752..cbb912ee92fa 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1088,7 +1088,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) */ pci_read_bridge_bases(bus); - /* Now fixup the bus bus */ + /* Now fixup the bus */ pcibios_setup_bus_self(bus); } EXPORT_SYMBOL(pcibios_fixup_bus); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 938ab8838ab5..7a35fc25a304 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -259,7 +259,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) if (edev) { /* * We allocate pci_dn's for the totalvfs count, - * but only only the vfs that were activated + * but only the vfs that were activated * have a configured PE. */ if (edev->pe) diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index 1da4303128ef..7df08004c47d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -71,7 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, } /* - * Currently to set and and get all the vsx state, you need to call + * Currently to set and get all the vsx state, you need to call * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index cab67b5120b9..cb158c32b50b 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; @@ -311,7 +311,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; @@ -564,7 +564,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d28b9553654..dd882d00845d 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -353,7 +353,7 @@ static void watchdog_timer_interrupt(int cpu) if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { /* * Something has called printk from NMI context. It might be - * stuck, so this this triggers a flush that will get that + * stuck, so this triggers a flush that will get that * printk output to the console. * * See wd_lockup_ipi. diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c68883170a82..529b6c6895bf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5665,7 +5665,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); - /* invalidate the entry (what do do on error from the above ?) */ + /* invalidate the entry (what to do on error from the above ?) */ pimap->mapped[i].r_hwirq = 0; /* diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fc92613dc2bf..363a9447d63d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -408,7 +408,7 @@ repeat: ssize); if (ret == -1) { /* - * Try to to keep bolted entries in primary. + * Try to keep bolted entries in primary. * Remove non bolted entries and try insert again */ ret = mmu_hash_ops.hpte_remove(hpteg); diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index 1d3bc35ee1a7..182e12855c27 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -63,7 +63,7 @@ static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask) * known as class 1, 2 and 3. For class 1 units, they are * unconditionally put to sleep when the corresponding CPM bit is * set. For class 2 and 3 units this is not case; if they can be - * put to to sleep, they will. Here we do not verify, we just + * put to sleep, they will. Here we do not verify, we just * set them and expect them to eventually go off when they can. */ value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index c1bfad56447d..2b47d5a86328 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -77,7 +77,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) /* * VAS can interrupt with multiple page faults. So process all * valid CRBs within fault FIFO until reaches invalid CRB. - * We use CCW[0] and pswid to validate validate CRBs: + * We use CCW[0] and pswid to validate CRBs: * * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 * OS sets this bit to 1 after reading CRB. diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1b0c901a6f3b..8e40ccac0f44 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -71,7 +71,7 @@ static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) if (pdev->is_virtfn) { /* * FIXME: This really should be handled by choosing the right - * parent PE in in pseries_eeh_init_edev(). + * parent PE in pseries_eeh_init_edev(). */ struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 69f21d30394b..20f6ed813bff 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -29,7 +29,7 @@ (1ul << ND_CMD_SET_CONFIG_DATA) | \ (1ul << ND_CMD_CALL)) -/* DIMM health bitmap bitmap indicators */ +/* DIMM health bitmap indicators */ /* SCM device is unable to persist memory contents */ #define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) /* SCM device failed to persist memory contents */ -- cgit From fd19a1f72aa7bf687609e0810e644fe5b3846342 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:27 +1000 Subject: selftests/powerpc: Ensure 16-byte stack pointer alignment The PUSH/POP_BASIC_STACK helpers in basic_asm.h do not ensure that the stack pointer is always 16-byte aligned, which is required per the ABI. Fix the macros to do the alignment if the caller fails to. Currently only one caller passes a non-aligned size, tm_signal_self(), which hasn't been caught in testing, presumably because it's a leaf function. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/basic_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index 886dc026fe7a..807e83e821ec 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -58,7 +58,7 @@ #define PUSH_BASIC_STACK(_extra) \ mflr r0; \ std r0, STACK_FRAME_LR_POS(%r1); \ - stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ + stdu %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); \ mfcr r0; \ stw r0, STACK_FRAME_CR_POS(%r1); \ std %r2, STACK_FRAME_TOC_POS(%r1); @@ -67,7 +67,7 @@ ld %r2, STACK_FRAME_TOC_POS(%r1); \ lwz r0, STACK_FRAME_CR_POS(%r1); \ mtcr r0; \ - addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \ + addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ ld r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; -- cgit From cf4baafd7846b3def67057a09b7603a6b566417a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:28 +1000 Subject: selftests/powerpc/ptrace: Set LOCAL_HDRS Set LOCAL_HDRS so header changes cause rebuilds. The lib.mk logic adds all the headers in LOCAL_HDRS as dependencies, so there's no need to also list them explicitly. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/ptrace/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index a500639da97a..0b0652d88b1b 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -4,12 +4,13 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ perf-hwbreak ptrace-syscall ptrace-perf-hwbreak +LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) + top_srcdir = ../../../../.. include ../../lib.mk CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie -$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h +$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S -- cgit From 3c20a1d07c377d7260ca853e216cc85bbd7857fa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:29 +1000 Subject: selftests/powerpc/ptrace: Split CFLAGS better Currently all ptrace tests are built 64-bit and with TM enabled. Only the TM tests need TM enabled, so split those out into a separate variable so that can be specified precisely. Split the rest of the tests into a variable, and add -m64 to CFLAGS for those tests, so that in a subsequent patch some tests can be made to build 32-bit. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/ptrace/Makefile | 33 +++++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 0b0652d88b1b..8611b0670587 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,15 +1,38 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ - ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall ptrace-perf-hwbreak + +TM_TESTS := ptrace-tm-gpr +TM_TESTS += ptrace-tm-spd-gpr +TM_TESTS += ptrace-tm-spd-tar +TM_TESTS += ptrace-tm-spd-vsx +TM_TESTS += ptrace-tm-spr +TM_TESTS += ptrace-tm-tar +TM_TESTS += ptrace-tm-vsx + +TESTS_64 := $(TM_TESTS) +TESTS_64 += core-pkey +TESTS_64 += perf-hwbreak +TESTS_64 += ptrace-gpr +TESTS_64 += ptrace-hwbreak +TESTS_64 += ptrace-perf-hwbreak +TESTS_64 += ptrace-pkey +TESTS_64 += ptrace-syscall +TESTS_64 += ptrace-tar +TESTS_64 += ptrace-vsx + +TEST_GEN_PROGS := $(TESTS_64) LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk -CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie +TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) +TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) + +$(TESTS_64): CFLAGS += -m64 +$(TM_TESTS): CFLAGS += -I../tm -mhtm + +CFLAGS += -I../../../../../usr/include -fno-pie $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -- cgit From 8f2e02394dc907f5e0140bfab80a9aa11e3449ed Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:30 +1000 Subject: selftests/powerpc: Don't save CR by default in asm helpers Thare are some asm helpers for creating/popping stack frames in basic_asm.h. They always save/restore CR, but none of the selftests tests touch non-volatile CR fields, so it's unnecessary to save them by default. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-4-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/basic_asm.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index 807e83e821ec..faec7b11c041 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -59,14 +59,10 @@ mflr r0; \ std r0, STACK_FRAME_LR_POS(%r1); \ stdu %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); \ - mfcr r0; \ - stw r0, STACK_FRAME_CR_POS(%r1); \ std %r2, STACK_FRAME_TOC_POS(%r1); #define POP_BASIC_STACK(_extra) \ ld %r2, STACK_FRAME_TOC_POS(%r1); \ - lwz r0, STACK_FRAME_CR_POS(%r1); \ - mtcr r0; \ addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ ld r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; -- cgit From cfbc0723d18f5aeab4308c66d7d1992317eed7c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:31 +1000 Subject: selftests/powerpc: Don't save TOC by default in asm helpers Thare are some asm helpers for creating/popping stack frames in basic_asm.h. They always save/restore r2 (TOC pointer), but none of the selftests change r2, so it's unnecessary to save it by default. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-5-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/basic_asm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index faec7b11c041..be380aa779f8 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -58,11 +58,9 @@ #define PUSH_BASIC_STACK(_extra) \ mflr r0; \ std r0, STACK_FRAME_LR_POS(%r1); \ - stdu %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); \ - std %r2, STACK_FRAME_TOC_POS(%r1); + stdu %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); #define POP_BASIC_STACK(_extra) \ - ld %r2, STACK_FRAME_TOC_POS(%r1); \ addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ ld r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; -- cgit From bd4d3042e7570fc024b5ff15e895363e4bf5a78f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:32 +1000 Subject: selftests/powerpc: Add 32-bit support to asm helpers Add support for 32-bit builds to the asm helpers. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-6-mpe@ellerman.id.au --- .../testing/selftests/powerpc/include/basic_asm.h | 47 +++++++++++++++++----- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index be380aa779f8..2d7f6e592dd9 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -5,6 +5,16 @@ #include #include +#ifdef __powerpc64__ +#define PPC_LL ld +#define PPC_STL std +#define PPC_STLU stdu +#else +#define PPC_LL lwz +#define PPC_STL stw +#define PPC_STLU stwu +#endif + #define LOAD_REG_IMMEDIATE(reg, expr) \ lis reg, (expr)@highest; \ ori reg, reg, (expr)@higher; \ @@ -14,16 +24,20 @@ /* * Note: These macros assume that variables being stored on the stack are - * doublewords, while this is usually the case it may not always be the + * sizeof(long), while this is usually the case it may not always be the * case for each use case. */ +#ifdef __powerpc64__ + +// ABIv2 #if defined(_CALL_ELF) && _CALL_ELF == 2 #define STACK_FRAME_MIN_SIZE 32 #define STACK_FRAME_TOC_POS 24 #define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) -#else + +#else // ABIv1 below #define STACK_FRAME_MIN_SIZE 112 #define STACK_FRAME_TOC_POS 40 #define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) @@ -34,7 +48,24 @@ */ #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ (112 + ((_var_num)*8)) -#endif + + +#endif // ABIv2 + +// Common 64-bit +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +#else // 32-bit below + +#define STACK_FRAME_MIN_SIZE 16 +#define STACK_FRAME_LR_POS 4 + +#define __STACK_FRAME_PARAM(_param) (STACK_FRAME_MIN_SIZE + ((_param)*4)) +#define __STACK_FRAME_LOCAL(_num_params, _var_num) \ + ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*4)) + +#endif // __powerpc64__ /* Parameter x saved to the stack */ #define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) @@ -42,8 +73,6 @@ /* Local variable x saved to the stack after x parameters */ #define STACK_FRAME_LOCAL(num_params, var) \ __STACK_FRAME_LOCAL(num_params, var) -#define STACK_FRAME_LR_POS 16 -#define STACK_FRAME_CR_POS 8 /* * It is very important to note here that _extra is the extra amount of @@ -56,13 +85,13 @@ * preprocessed incorrectly, hence r0. */ #define PUSH_BASIC_STACK(_extra) \ - mflr r0; \ - std r0, STACK_FRAME_LR_POS(%r1); \ - stdu %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); + mflr r0; \ + PPC_STL r0, STACK_FRAME_LR_POS(%r1); \ + PPC_STLU %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); #define POP_BASIC_STACK(_extra) \ addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ - ld r0, STACK_FRAME_LR_POS(%r1); \ + PPC_LL r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; #endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ -- cgit From af9f3f31f6cc8e3f637f19189e83d99f3fdd96ad Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:33 +1000 Subject: selftests/powerpc/ptrace: Drop unused load_fpr_single_precision() This function is never called, drop it. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-7-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/reg.h | 1 - tools/testing/selftests/powerpc/lib/reg.S | 37 --------------------------- 2 files changed, 38 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 2ac7a4c7749c..58b6491aa5a4 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -164,7 +164,6 @@ #ifndef __ASSEMBLER__ void store_gpr(unsigned long *addr); void load_gpr(unsigned long *addr); -void load_fpr_single_precision(float *addr); void store_fpr_single_precision(float *addr); #endif /* end of __ASSEMBLER__ */ diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S index 9304ea7d59b9..dd37b8e6f84c 100644 --- a/tools/testing/selftests/powerpc/lib/reg.S +++ b/tools/testing/selftests/powerpc/lib/reg.S @@ -53,43 +53,6 @@ FUNC_START(store_gpr) blr FUNC_END(store_gpr) -/* Single Precision Float - float buf[32] */ -FUNC_START(load_fpr_single_precision) - lfs 0, 0*4(3) - lfs 1, 1*4(3) - lfs 2, 2*4(3) - lfs 3, 3*4(3) - lfs 4, 4*4(3) - lfs 5, 5*4(3) - lfs 6, 6*4(3) - lfs 7, 7*4(3) - lfs 8, 8*4(3) - lfs 9, 9*4(3) - lfs 10, 10*4(3) - lfs 11, 11*4(3) - lfs 12, 12*4(3) - lfs 13, 13*4(3) - lfs 14, 14*4(3) - lfs 15, 15*4(3) - lfs 16, 16*4(3) - lfs 17, 17*4(3) - lfs 18, 18*4(3) - lfs 19, 19*4(3) - lfs 20, 20*4(3) - lfs 21, 21*4(3) - lfs 22, 22*4(3) - lfs 23, 23*4(3) - lfs 24, 24*4(3) - lfs 25, 25*4(3) - lfs 26, 26*4(3) - lfs 27, 27*4(3) - lfs 28, 28*4(3) - lfs 29, 29*4(3) - lfs 30, 30*4(3) - lfs 31, 31*4(3) - blr -FUNC_END(load_fpr_single_precision) - /* Single Precision Float - float buf[32] */ FUNC_START(store_fpr_single_precision) stfs 0, 0*4(3) -- cgit From 53fa86e7ece54cbb1fae1443bd6b348088d8ce7e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:34 +1000 Subject: selftests/powerpc/ptrace: Convert to load/store doubles Some of the ptrace tests check the contents of floating pointer registers. Currently these use float, which is always 4 bytes, but the ptrace API supports saving/restoring 8 bytes per register, so switch to using doubles to exercise the code more fully. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-8-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/include/reg.h | 68 ++++++++++----------- tools/testing/selftests/powerpc/lib/reg.S | 70 +++++++++++----------- .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 16 ++--- .../testing/selftests/powerpc/ptrace/ptrace-gpr.h | 14 ++--- .../selftests/powerpc/ptrace/ptrace-tm-gpr.c | 18 +++--- .../selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 20 +++---- tools/testing/selftests/powerpc/ptrace/ptrace.h | 14 +++-- 7 files changed, 111 insertions(+), 109 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 58b6491aa5a4..d5a547f72669 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -127,44 +127,44 @@ "li 30, %[" #_asm_symbol_name_immed "];" \ "li 31, %[" #_asm_symbol_name_immed "];" -#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \ - "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 31, 0(%[" #_asm_symbol_name_addr "]);" +#define ASM_LOAD_FPR(_asm_symbol_name_addr) \ + "lfd 0, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 1, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 2, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 3, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 4, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 5, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 6, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 7, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 8, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 9, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 10, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 11, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 12, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 13, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 14, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 15, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 16, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 17, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 18, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 19, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 20, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 21, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 22, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 23, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 24, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 25, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 26, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 27, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 28, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 29, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 30, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 31, 0(%[" #_asm_symbol_name_addr "]);" #ifndef __ASSEMBLER__ void store_gpr(unsigned long *addr); void load_gpr(unsigned long *addr); -void store_fpr_single_precision(float *addr); +void store_fpr(double *addr); #endif /* end of __ASSEMBLER__ */ #endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S index dd37b8e6f84c..6d1af4a9a6b4 100644 --- a/tools/testing/selftests/powerpc/lib/reg.S +++ b/tools/testing/selftests/powerpc/lib/reg.S @@ -53,42 +53,42 @@ FUNC_START(store_gpr) blr FUNC_END(store_gpr) -/* Single Precision Float - float buf[32] */ -FUNC_START(store_fpr_single_precision) - stfs 0, 0*4(3) - stfs 1, 1*4(3) - stfs 2, 2*4(3) - stfs 3, 3*4(3) - stfs 4, 4*4(3) - stfs 5, 5*4(3) - stfs 6, 6*4(3) - stfs 7, 7*4(3) - stfs 8, 8*4(3) - stfs 9, 9*4(3) - stfs 10, 10*4(3) - stfs 11, 11*4(3) - stfs 12, 12*4(3) - stfs 13, 13*4(3) - stfs 14, 14*4(3) - stfs 15, 15*4(3) - stfs 16, 16*4(3) - stfs 17, 17*4(3) - stfs 18, 18*4(3) - stfs 19, 19*4(3) - stfs 20, 20*4(3) - stfs 21, 21*4(3) - stfs 22, 22*4(3) - stfs 23, 23*4(3) - stfs 24, 24*4(3) - stfs 25, 25*4(3) - stfs 26, 26*4(3) - stfs 27, 27*4(3) - stfs 28, 28*4(3) - stfs 29, 29*4(3) - stfs 30, 30*4(3) - stfs 31, 31*4(3) +/* Double Precision Float - double buf[32] */ +FUNC_START(store_fpr) + stfd 0, 0*8(3) + stfd 1, 1*8(3) + stfd 2, 2*8(3) + stfd 3, 3*8(3) + stfd 4, 4*8(3) + stfd 5, 5*8(3) + stfd 6, 6*8(3) + stfd 7, 7*8(3) + stfd 8, 8*8(3) + stfd 9, 9*8(3) + stfd 10, 10*8(3) + stfd 11, 11*8(3) + stfd 12, 12*8(3) + stfd 13, 13*8(3) + stfd 14, 14*8(3) + stfd 15, 15*8(3) + stfd 16, 16*8(3) + stfd 17, 17*8(3) + stfd 18, 18*8(3) + stfd 19, 19*8(3) + stfd 20, 20*8(3) + stfd 21, 21*8(3) + stfd 22, 22*8(3) + stfd 23, 23*8(3) + stfd 24, 24*8(3) + stfd 25, 25*8(3) + stfd 26, 26*8(3) + stfd 27, 27*8(3) + stfd 28, 28*8(3) + stfd 29, 29*8(3) + stfd 30, 30*8(3) + stfd 31, 31*8(3) blr -FUNC_END(store_fpr_single_precision) +FUNC_END(store_fpr) /* VMX/VSX registers - unsigned long buf[128] */ FUNC_START(loadvsx) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 17cd480c8780..1468e89c044b 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -12,20 +12,20 @@ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; void gpr(void) { unsigned long gpr_buf[18]; - float fpr_buf[32]; + double fpr_buf[32]; cptr = (int *)shmat(shm_id, NULL, 0); asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) : : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a) : "memory", "r6", "r7", "r8", "r9", "r10", @@ -41,12 +41,12 @@ void gpr(void) shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); @@ -55,7 +55,7 @@ void gpr(void) int trace_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h index c5cd53181e2e..a5470b88bd08 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h @@ -12,10 +12,10 @@ #define FPR_3 0.003 #define FPR_4 0.004 -#define FPR_1_REP 0x3f50624de0000000 -#define FPR_2_REP 0x3f60624de0000000 -#define FPR_3_REP 0x3f689374c0000000 -#define FPR_4_REP 0x3f70624de0000000 +#define FPR_1_REP 0x3f50624dd2f1a9fcull +#define FPR_2_REP 0x3f60624dd2f1a9fcull +#define FPR_3_REP 0x3f689374bc6a7efaull +#define FPR_4_REP 0x3f70624dd2f1a9fcull /* Buffer must have 18 elements */ int validate_gpr(unsigned long *gpr, unsigned long val) @@ -36,13 +36,13 @@ int validate_gpr(unsigned long *gpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr(unsigned long *fpr, unsigned long val) +int validate_fpr(__u64 *fpr, __u64 val) { int i, found = 1; for (i = 0; i < 32; i++) { if (fpr[i] != val) { - printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val); + printf("FPR[%d]: %llx Expected: %llx\n", i, fpr[i], val); found = 0; } } @@ -53,7 +53,7 @@ int validate_fpr(unsigned long *fpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr_float(float *fpr, float val) +int validate_fpr_double(double *fpr, double val) { int i, found = 1; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 67ca297c5cca..5dc152b162df 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -12,15 +12,15 @@ int shm_id; unsigned long *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; void tm_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; printf("Starting the child\n"); cptr = (unsigned long *)shmat(shm_id, NULL, 0); @@ -29,12 +29,12 @@ trans: cptr[1] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" "beq 2f;" ASM_LOAD_GPR_IMMED(gpr_2) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_2) + ASM_LOAD_FPR(flt_2) "tsuspend.;" "li 7, 1;" "stw 7, 0(%[cptr1]);" @@ -70,12 +70,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); @@ -87,7 +87,7 @@ trans: int trace_tm_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 6f2bce1b6c5d..458cc1a70ccf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -12,10 +12,10 @@ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; -float d = FPR_4; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; +double d = FPR_4; __attribute__((used)) void wait_parent(void) { @@ -28,7 +28,7 @@ void tm_spd_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; cptr = (int *)shmat(shm_id, NULL, 0); @@ -36,7 +36,7 @@ trans: cptr[2] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" @@ -45,7 +45,7 @@ trans: ASM_LOAD_GPR_IMMED(gpr_2) "tsuspend.;" ASM_LOAD_GPR_IMMED(gpr_4) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_4) + ASM_LOAD_FPR(flt_4) "bl wait_parent;" "tresume.;" @@ -77,12 +77,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); } @@ -93,7 +93,7 @@ trans: int trace_tm_spd_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 5181ad9b4b6c..4672e848604f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -4,6 +4,9 @@ * * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. */ + +#define __SANE_USERSPACE_TYPES__ + #include #include #include @@ -30,8 +33,8 @@ #define TEST_FAIL 1 struct fpr_regs { - unsigned long fpr[32]; - unsigned long fpscr; + __u64 fpr[32]; + __u64 fpscr; }; struct tm_spr_regs { @@ -318,7 +321,7 @@ fail: } /* FPR */ -int show_fpr(pid_t child, unsigned long *fpr) +int show_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; int ret, i; @@ -337,7 +340,7 @@ int show_fpr(pid_t child, unsigned long *fpr) return TEST_PASS; } -int write_fpr(pid_t child, unsigned long val) +int write_fpr(pid_t child, __u64 val) { struct fpr_regs *regs; int ret, i; @@ -360,7 +363,7 @@ int write_fpr(pid_t child, unsigned long val) return TEST_PASS; } -int show_ckpt_fpr(pid_t child, unsigned long *fpr) +int show_ckpt_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; struct iovec iov; @@ -742,4 +745,3 @@ void analyse_texasr(unsigned long texasr) } void store_gpr(unsigned long *addr); -void store_fpr(float *addr); -- cgit From 149a497d5fda3e996a00437260a4c170e43909c8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:35 +1000 Subject: selftests/powerpc/ptrace: Build the ptrace-gpr test as 32-bit when possible The ptrace-gpr test can now be built 32-bit, so do that if that's the compiler default rather than forcing a 64-bit build. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-9-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/ptrace/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8611b0670587..3434a624ed77 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -11,7 +11,6 @@ TM_TESTS += ptrace-tm-vsx TESTS_64 := $(TM_TESTS) TESTS_64 += core-pkey TESTS_64 += perf-hwbreak -TESTS_64 += ptrace-gpr TESTS_64 += ptrace-hwbreak TESTS_64 += ptrace-perf-hwbreak TESTS_64 += ptrace-pkey @@ -19,7 +18,9 @@ TESTS_64 += ptrace-syscall TESTS_64 += ptrace-tar TESTS_64 += ptrace-vsx -TEST_GEN_PROGS := $(TESTS_64) +TESTS += ptrace-gpr + +TEST_GEN_PROGS := $(TESTS) $(TESTS_64) LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) -- cgit From 611e385087efc2cc3a7033aedd3f84ad0cf2a703 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:36 +1000 Subject: selftests/powerpc/ptrace: Do more of ptrace-gpr in asm The ptrace-gpr test includes some inline asm to load GPR and FPR registers. It then goes back to C to wait for the parent to trace it and then checks register contents. The split between inline asm and C is fragile, it relies on the compiler not using any non-volatile GPRs after the inline asm block. It also requires a very large and unwieldy inline asm block. So convert the logic to set registers, wait, and store registers to a single asm function, meaning there's no window for the compiler to intervene. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-10-mpe@ellerman.id.au --- .../testing/selftests/powerpc/include/basic_asm.h | 8 ++++ tools/testing/selftests/powerpc/ptrace/Makefile | 1 + .../testing/selftests/powerpc/ptrace/ptrace-gpr.S | 52 ++++++++++++++++++++++ .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 29 +++++------- 4 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index 2d7f6e592dd9..26cde8ea1f49 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -94,4 +94,12 @@ PPC_LL r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; +.macro OP_REGS op, reg_width, start_reg, end_reg, base_reg, base_reg_offset=0, skip=0 + .set i, \start_reg + .rept (\end_reg - \start_reg + 1) + \op i, (\reg_width * (i - \skip) + \base_reg_offset)(\base_reg) + .set i, i + 1 + .endr +.endm + #endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 3434a624ed77..2f02cb54224d 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -35,6 +35,7 @@ $(TM_TESTS): CFLAGS += -I../tm -mhtm CFLAGS += -I../../../../../usr/include -fno-pie +$(OUTPUT)/ptrace-gpr: ptrace-gpr.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S new file mode 100644 index 000000000000..070e8443e3cc --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * test helper assembly functions + * + * Copyright (C) 2016 Simon Guo, IBM Corporation. + * Copyright 2022 Michael Ellerman, IBM Corporation. + */ +#include "basic_asm.h" + +#define GPR_SIZE __SIZEOF_LONG__ +#define FIRST_GPR 14 +#define NUM_GPRS (32 - FIRST_GPR) +#define STACK_SIZE (NUM_GPRS * GPR_SIZE) + +// gpr_child_loop(int *read_flag, int *write_flag, +// unsigned long *gpr_buf, double *fpr_buf); +FUNC_START(gpr_child_loop) + // r3 = read_flag + // r4 = write_flag + // r5 = gpr_buf + // r6 = fpr_buf + PUSH_BASIC_STACK(STACK_SIZE) + + // Save non-volatile GPRs + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + // Load GPRs with expected values + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r6 + + // Signal to parent that we're ready + li r0, 1 + stw r0, 0(r4) + + // Wait for parent to finish +1: lwz r0, 0(r3) + cmpwi r0, 0 + beq 1b // Loop while flag is zero + + // Save GPRs back to caller buffer + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Save FPRs + OP_REGS stfd, 8, 0, 31, r6 + + // Reload non-volatile GPRs + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + POP_BASIC_STACK(STACK_SIZE) + blr diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 1468e89c044b..4e7a7eb01e3a 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -16,32 +16,27 @@ double a = FPR_1; double b = FPR_2; double c = FPR_3; +extern void gpr_child_loop(int *read_flag, int *write_flag, + unsigned long *gpr_buf, double *fpr_buf); + void gpr(void) { - unsigned long gpr_buf[18]; + unsigned long gpr_buf[32]; double fpr_buf[32]; + int i; cptr = (int *)shmat(shm_id, NULL, 0); + memset(gpr_buf, 0, sizeof(gpr_buf)); + memset(fpr_buf, 0, sizeof(fpr_buf)); - asm __volatile__( - ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR(flt_1) - : - : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a) - : "memory", "r6", "r7", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15", "r16", "r17", - "r18", "r19", "r20", "r21", "r22", "r23", "r24", - "r25", "r26", "r27", "r28", "r29", "r30", "r31" - ); - - cptr[1] = 1; + for (i = 0; i < 32; i++) { + gpr_buf[i] = GPR_1; + fpr_buf[i] = a; + } - while (!cptr[0]) - asm volatile("" : : : "memory"); + gpr_child_loop(&cptr[0], &cptr[1], gpr_buf, fpr_buf); shmdt((void *)cptr); - store_gpr(gpr_buf); - store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); -- cgit From 7b1513d02edf4a6334618070641f47abbbef0cef Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:37 +1000 Subject: selftests/powerpc/ptrace: Make child errors more obvious Use the FAIL_IF() macro so that errors in the child report a line number, rather than just silently exiting. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-11-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 4e7a7eb01e3a..b574ea26395c 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -19,7 +19,7 @@ double c = FPR_3; extern void gpr_child_loop(int *read_flag, int *write_flag, unsigned long *gpr_buf, double *fpr_buf); -void gpr(void) +static int child(void) { unsigned long gpr_buf[32]; double fpr_buf[32]; @@ -38,13 +38,10 @@ void gpr(void) shmdt((void *)cptr); - if (validate_gpr(gpr_buf, GPR_3)) - exit(1); + FAIL_IF(validate_gpr(gpr_buf, GPR_3)); + FAIL_IF(validate_fpr_double(fpr_buf, c)); - if (validate_fpr_double(fpr_buf, c)) - exit(1); - - exit(0); + return 0; } int trace_gpr(pid_t child) @@ -76,7 +73,7 @@ int ptrace_gpr(void) return TEST_FAIL; } if (pid == 0) - gpr(); + exit(child()); if (pid) { pptr = (int *)shmat(shm_id, NULL, 0); -- cgit From c5a814cc992002c36fa5b7db5fbd55efb7430386 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:38 +1000 Subject: selftests/powerpc/ptrace: Use more interesting values The ptrace-gpr test uses fixed values to test that registers can be read/written via ptrace. In particular it sets all GPRs to 1, which means the test could miss some types of bugs - eg. if the kernel was only returning the low word. So generate some random values at startup and use those instead. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-12-mpe@ellerman.id.au --- .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 71 +++++++++++++++++----- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index b574ea26395c..c5dcb8c02616 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -7,18 +7,18 @@ #include "ptrace.h" #include "ptrace-gpr.h" #include "reg.h" +#include /* Tracer and Tracee Shared Data */ int shm_id; int *cptr, *pptr; -double a = FPR_1; -double b = FPR_2; -double c = FPR_3; - extern void gpr_child_loop(int *read_flag, int *write_flag, unsigned long *gpr_buf, double *fpr_buf); +unsigned long child_gpr_val, parent_gpr_val; +double child_fpr_val, parent_fpr_val; + static int child(void) { unsigned long gpr_buf[32]; @@ -30,16 +30,16 @@ static int child(void) memset(fpr_buf, 0, sizeof(fpr_buf)); for (i = 0; i < 32; i++) { - gpr_buf[i] = GPR_1; - fpr_buf[i] = a; + gpr_buf[i] = child_gpr_val; + fpr_buf[i] = child_fpr_val; } gpr_child_loop(&cptr[0], &cptr[1], gpr_buf, fpr_buf); shmdt((void *)cptr); - FAIL_IF(validate_gpr(gpr_buf, GPR_3)); - FAIL_IF(validate_fpr_double(fpr_buf, c)); + FAIL_IF(validate_gpr(gpr_buf, parent_gpr_val)); + FAIL_IF(validate_fpr_double(fpr_buf, parent_fpr_val)); return 0; } @@ -47,24 +47,67 @@ static int child(void) int trace_gpr(pid_t child) { unsigned long gpr[18]; - __u64 fpr[32]; + __u64 tmp, fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); - FAIL_IF(validate_gpr(gpr, GPR_1)); + FAIL_IF(validate_gpr(gpr, child_gpr_val)); FAIL_IF(show_fpr(child, fpr)); - FAIL_IF(validate_fpr(fpr, FPR_1_REP)); - FAIL_IF(write_gpr(child, GPR_3)); - FAIL_IF(write_fpr(child, FPR_3_REP)); + + memcpy(&tmp, &child_fpr_val, sizeof(tmp)); + FAIL_IF(validate_fpr(fpr, tmp)); + + FAIL_IF(write_gpr(child, parent_gpr_val)); + + memcpy(&tmp, &parent_fpr_val, sizeof(tmp)); + FAIL_IF(write_fpr(child, tmp)); + FAIL_IF(stop_trace(child)); return TEST_PASS; } +#ifndef __LONG_WIDTH__ +#define __LONG_WIDTH__ (sizeof(long) * 8) +#endif + +static uint64_t rand_reg(void) +{ + uint64_t result; + long r; + + r = random(); + + // Small values are typical + result = r & 0xffff; + if (r & 0x10000) + return result; + + // Pointers tend to have high bits set + result |= random() << (__LONG_WIDTH__ - 31); + if (r & 0x100000) + return result; + + // And sometimes we want a full 64-bit value + result ^= random() << 16; + + return result; +} + int ptrace_gpr(void) { - pid_t pid; + unsigned long seed; int ret, status; + pid_t pid; + + seed = getpid() ^ time(NULL); + printf("srand(%lu)\n", seed); + srand(seed); + + child_gpr_val = rand_reg(); + child_fpr_val = rand_reg(); + parent_gpr_val = rand_reg(); + parent_fpr_val = rand_reg(); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); -- cgit From 6c9c7d8fbc3a2a0cfed0e7a5b39581847b632f0b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2022 00:02:39 +1000 Subject: selftests/powerpc/ptrace: Add peek/poke of FPRs Currently the ptrace-gpr test only tests the GET/SET(FP)REGS ptrace APIs. But there's an alternate (older) API, called PEEK/POKEUSR. Add some minimal testing of PEEK/POKEUSR of the FPRs. This is sufficient to detect the bug that was fixed recently in the 32-bit ptrace FPR handling. Depends-on: 8e1278444446 ("powerpc/32: Fix overread/overwrite of thread_struct via ptrace") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220627140239.2464900-13-mpe@ellerman.id.au --- .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 24 +++++++- tools/testing/selftests/powerpc/ptrace/ptrace.h | 65 ++++++++++++++++++++++ 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index c5dcb8c02616..9ed87d297799 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -46,22 +46,42 @@ static int child(void) int trace_gpr(pid_t child) { + __u64 tmp, fpr[32], *peeked_fprs; unsigned long gpr[18]; - __u64 tmp, fpr[32]; FAIL_IF(start_trace(child)); + + // Check child GPRs match what we expect using GETREGS FAIL_IF(show_gpr(child, gpr)); FAIL_IF(validate_gpr(gpr, child_gpr_val)); - FAIL_IF(show_fpr(child, fpr)); + // Check child FPRs match what we expect using GETFPREGS + FAIL_IF(show_fpr(child, fpr)); memcpy(&tmp, &child_fpr_val, sizeof(tmp)); FAIL_IF(validate_fpr(fpr, tmp)); + // Check child FPRs match what we expect using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + free(peeked_fprs); + + // Write child GPRs using SETREGS FAIL_IF(write_gpr(child, parent_gpr_val)); + // Write child FPRs using SETFPREGS memcpy(&tmp, &parent_fpr_val, sizeof(tmp)); FAIL_IF(write_fpr(child, tmp)); + // Check child FPRs match what we just set, using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + + // Write child FPRs using POKEUSR + FAIL_IF(poke_fprs(child, (unsigned long *)peeked_fprs)); + + // Child will check its FPRs match before exiting FAIL_IF(stop_trace(child)); return TEST_PASS; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 4672e848604f..4e0233c0f2b3 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -440,6 +441,70 @@ int show_gpr(pid_t child, unsigned long *gpr) return TEST_PASS; } +long sys_ptrace(enum __ptrace_request request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, (void *)addr, data); +} + +// 33 because of FPSCR +#define PT_NUM_FPRS (33 * (sizeof(__u64) / sizeof(unsigned long))) + +__u64 *peek_fprs(pid_t child) +{ + unsigned long *fprs, *p, addr; + long ret; + int i; + + fprs = malloc(sizeof(unsigned long) * PT_NUM_FPRS); + if (!fprs) { + perror("malloc() failed"); + return NULL; + } + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)p); + if (ret) { + perror("ptrace(PTRACE_PEEKUSR) failed"); + return NULL; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)&addr); + if (!ret) { + printf("ptrace(PTRACE_PEEKUSR) succeeded unexpectedly!\n"); + return NULL; + } + + return (__u64 *)fprs; +} + +int poke_fprs(pid_t child, unsigned long *fprs) +{ + unsigned long *p, addr; + long ret; + int i; + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, *p); + if (ret) { + perror("ptrace(PTRACE_POKEUSR) failed"); + return -1; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, addr); + if (!ret) { + printf("ptrace(PTRACE_POKEUSR) succeeded unexpectedly!\n"); + return -1; + } + + return 0; +} + int write_gpr(pid_t child, unsigned long val) { struct pt_regs *regs; -- cgit From 882c0d1704cf61df13f01933269202d51e74b9f3 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:26 +0200 Subject: powerpc/mobility: wait for memory transfer to complete In pseries_migration_partition(), loop until the memory transfer is complete. This way the calling drmgr process will not exit earlier, allowing callbacks to be run only once the migration is fully completed. If reading the VASI state is done after the hypervisor has completed the migration, the HCALL is returning H_PARAMETER. We can safely assume that the memory transfer is achieved if this happens. This will also allow to manage the NMI watchdog state in the next commits. Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-2-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 48 +++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 78f3f74c7056..6297467072e6 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -427,6 +427,43 @@ static int wait_for_vasi_session_suspending(u64 handle) return ret; } +static void wait_for_vasi_session_completed(u64 handle) +{ + unsigned long state = 0; + int ret; + + pr_info("waiting for memory transfer to complete...\n"); + + /* + * Wait for transition from H_VASI_RESUMED to H_VASI_COMPLETED. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + /* + * If the memory transfer is already complete and the migration + * has been cleaned up by the hypervisor, H_PARAMETER is return, + * which is translate in EINVAL by poll_vasi_state(). + */ + if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) { + pr_info("memory transfer completed.\n"); + break; + } + + if (ret) { + pr_err("H_VASI_STATE return error (%d)\n", ret); + break; + } + + if (state != H_VASI_RESUMED) { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + break; + } + + msleep(500); + } +} + static void prod_single(unsigned int target_cpu) { long hvrc; @@ -673,9 +710,16 @@ static int pseries_migrate_partition(u64 handle) vas_migration_handler(VAS_SUSPEND); ret = pseries_suspend(handle); - if (ret == 0) + if (ret == 0) { post_mobility_fixup(); - else + /* + * Wait until the memory transfer is complete, so that the user + * space process returns from the syscall after the transfer is + * complete. This allows the user hooks to be executed at the + * right time. + */ + wait_for_vasi_session_completed(handle); + } else pseries_cancel_migration(handle, ret); vas_migration_handler(VAS_RESUME); -- cgit From 7c56a8733d0a2a4be2438a7512566e5ce552fccf Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:27 +0200 Subject: watchdog: export lockup_detector_reconfigure In some circumstances it may be interesting to reconfigure the watchdog from inside the kernel. On PowerPC, this may helpful before and after a LPAR migration (LPM) is initiated, because it implies some latencies, watchdog, and especially NMI watchdog is expected to be triggered during this operation. Reconfiguring the watchdog with a factor, would prevent it to happen too frequently during LPM. Rename lockup_detector_reconfigure() as __lockup_detector_reconfigure() and create a new function lockup_detector_reconfigure() calling __lockup_detector_reconfigure() under the protection of watchdog_mutex. Signed-off-by: Laurent Dufour [mpe: Squash in build fix from Laurent, reported by Sachin] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-3-ldufour@linux.ibm.com --- include/linux/nmi.h | 2 ++ kernel/watchdog.c | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 750c7f395ca9..f700ff2df074 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -122,6 +122,8 @@ int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); +void lockup_detector_reconfigure(void); + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 20a7a55e62b6..41596c415111 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -541,7 +541,7 @@ int lockup_detector_offline_cpu(unsigned int cpu) return 0; } -static void lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -561,6 +561,13 @@ static void lockup_detector_reconfigure(void) __lockup_detector_cleanup(); } +void lockup_detector_reconfigure(void) +{ + mutex_lock(&watchdog_mutex); + __lockup_detector_reconfigure(); + mutex_unlock(&watchdog_mutex); +} + /* * Create the watchdog infrastructure and configure the detector(s). */ @@ -577,13 +584,13 @@ static __init void lockup_detector_setup(void) return; mutex_lock(&watchdog_mutex); - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ -static void lockup_detector_reconfigure(void) +static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -591,9 +598,13 @@ static void lockup_detector_reconfigure(void) watchdog_nmi_start(); cpus_read_unlock(); } +void lockup_detector_reconfigure(void) +{ + __lockup_detector_reconfigure(); +} static inline void lockup_detector_setup(void) { - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -633,7 +644,7 @@ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - lockup_detector_reconfigure(); + __lockup_detector_reconfigure(); } /* -- cgit From f5e74e836097d1004077390717d4bd95d4a2c27a Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:28 +0200 Subject: powerpc/watchdog: introduce a NMI watchdog's factor Introduce a factor which would apply to the NMI watchdog timeout. This factor is a percentage added to the watchdog_tresh value. The value is set under the watchdog_mutex protection and lockup_detector_reconfigure() is called to recompute wd_panic_timeout_tb. Once the factor is set, it remains until it is set back to 0, which means no impact. Signed-off-by: Laurent Dufour Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-4-ldufour@linux.ibm.com --- arch/powerpc/include/asm/nmi.h | 2 ++ arch/powerpc/kernel/watchdog.c | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index ea0e487f87b1..c3c7adef74de 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -5,8 +5,10 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); +void watchdog_nmi_set_timeout_pct(u64 pct); #else static inline void arch_touch_nmi_watchdog(void) {} +static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dd882d00845d..dbcc4a793f0b 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -91,6 +91,10 @@ static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +#ifdef CONFIG_PPC_PSERIES +static u64 wd_timeout_pct; +#endif + /* * Try to take the exclusive watchdog action / NMI IPI / printing lock. * wd_smp_lock must be held. If this fails, we should return and wait @@ -527,7 +531,13 @@ static int stop_watchdog_on_cpu(unsigned int cpu) static void watchdog_calc_timeouts(void) { - wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + u64 threshold = watchdog_thresh; + +#ifdef CONFIG_PPC_PSERIES + threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100; +#endif + + wd_panic_timeout_tb = threshold * ppc_tb_freq; /* Have the SMP detector trigger a bit later */ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2; @@ -570,3 +580,12 @@ int __init watchdog_nmi_probe(void) } return 0; } + +#ifdef CONFIG_PPC_PSERIES +void watchdog_nmi_set_timeout_pct(u64 pct) +{ + pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); + WRITE_ONCE(wd_timeout_pct, pct); + lockup_detector_reconfigure(); +} +#endif -- cgit From 118b1366930c8c833b8b36abef657f40d4e26610 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:29 +0200 Subject: powerpc/pseries/mobility: set NMI watchdog factor during an LPM During an LPM, while the memory transfer is in progress on the arrival side, some latencies are generated when accessing not yet transferred pages on the arrival side. Thus, the NMI watchdog may be triggered too frequently, which increases the risk to hit an NMI interrupt in a bad place in the kernel, leading to a kernel panic. Disabling the Hard Lockup Watchdog until the memory transfer could be a too strong work around, some users would want this timeout to be eventually triggered if the system is hanging even during an LPM. Introduce a new sysctl variable nmi_watchdog_factor. It allows to apply a factor to the NMI watchdog timeout during an LPM. Just before the CPUs are stopped for the switchover sequence, the NMI watchdog timer is set to watchdog_thresh + factor% A value of 0 has no effect. The default value is 200, meaning that the NMI watchdog is set to 30s during LPM (based on a 10s watchdog_thresh value). Once the memory transfer is achieved, the factor is reset to 0. Setting this value to a high number is like disabling the NMI watchdog during an LPM. Signed-off-by: Laurent Dufour Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-5-ldufour@linux.ibm.com --- Documentation/admin-guide/sysctl/kernel.rst | 12 ++++++++ arch/powerpc/platforms/pseries/mobility.c | 43 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index ddccd1077462..9b7fa1baf225 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -592,6 +592,18 @@ to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst). +nmi_wd_lpm_factor (PPC only) +============================ + +Factor to apply to the NMI watchdog timeout (only when ``nmi_watchdog`` is +set to 1). This factor represents the percentage added to +``watchdog_thresh`` when calculating the NMI watchdog timeout during an +LPM. The soft lockup timeout is not impacted. + +A value of 0 means no change. The default value is 200 meaning the NMI +watchdog is set to 30s (based on ``watchdog_thresh`` equal to 10). + + numa_balancing ============== diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6297467072e6..3d36a8955eaf 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -48,6 +48,39 @@ struct update_props_workarea { #define MIGRATION_SCOPE (1) #define PRRN_SCOPE -2 +#ifdef CONFIG_PPC_WATCHDOG +static unsigned int nmi_wd_lpm_factor = 200; + +#ifdef CONFIG_SYSCTL +static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { + { + .procname = "nmi_wd_lpm_factor", + .data = &nmi_wd_lpm_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + }, + {} +}; +static struct ctl_table nmi_wd_lpm_factor_sysctl_root[] = { + { + .procname = "kernel", + .mode = 0555, + .child = nmi_wd_lpm_factor_ctl_table, + }, + {} +}; + +static int __init register_nmi_wd_lpm_factor_sysctl(void) +{ + register_sysctl_table(nmi_wd_lpm_factor_sysctl_root); + + return 0; +} +device_initcall(register_nmi_wd_lpm_factor_sysctl); +#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_PPC_WATCHDOG */ + static int mobility_rtas_call(int token, char *buf, s32 scope) { int rc; @@ -702,13 +735,20 @@ static int pseries_suspend(u64 handle) static int pseries_migrate_partition(u64 handle) { int ret; + unsigned int factor = 0; +#ifdef CONFIG_PPC_WATCHDOG + factor = nmi_wd_lpm_factor; +#endif ret = wait_for_vasi_session_suspending(handle); if (ret) return ret; vas_migration_handler(VAS_SUSPEND); + if (factor) + watchdog_nmi_set_timeout_pct(factor); + ret = pseries_suspend(handle); if (ret == 0) { post_mobility_fixup(); @@ -722,6 +762,9 @@ static int pseries_migrate_partition(u64 handle) } else pseries_cancel_migration(handle, ret); + if (factor) + watchdog_nmi_set_timeout_pct(0); + vas_migration_handler(VAS_RESUME); return ret; -- cgit From 9257971377e2fe6e82f41f688651a82a2f160a88 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 25 Jul 2022 10:56:19 +0900 Subject: powerpc/purgatory: Omit use of bin2c The .incbin assembler directive is much faster than bin2c + $(CC). Do similar refactoring as in commit 4c0f032d4963 ("s390/purgatory: Omit use of bin2c"). Please note the .quad directive matches to size_t in C (both 8 byte) because the purgatory is compiled only for the 64-bit kernel. (KEXEC_FILE depends on PPC64). Signed-off-by: Masahiro Yamada Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220725015619.618070-1-masahiroy@kernel.org --- arch/powerpc/Kconfig | 1 - arch/powerpc/purgatory/.gitignore | 1 - arch/powerpc/purgatory/Makefile | 8 ++------ arch/powerpc/purgatory/kexec-purgatory.S | 14 ++++++++++++++ scripts/remove-stale-files | 2 ++ 5 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 arch/powerpc/purgatory/kexec-purgatory.S diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 26331cdd3642..c76d499e2aa3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -553,7 +553,6 @@ config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE select HAVE_IMA_KEXEC if IMA - select BUILD_BIN2C select KEXEC_ELF depends on PPC64 depends on CRYPTO=y diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore index b8dc6ff34254..5e40575c1f2b 100644 --- a/arch/powerpc/purgatory/.gitignore +++ b/arch/powerpc/purgatory/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -kexec-purgatory.c purgatory.ro diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 348f59581052..a81d155b89ae 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -2,17 +2,13 @@ KASAN_SANITIZE := n -targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c +targets += trampoline_$(BITS).o purgatory.ro LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE $(call if_changed,ld) -quiet_cmd_bin2c = BIN2C $@ - cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ - -$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE - $(call if_changed,bin2c) +$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro obj-y += kexec-purgatory.o diff --git a/arch/powerpc/purgatory/kexec-purgatory.S b/arch/powerpc/purgatory/kexec-purgatory.S new file mode 100644 index 000000000000..f494fd5a0526 --- /dev/null +++ b/arch/powerpc/purgatory/kexec-purgatory.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + .section .rodata, "a" + + .align 8 +kexec_purgatory: + .globl kexec_purgatory + .incbin "arch/powerpc/purgatory/purgatory.ro" +.Lkexec_purgatory_end: + + .align 8 +kexec_purgatory_size: + .globl kexec_purgatory_size + .quad .Lkexec_purgatory_end - kexec_purgatory diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index 7adab4618035..5a7543469698 100755 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -20,6 +20,8 @@ set -e # yard. Stale files stay in this file for a while (for some release cycles?), # then will be really dead and removed from the code base entirely. +rm -f arch/powerpc/purgatory/kexec-purgatory.c + # These were previously generated source files. When you are building the kernel # with O=, make sure to remove the stale files in the output tree. Otherwise, # the build system wrongly compiles the stale ones. -- cgit From 0c551abfa004ce154d487d91777bf221c808a64f Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:10:43 +0200 Subject: powerpc/fsl-pci: Fix Class Code of PCIe Root Port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default old pre-3.0 Freescale PCIe controllers reports invalid PCI Class Code 0x0b20 for PCIe Root Port. It can be seen by lspci -b output on P2020 board which has this pre-3.0 controller: $ lspci -bvnn 00:00.0 Power PC [0b20]: Freescale Semiconductor Inc P2020E [1957:0070] (rev 21) !!! Invalid class 0b20 for header type 01 Capabilities: [4c] Express Root Port (Slot-), MSI 00 Fix this issue by programming correct PCI Class Code 0x0604 for PCIe Root Port to the Freescale specific PCIe register 0x474. With this change lspci -b output is: $ lspci -bvnn 00:00.0 PCI bridge [0604]: Freescale Semiconductor Inc P2020E [1957:0070] (rev 21) (prog-if 00 [Normal decode]) Capabilities: [4c] Express Root Port (Slot-), MSI 00 Without any "Invalid class" error. So class code was properly reflected into standard (read-only) PCI register 0x08. Same fix is already implemented in U-Boot pcie_fsl.c driver in commit: http://source.denx.de/u-boot/u-boot/-/commit/d18d06ac35229345a0af80977a408cfbe1d1015b Fix activated by U-Boot stay active also after booting Linux kernel. But boards which use older U-Boot version without that fix are affected and still require this fix. So implement this class code fix also in kernel fsl_pci.c driver. Cc: stable@vger.kernel.org Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706101043.4867-1-pali@kernel.org --- arch/powerpc/sysdev/fsl_pci.c | 8 ++++++++ arch/powerpc/sysdev/fsl_pci.h | 1 + 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index e8d072e98b66..af6c8ca824d3 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -522,6 +522,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) struct resource rsrc; const int *bus_range; u8 hdr_type, progif; + u32 class_code; struct device_node *dev; struct ccsr_pci __iomem *pci; u16 temp; @@ -595,6 +596,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; if (fsl_pcie_check_link(hose)) hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK; + /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */ + if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) { + early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code); + class_code &= 0xff; + class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; + early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code); + } } else { /* * Set PBFR(PCI Bus Function Register)[10] = 1 to diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index cdbde2e0c96e..093a875d7d1e 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -18,6 +18,7 @@ struct platform_device; #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PCIE_FSL_CSR_CLASSCODE 0x474 /* FSL GPEX CSR */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ #define PCIE_IP_REV_3_0 0x02080300 /* PCIE IP block version Rev3.0 */ #define PIWAR_EN 0x80000000 /* Enable */ -- cgit From 0531a4abd1c6c2e270f5ac6fc725ff550fcd14ab Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 13 Jul 2022 15:44:29 +0200 Subject: powerpc: dts: turris1x.dts: Add CPLD reboot node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPLD firmware can reset board by writing value 0x01 at CPLD memory offset 0x0d. Define syscon-reboot node for this reset support. Fixes: 54c15ec3b738 ("powerpc: dts: Add DTS file for CZ.NIC Turris 1.x routers") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713134429.18748-1-pali@kernel.org --- arch/powerpc/boot/dts/turris1x.dts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index c76b628cf026..12e08271e61f 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -332,7 +332,7 @@ * Turris CPLD firmware is open source and available at: * https://gitlab.nic.cz/turris/hw/turris_cpld/-/blob/master/CZ_NIC_Router_CPLD.v */ - compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus"; + compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus", "syscon"; reg = <0x3 0x0 0x30>; #address-cells = <1>; #size-cells = <1>; @@ -352,6 +352,14 @@ gpios = <&gpio 11 GPIO_ACTIVE_LOW>; }; + reboot@d { + compatible = "syscon-reboot"; + reg = <0x0d 0x01>; + offset = <0x0d>; + mask = <0x01>; + value = <0x01>; + }; + led-controller@13 { /* * LEDs are controlled by CPLD firmware. -- cgit From a2954a7e47b60bb8d8c51f1b4d55af7585a4108a Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:04 +0200 Subject: powerpc/pci: Hide pci_device_from_OF_node() for non-powermac code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pci_device_from_OF_node() is used only in powermac code. So hide it from all other platforms. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-2-pali@kernel.org --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci_32.c | 2 ++ arch/powerpc/kernel/pci_64.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index c85f901227c9..98156932a1f5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -170,8 +170,10 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) return bus->sysdata; } +#ifdef CONFIG_PPC_PMAC extern int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn); +#endif #ifndef CONFIG_PPC64 extern void pci_create_OF_bus_map(void); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 5a174936c9a0..c3b91fb62a71 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -154,6 +154,7 @@ pcibios_make_OF_bus_map(void) } +#ifdef CONFIG_PPC_PMAC /* * Returns the PCI device matching a given OF node */ @@ -193,6 +194,7 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) return -ENODEV; } EXPORT_SYMBOL(pci_device_from_OF_node); +#endif /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 19b03ddf5631..0c7cfb9fab04 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -286,6 +286,7 @@ int pcibus_to_node(struct pci_bus *bus) EXPORT_SYMBOL(pcibus_to_node); #endif +#ifdef CONFIG_PPC_PMAC int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) { if (!PCI_DN(np)) @@ -294,3 +295,4 @@ int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) *devfn = PCI_DN(np)->devfn; return 0; } +#endif -- cgit From 407a767182d3dc87176aabddd7c109b36a5727e9 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:05 +0200 Subject: powerpc/pci: Make pcibios_make_OF_bus_map() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pcibios_make_OF_bus_map() is used only in pci_32.c. So make it static. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-3-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index c3b91fb62a71..df981294df29 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -36,8 +36,6 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void __init pcibios_make_OF_bus_map(void); - static void fixup_cpc710_pci64(struct pci_dev* dev); static u8* pci_to_OF_bus_map; @@ -108,7 +106,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void __init +static void __init pcibios_make_OF_bus_map(void) { int i; -- cgit From 704544588735b2e1115212dbba1c210b3687ff7a Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:06 +0200 Subject: powerpc/pci: Hide pci_create_OF_bus_map() for non-chrp code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pci_create_OF_bus_map() is used only in chrp code. So hide it from all other platforms. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-4-pali@kernel.org --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci_32.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 98156932a1f5..e18c95f4e1d4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -176,7 +176,9 @@ extern int pci_device_from_OF_node(struct device_node *node, #endif #ifndef CONFIG_PPC64 +#ifdef CONFIG_PPC_CHRP extern void pci_create_OF_bus_map(void); +#endif #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index df981294df29..3291af89cea4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -194,6 +194,7 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) EXPORT_SYMBOL(pci_device_from_OF_node); #endif +#ifdef CONFIG_PPC_CHRP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -218,6 +219,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } +#endif void pcibios_setup_phb_io_space(struct pci_controller *hose) { -- cgit From 7f102d61983275ab68b5ac2715afa35c5f4ffd86 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:07 +0200 Subject: powerpc/pci: Disable filling pci-OF-bus-map for non-chrp/powermac MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating or filling pci-OF-bus-map property in the device-tree is deprecated since May 2006 [1] and was used only in old platforms like PowerMac. Currently kernel code handles it only for chrp and powermac code. So completely disable filling pci-OF-bus-map property for non-chrp and non-powermac platforms. [1] - https://lore.kernel.org/linuxppc-dev/1148016268.13249.14.camel@localhost.localdomain/ Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-5-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 3291af89cea4..2f7284b68f06 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,15 +37,12 @@ EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); static void fixup_cpc710_pci64(struct pci_dev* dev); -static u8* pci_to_OF_bus_map; /* By default, we don't re-assign bus numbers. We do this only on * some pmacs */ static int pci_assign_all_buses; -static int pci_bus_count; - /* This will remain NULL for now, until isa-bridge.c is made common * to both 32-bit and 64-bit. */ @@ -65,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) + +static u8* pci_to_OF_bus_map; +static int pci_bus_count; + /* * Functions below are used on OpenFirmware machines. */ @@ -221,6 +223,8 @@ pci_create_OF_bus_map(void) } #endif +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ + void pcibios_setup_phb_io_space(struct pci_controller *hose) { unsigned long io_offset; @@ -252,6 +256,8 @@ static int __init pcibios_init(void) if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; } + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -260,6 +266,7 @@ static int __init pcibios_init(void) */ if (pci_assign_all_buses) pcibios_make_OF_bus_map(); +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); -- cgit From 5663568130825458a2a8535ccef0db9a1bf7be82 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:08 +0200 Subject: powerpc/pci: Add config option for using all 256 PCI buses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default on PPC32 PCI bus numbers are unique across all PCI domains. So a system could have only 256 PCI buses independently of available PCI domains. This is due to filling DT property pci-OF-bus-map which does not support a multi-domain setup. On all powerpc platforms except chrp and powermac there is no DT property pci-OF-bus-map anymore and therefore it is possible on non-chrp/powermac platforms to avoid this limitation of maximum number of 256 PCI buses in a system even on multi-domain setup. But avoiding this limitation would mean that all PCI and PCIe devices would be present on completely different BDF addresses as every PCI domain starts numbering PCI bueses from zero (instead of the last bus number of previous enumerated PCI domain). Such change could break existing software which expects fixed PCI bus numbers. So add a new config option CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT which enables this change. By default it is disabled. It causes the initial value of hose->first_busno to be zero. Signed-off-by: Pali Rohár [mpe: Minor change log wording] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-6-pali@kernel.org --- arch/powerpc/Kconfig | 11 +++++++++++ arch/powerpc/kernel/pci_32.c | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c76d499e2aa3..04499f22d06b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -375,6 +375,17 @@ config PPC_DCR depends on PPC_DCR_NATIVE || PPC_DCR_MMIO default y +config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + depends on PPC32 + depends on !PPC_PMAC && !PPC_CHRP + bool "Assign PCI bus numbers from zero individually for each PCI domain" + help + By default on PPC32 were PCI bus numbers unique across all PCI domains. + So system could have only 256 PCI buses independently of available + PCI domains. When this option is enabled then PCI bus numbers are + PCI domain dependent and each PCI controller on own domain can have + 256 PCI buses, like it is on other Linux architectures. + config PPC_OF_PLATFORM_PCI bool depends on PCI diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 2f7284b68f06..433965bf37b4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -239,7 +239,9 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT int next_busno = 0; +#endif printk(KERN_INFO "PCI: Probing PCI hardware\n"); @@ -248,13 +250,17 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses) hose->first_busno = next_busno; +#endif hose->last_busno = 0xff; pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; +#endif } #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) -- cgit From 28f07fab26319dacc5675ae01dfc84d82122c59b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 May 2022 22:36:49 +1000 Subject: powerpc/vdso: Fix __kernel_sync_dicache sequence with coherent icache Processors with coherent icache require the sequence sync ; icbi ; isync to entire store->execute coherency. icbi (to any address) must be executed to ensure isync flushes the pipeline. See "POWER9 Processor User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)" for details. __kernel_sync_dicache is missing icbi for the coherent icache path. Add it. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520123649.258440-1-npiggin@gmail.com --- arch/powerpc/kernel/vdso/cacheflush.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index d4e43ab2d5df..0085ae464dac 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -91,6 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 3: crclr cr0*4+so sync + icbi 0,r1 isync li r3,0 blr -- cgit From ef1911c6d26678b0e91fea33f076e98925997f0c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 15 Jul 2022 11:26:36 +1000 Subject: powerpc: add documentation for HWCAPs Take the arm64 HWCAP documentation file and adjust it for powerpc. Signed-off-by: Nicholas Piggin Reviewed-by: Segher Boessenkool [mpe: Fix ARCH_2_05 comment, as noticed by Tulio.] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220715012636.165948-1-npiggin@gmail.com --- Documentation/powerpc/elf_hwcaps.rst | 234 +++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 Documentation/powerpc/elf_hwcaps.rst diff --git a/Documentation/powerpc/elf_hwcaps.rst b/Documentation/powerpc/elf_hwcaps.rst new file mode 100644 index 000000000000..96d4fc4a3f91 --- /dev/null +++ b/Documentation/powerpc/elf_hwcaps.rst @@ -0,0 +1,234 @@ +.. _elf_hwcaps_index: + +================== +POWERPC ELF HWCAPs +================== + +This document describes the usage and semantics of the powerpc ELF HWCAPs. + + +1. Introduction +--------------- + +Some hardware or software features are only available on some CPU +implementations, and/or with certain kernel configurations, but have no other +discovery mechanism available to userspace code. The kernel exposes the +presence of these features to userspace through a set of flags called HWCAPs, +exposed in the auxiliary vector. + +Userspace software can test for features by acquiring the AT_HWCAP or +AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant +flags are set, e.g.:: + + bool floating_point_is_present(void) + { + unsigned long HWCAPs = getauxval(AT_HWCAP); + if (HWCAPs & PPC_FEATURE_HAS_FPU) + return true; + + return false; + } + +Where software relies on a feature described by a HWCAP, it should check the +relevant HWCAP flag to verify that the feature is present before attempting to +make use of the feature. + +HWCAP is the preferred method to test for the presence of a feature rather +than probing through other means, which may not be reliable or may cause +unpredictable behaviour. + +Software that targets a particular platform does not necessarily have to +test for required or implied features. For example if the program requires +FPU, VMX, VSX, it is not necessary to test those HWCAPs, and it may be +impossible to do so if the compiler generates code requiring those features. + +2. Facilities +------------- + +The Power ISA uses the term "facility" to describe a class of instructions, +registers, interrupts, etc. The presence or absence of a facility indicates +whether this class is available to be used, but the specifics depend on the +ISA version. For example, if the VSX facility is available, the VSX +instructions that can be used differ between the v3.0B and v3.1B ISA +versions. + +3. Categories +------------- + +The Power ISA before v3.0 uses the term "category" to describe certain +classes of instructions and operating modes which may be optional or +mutually exclusive, the exact meaning of the HWCAP flag may depend on +context, e.g., the presence of the BOOKE feature implies that the server +category is not implemented. + +4. HWCAP allocation +------------------- + +HWCAPs are allocated as described in Power Architecture 64-Bit ELF V2 ABI +Specification (which will be reflected in the kernel's uapi headers). + +5. The HWCAPs exposed in AT_HWCAP +--------------------------------- + +PPC_FEATURE_32 + 32-bit CPU + +PPC_FEATURE_64 + 64-bit CPU (userspace may be running in 32-bit mode). + +PPC_FEATURE_601_INSTR + The processor is PowerPC 601. + Unused in the kernel since: + f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_HAS_ALTIVEC + Vector (aka Altivec, VMX) facility is available. + +PPC_FEATURE_HAS_FPU + Floating point facility is available. + +PPC_FEATURE_HAS_MMU + Memory management unit is present and enabled. + +PPC_FEATURE_HAS_4xxMAC + The processor is 40x or 44x family. + +PPC_FEATURE_UNIFIED_CACHE + The processor has a unified L1 cache for instructions and data, as + found in NXP e200. + Unused in the kernel since: + 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") + +PPC_FEATURE_HAS_SPE + Signal Processing Engine facility is available. + +PPC_FEATURE_HAS_EFP_SINGLE + Embedded Floating Point single precision operations are available. + +PPC_FEATURE_HAS_EFP_DOUBLE + Embedded Floating Point double precision operations are available. + +PPC_FEATURE_NO_TB + The timebase facility (mftb instruction) is not available. + This is a 601 specific HWCAP, so if it is known that the processor + running is not a 601, via other HWCAPs or other means, it is not + required to test this bit before using the timebase. + Unused in the kernel since: + f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_POWER4 + The processor is POWER4 or PPC970/FX/MP. + POWER4 support dropped from the kernel since: + 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") + +PPC_FEATURE_POWER5 + The processor is POWER5. + +PPC_FEATURE_POWER5_PLUS + The processor is POWER5+. + +PPC_FEATURE_CELL + The processor is Cell. + +PPC_FEATURE_BOOKE + The processor implements the embedded category ("BookE") architecture. + +PPC_FEATURE_SMT + The processor implements SMT. + +PPC_FEATURE_ICACHE_SNOOP + The processor icache is coherent with the dcache, and instruction storage + can be made consistent with data storage for the purpose of executing + instructions with the sequence (as described in, e.g., POWER9 Processor + User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)): + sync + icbi (to any address) + isync + +PPC_FEATURE_ARCH_2_05 + The processor supports the v2.05 userlevel architecture. Processors + supporting later architectures DO NOT set this feature. + +PPC_FEATURE_PA6T + The processor is PA6T. + +PPC_FEATURE_HAS_DFP + DFP facility is available. + +PPC_FEATURE_POWER6_EXT + The processor is POWER6. + +PPC_FEATURE_ARCH_2_06 + The processor supports the v2.06 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE_HAS_VSX + VSX facility is available. + +PPC_FEATURE_PSERIES_PERFMON_COMPAT + The processor supports architected PMU events in the range 0xE0-0xFF. + +PPC_FEATURE_TRUE_LE + The processor supports true little-endian mode. + +PPC_FEATURE_PPC_LE + The processor supports "PowerPC Little-Endian", that uses address + munging to make storage access appear to be little-endian, but the + data is stored in a different format that is unsuitable to be + accessed by other agents not running in this mode. + +6. The HWCAPs exposed in AT_HWCAP2 +---------------------------------- + +PPC_FEATURE2_ARCH_2_07 + The processor supports the v2.07 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HTM + Transactional Memory feature is available. + +PPC_FEATURE2_DSCR + DSCR facility is available. + +PPC_FEATURE2_EBB + EBB facility is available. + +PPC_FEATURE2_ISEL + isel instruction is available. This is superseded by ARCH_2_07 and + later. + +PPC_FEATURE2_TAR + TAR facility is available. + +PPC_FEATURE2_VEC_CRYPTO + v2.07 crypto instructions are available. + +PPC_FEATURE2_HTM_NOSC + System calls fail if called in a transactional state, see + Documentation/powerpc/syscall64-abi.rst + +PPC_FEATURE2_ARCH_3_00 + The processor supports the v3.0B / v3.0C userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HAS_IEEE128 + IEEE 128-bit binary floating point is supported with VSX + quad-precision instructions and data types. + +PPC_FEATURE2_DARN + darn instruction is available. + +PPC_FEATURE2_SCV + The scv 0 instruction may be used for system calls, see + Documentation/powerpc/syscall64-abi.rst. + +PPC_FEATURE2_HTM_NO_SUSPEND + A limited Transactional Memory facility that does not support suspend is + available, see Documentation/powerpc/transactional_memory.rst. + +PPC_FEATURE2_ARCH_3_1 + The processor supports the v3.1 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_MMA + MMA facility is available. -- cgit From abf0878ce95f8a9b47d8ecf2de1d4617bec21711 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 May 2022 12:23:56 +1000 Subject: powerpc/64s: POWER10 nest MMU does not require flush escalation workaround Per (non-public) Nest MMU Workbook, POWER10 and POWER9P NMMU does not cache PTEs in PWC, so does not require PWC flush to invalidate these translations. Skip the workaround on POWER10 and later. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525022358.780745-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index dda51fef2d2e..4e29b619578c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -755,10 +755,18 @@ EXPORT_SYMBOL(radix__local_flush_tlb_page); static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* - * P9 nest MMU has issues with the page walk cache - * caching PTEs and not flushing them properly when - * RIC = 0 for a PID/LPID invalidate + * The P9 nest MMU has issues with the page walk cache caching PTEs + * and not flushing them when RIC = 0 for a PID/LPID invalidate. + * + * This may have been fixed in shipping firmware (by disabling PWC + * or preventing it from caching PTEs), but until that is confirmed, + * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes + * to RIC=2. + * + * POWER10 (and P9P) does not have this problem. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; if (atomic_read(&mm->context.copros) > 0) return true; return false; -- cgit From 2a8a0f420f74425bf5f80760fd14d7a2c3abb87d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 May 2022 12:23:57 +1000 Subject: powerpc/64s: POWER10 nest MMU can upgrade PTE access authority without TLB flush The nest MMU in POWER9 does not re-fetch the PTE in response to permission mismatch, contrary to the architecture[*] and unlike the core MMU. This requires a TLB flush before upgrading permissions of valid PTEs, for any address space with a coprocessor attached. Per (non-public) Nest MMU Workbook, POWER10 nest MMU conforms to the architecture in this regard, so skip the workaround. [*] See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table Entry, Setting a Reference or Change Bit or Upgrading Access Authority (PTE Subject to Atomic Hardware Updates): "If the only change being made to a valid PTE that is subject to atomic hardware updates is to set the Reference or Change bit to 1 or to upgrade access authority, a simpler sequence suffices because the translation hardware will refetch the PTE if an access is attempted for which the only problems were reference and/or change bits needing to be set or insufficient access authority." Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525022358.780745-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 10 ++++---- arch/powerpc/mm/book3s64/radix_pgtable.c | 35 +++++++++++++++++----------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index d2fb776febb4..5e3195568525 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -48,11 +48,13 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. + * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags. */ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && - (atomic_read(&mm->context.copros) > 0)) + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index db2f3d193448..698274109c91 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1018,16 +1018,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, unsigned long change = pte_val(entry) ^ pte_val(*ptep); /* - * To avoid NMMU hang while relaxing access, we need mark - * the pte invalid in between. + * On POWER9, the NMMU is not able to relax PTE access permissions + * for a translation with a TLB. The PTE must be invalidated, TLB + * flushed before the new PTE is installed. + * + * This only needs to be done for radix, because hash translation does + * flush when updating the linux pte (and we don't support NMMU + * accelerators on HPT on POWER9 anyway XXX: do we?). + * + * POWER10 (and P9P) NMMU does behave as per ISA. */ - if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) { + if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) && + atomic_read(&mm->context.copros) > 0) { unsigned long old_pte, new_pte; old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID); - /* - * new value of pte - */ new_pte = old_pte | set; radix__flush_tlb_page_psize(mm, address, psize); __radix_pte_update(ptep, _PAGE_INVALID, new_pte); @@ -1035,9 +1040,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, __radix_pte_update(ptep, 0, set); /* * Book3S does not require a TLB flush when relaxing access - * restrictions when the address space is not attached to a - * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architecture. + * restrictions when the address space (modulo the POWER9 nest + * MMU issue above) because the MMU will reload the PTE after + * taking an access fault, as defined by the architecture. See + * "Setting a Reference or Change Bit or Upgrading Access + * Authority (PTE Subject to Atomic Hardware Updates)" in + * Power ISA Version 3.1B. */ } /* See ptesync comment in radix__set_pte_at */ @@ -1050,11 +1058,12 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. We need to do this only for radix, because hash - * translation does flush when updating the linux pte. + * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags. */ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && (atomic_read(&mm->context.copros) > 0)) radix__flush_tlb_page(vma, addr); -- cgit From fd193f85d3206cc7e7aeea2b6033d105cca38d01 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 May 2022 12:23:58 +1000 Subject: powerpc/64s: Remove spurious fault flushing for NMMU Commit 6d8278c414cb2 ("powerpc/64s/radix: do not flush TLB on spurious fault") removed the TLB flush for spurious faults, except when a coprocessor (nest MMU) maps the address space. This is not needed because the NMMU workaround in the PTE permission upgrade paths prevents PTEs existing with less restrictive access permissions than their corresponding TLB entries have. Remove it and replace with a comment. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525022358.780745-4-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d2e80f178b6d..206f920fe5b9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -138,9 +138,29 @@ static inline void flush_all_mm(struct mm_struct *mm) static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, unsigned long address) { - /* See ptep_set_access_flags comment */ - if (atomic_read(&vma->vm_mm->context.copros) > 0) - flush_tlb_page(vma, address); + /* + * Book3S 64 does not require spurious fault flushes because the PTE + * must be re-fetched in case of an access permission problem. So the + * only reason for a spurious fault should be concurrent modification + * to the PTE, in which case the PTE will eventually be re-fetched by + * the MMU when it attempts the access again. + * + * See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table + * Entry, Setting a Reference or Change Bit or Upgrading Access + * Authority (PTE Subject to Atomic Hardware Updates): + * + * "If the only change being made to a valid PTE that is subject to + * atomic hardware updates is to set the Reference or Change bit to + * 1 or to upgrade access authority, a simpler sequence suffices + * because the translation hardware will refetch the PTE if an + * access is attempted for which the only problems were reference + * and/or change bits needing to be set or insufficient access + * authority." + * + * The nest MMU in POWER9 does not perform this PTE re-fetch, but + * it avoids the spurious fault problem by flushing the TLB before + * upgrading PTE permissions, see radix__ptep_set_access_flags. + */ } extern bool tlbie_capable; -- cgit From f57261e69825b332f22480b37a33e03d066c0da2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 11 Jul 2022 13:06:52 +1000 Subject: powerpc/mce: use early_cpu_to_node() in mce_init() cpu_to_node() is not yet available (setup_arch() is called before setup_per_cpu_areas() by start_kernel()). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711030653.150950-1-npiggin@gmail.com --- arch/powerpc/kernel/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 18173199b79d..6c5d30fba766 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -756,7 +756,7 @@ void __init mce_init(void) mce_info = memblock_alloc_try_nid(sizeof(*mce_info), __alignof__(*mce_info), MEMBLOCK_LOW_LIMIT, - limit, cpu_to_node(i)); + limit, early_cpu_to_node(i)); if (!mce_info) goto err; paca_ptrs[i]->mce_info = mce_info; -- cgit From 980bbf7ca72012d317617fcdbfabe8708e4cef29 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:34:08 +0200 Subject: powerpc/32: Call mmu_mark_initmem_nx() regardless of data block mapping. mark_initmem_nx() calls either mmu_mark_initmem_nx() or set_memory_attr() based on return from v_block_mapped() of _sinittext. But we can now handle text and data independently, so that text may be mapped by block even when data is mapped by pages. On the 8xx for instance, at startup 32Mbytes of memory are pinned in TLB. So the pinned entries need to go away for sinittext. In next patch a BAT will be set to also covers sinittext on book3s/32. So it will also be needed to call mmu_mark_initmem_nx() even when data above sinittext is not mapped with BATs. As this is highly dependent on the platform, call mmu_mark_initmem_nx() regardless of data block mapping. Then the platform will know what to do. Modify 8xx mmu_mark_initmem_nx() so that inittext mapping is modified only when pagealloc debug and kfence are not active, otherwise inittext is mapped with standard pages. And don't do anything on kernel text which is already mapped with PAGE_KERNEL_TEXT. Fixes: da1adea07576 ("powerpc/8xx: Allow STRICT_KERNEL_RwX with pinned TLB") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/db3fc14f3bfa6215b0786ef58a6e2bc1e1f964d7.1655202804.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 6b668ccef836..dbbfe897455d 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -170,8 +170,8 @@ void mmu_mark_initmem_nx(void) unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + if (!debug_pagealloc_enabled_or_kfence()) + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a56ade39dc68..3ac73f9fb5d5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,9 +135,9 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) { - mmu_mark_initmem_nx(); - } else { + mmu_mark_initmem_nx(); + + if (!v_block_mapped((unsigned long)_sinittext)) { set_memory_nx((unsigned long)_sinittext, numpages); set_memory_rw((unsigned long)_sinittext, numpages); } -- cgit From 2a0fb3c155c97c75176e557d61f8e66c1bd9b735 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:34:09 +0200 Subject: powerpc/32: Set an IBAT covering up to _einittext during init Always set an IBAT covering up to _einittext during init because when CONFIG_MODULES is not selected there is no reason to have an exception handler for kernel instruction TLB misses. It implies DBAT and IBAT are now totaly independent, IBATs are set by setibat() and DBAT by setbat(). This allows to revert commit 9bb162fa26ed ("powerpc/603: Fix boot failure with DEBUG_PAGEALLOC and KFENCE") Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ce7f04a39593934d9b1ee68c69144ccd3d4da4a1.1655202804.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/mm/book3s32/mmu.c | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6c739beb938c..519b60695167 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -418,14 +418,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 1794132db31e..a96b73006dfb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -159,7 +159,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + unsigned long size; + size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); + setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X); if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); @@ -245,10 +248,9 @@ void mmu_mark_rodata_ro(void) } /* - * Set up one of the I/D BAT (block address translation) register pairs. + * Set up one of the D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. - * On 603+, only set IBAT when _PAGE_EXEC is set */ void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -284,10 +286,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* G bit must be zero in IBATs */ flags &= ~_PAGE_EXEC; } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; -- cgit From 6042a1652d643d1d34fa89bb314cb102960c0800 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 1 Jul 2022 08:06:15 +0200 Subject: powerpc/32s: Fix boot failure with KASAN + SMP + JUMP_LABEL_FEATURE_CHECK_DEBUG Since commit 4291d085b0b0 ("powerpc/32s: Make pte_update() non atomic on 603 core"), pte_update() has been using mmu_has_feature(MMU_FTR_HPTE_TABLE) to avoid a useless atomic operation on 603 cores. When kasan_early_init() sets up the early zero shadow, it uses __set_pte_at(). On book3s/32, __set_pte_at() calls pte_update() when CONFIG_SMP is selected in order to ensure the preservation of _PAGE_HASHPTE in case of concurrent update of the PTE. But that's too early for mmu_has_feature(), so when CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG is selected, mmu_has_feature() calls printk(). That's too early to call printk() because KASAN early zero shadow page is not set up yet. It leads to a deadlock. However, when kasan_early_init() is called, there is only one CPU running and no risk of concurrent PTE update. So __set_pte_at() can be called with the 'percpu' flag. With that flag set, the PTE is written directly instead of being written via pte_update(). Fixes: 4291d085b0b0 ("powerpc/32s: Make pte_update() non atomic on 603 core") Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2ee707512b8b212b079b877f4ceb525a1606a3fb.1656655567.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index f3e4d069e0ba..a70828a6d935 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -25,7 +25,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) int i; for (i = 0; i < PTRS_PER_PTE; i++, ptep++) - __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1); } int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) -- cgit From ec3eb9d941a98f4c0dac263110729680a734279b Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 7 Jul 2022 16:37:17 +0200 Subject: powerpc/perf: Use PVR rather than oprofile field to determine CPU version Currently the perf CPU backend drivers detect what CPU they're on using cur_cpu_spec->oprofile_cpu_type. Although that works, it's a bit crufty to be using oprofile related fields, especially seeing as oprofile is more or less unused these days. It also means perf is reliant on the fragile logic in setup_cpu_spec() which detects when we're using a logical PVR and copies back the PMU related fields from the raw CPU entry. So lets check the PVR directly. Suggested-by: Michael Ellerman Signed-off-by: Rashmica Gupta Signed-off-by: Christophe Leroy Reviewed-by: Madhavan Srinivasan [chleroy: Added power10 and fixed checkpatch issues] Reviewed-and-tested-by: Athira Rajeev Reviewed-and-tested-By: Kajol Jain [For 24x7 side changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20c0ee7f99dbf0dbf8658df6b39f84753e6db1ef.1657204631.git.christophe.leroy@csgroup.eu --- arch/powerpc/perf/e500-pmu.c | 9 +++++---- arch/powerpc/perf/e6500-pmu.c | 5 +++-- arch/powerpc/perf/hv-24x7.c | 6 +++--- arch/powerpc/perf/mpc7450-pmu.c | 5 +++-- arch/powerpc/perf/power10-pmu.c | 6 ++---- arch/powerpc/perf/power5+-pmu.c | 6 +++--- arch/powerpc/perf/power5-pmu.c | 5 +++-- arch/powerpc/perf/power6-pmu.c | 5 +++-- arch/powerpc/perf/power7-pmu.c | 7 ++++--- arch/powerpc/perf/power8-pmu.c | 5 +++-- arch/powerpc/perf/power9-pmu.c | 4 +--- arch/powerpc/perf/ppc970-pmu.c | 7 ++++--- 12 files changed, 37 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c index a59c33bed32a..e3e1a68eb1d5 100644 --- a/arch/powerpc/perf/e500-pmu.c +++ b/arch/powerpc/perf/e500-pmu.c @@ -118,12 +118,13 @@ static struct fsl_emb_pmu e500_pmu = { static int init_e500_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type) - return -ENODEV; + unsigned int pvr = mfspr(SPRN_PVR); - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + /* ec500mc */ + if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500) num_events = 256; - else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + /* e500 */ + else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != PVR_VER_E500V2) return -ENODEV; return register_fsl_emb_pmu(&e500_pmu); diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c index 44ad65da82ed..bd779a2338f8 100644 --- a/arch/powerpc/perf/e6500-pmu.c +++ b/arch/powerpc/perf/e6500-pmu.c @@ -107,8 +107,9 @@ static struct fsl_emb_pmu e6500_pmu = { static int init_e6500_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_VER_E6500) return -ENODEV; return register_fsl_emb_pmu(&e6500_pmu); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index cf5406b31e27..33c23225fd54 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1718,16 +1718,16 @@ static int hv_24x7_init(void) { int r; unsigned long hret; + unsigned int pvr = mfspr(SPRN_PVR); struct hv_perf_caps caps; if (!firmware_has_feature(FW_FEATURE_LPAR)) { pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; - } else if (!cur_cpu_spec->oprofile_cpu_type) - return -ENODEV; + } /* POWER8 only supports v1, while POWER9 only supports v2. */ - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) + if (PVR_VER(pvr) == PVR_POWER8) interface_version = 1; else { interface_version = 2; diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index e39b15b79a83..552d51a925d3 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -417,8 +417,9 @@ struct power_pmu mpc7450_pmu = { static int __init init_mpc7450_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_7450) return -ENODEV; return register_power_pmu(&mpc7450_pmu); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index d1adcd9f52e2..403ba3a69512 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -607,12 +607,10 @@ int __init init_power10_pmu(void) unsigned int pvr; int rc; - /* Comes from cpu_specs[] */ - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + pvr = mfspr(SPRN_PVR); + if (PVR_VER(pvr) != PVR_POWER10) return -ENODEV; - pvr = mfspr(SPRN_PVR); /* Add the ppmu flag for power10 DD1 */ if ((PVR_CFG(pvr) == 1)) power10_pmu.flags |= PPMU_P10_DD1; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 753b4740ef64..b4708ab73145 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -679,9 +679,9 @@ static struct power_pmu power5p_pmu = { int __init init_power5p_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") - && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER5p) return -ENODEV; return register_power_pmu(&power5p_pmu); diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 1f83c4cba0aa..c6aefd0a1cc8 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -620,8 +620,9 @@ static struct power_pmu power5_pmu = { int __init init_power5_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER5) return -ENODEV; return register_power_pmu(&power5_pmu); diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index aec746f86804..5729b6e059de 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -541,8 +541,9 @@ static struct power_pmu power6_pmu = { int __init init_power6_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER6) return -ENODEV; return register_power_pmu(&power6_pmu); diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a74211410b8d..c95ccf2e28da 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -447,11 +447,12 @@ static struct power_pmu power7_pmu = { int __init init_power7_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_POWER7 && PVR_VER(pvr) != PVR_POWER7p) return -ENODEV; - if (pvr_version_is(PVR_POWER7p)) + if (PVR_VER(pvr) == PVR_POWER7p) power7_pmu.flags |= PPMU_SIAR_VALID; return register_power_pmu(&power7_pmu); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 2518f5375d4a..ef9685065aaf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -391,9 +391,10 @@ static struct power_pmu power8_pmu = { int __init init_power8_pmu(void) { int rc; + unsigned int pvr = mfspr(SPRN_PVR); - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) + if (PVR_VER(pvr) != PVR_POWER8E && PVR_VER(pvr) != PVR_POWER8NVL && + PVR_VER(pvr) != PVR_POWER8) return -ENODEV; rc = register_power_pmu(&power8_pmu); diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index bc6d76bce97b..cb6a7dc02dd7 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -467,9 +467,7 @@ int __init init_power9_pmu(void) int rc = 0; unsigned int pvr = mfspr(SPRN_PVR); - /* Comes from cpu_specs[] */ - if (!cur_cpu_spec->oprofile_cpu_type || - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) + if (PVR_VER(pvr) != PVR_POWER9) return -ENODEV; /* Blacklist events */ diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 09802482ba72..762676fb839e 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -491,9 +491,10 @@ static struct power_pmu ppc970_pmu = { int __init init_ppc970_pmu(void) { - if (!cur_cpu_spec->oprofile_cpu_type || - (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") - && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))) + unsigned int pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP && + PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX) return -ENODEV; return register_power_pmu(&ppc970_pmu); -- cgit From 62ccae78820b25a0ac64bb0f648388ec834fcb3c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jul 2022 16:37:18 +0200 Subject: powerpc: Remove remaining parts of oprofile Commit 9850b6c69356 ("arch: powerpc: Remove oprofile") removed oprofile. Remove all remaining parts of it. Signed-off-by: Christophe Leroy Acked-by: Viresh Kumar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/298432fe1a14c0a415760011d72c3f0999efd5e2.1657204631.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 3 -- arch/powerpc/kernel/cputable.c | 67 +------------------------------ arch/powerpc/kernel/dt_cpu_ftrs.c | 4 -- arch/powerpc/platforms/cell/spufs/spufs.h | 2 +- arch/powerpc/platforms/ps3/Kconfig | 2 +- 5 files changed, 4 insertions(+), 74 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 549eb6dd146f..ae8c3e13cfce 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -70,9 +70,6 @@ struct cpu_spec { /* Used to restore cpu setup on secondary processors and at resume */ cpu_restore_t cpu_restore; - /* Used by oprofile userspace to select the right counters */ - char *oprofile_cpu_type; - /* Name of processor class, for the ELF AT_PLATFORM entry */ char *platform; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a5dbfccd2047..d8e42ef750f1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -149,7 +149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970FX */ @@ -166,7 +165,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -183,7 +181,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970MP */ @@ -200,7 +197,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970MP, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970GX */ @@ -216,7 +212,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 8, .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* Power5 GR */ @@ -230,7 +225,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power5", .platform = "power5", }, { /* Power5++ */ @@ -243,7 +237,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, - .oprofile_cpu_type = "ppc64/power5++", .platform = "power5+", }, { /* Power5 GS */ @@ -257,7 +250,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power5+", .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -269,7 +261,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power5+", }, { /* Power6 */ @@ -284,7 +275,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power6", .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -296,7 +286,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power6", }, { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ @@ -309,7 +298,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -325,7 +313,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -341,7 +328,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER9, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .platform = "power9", @@ -356,7 +342,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER10, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .platform = "power10", @@ -373,7 +358,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -391,7 +375,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -409,7 +392,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -427,7 +409,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -445,7 +426,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -463,7 +443,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -481,7 +460,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -499,7 +477,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -517,7 +494,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -535,7 +511,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power10", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .machine_check_early = __machine_check_early_realmode_p10, @@ -554,7 +529,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 4, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/cell-be", .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -570,7 +544,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_PA6T, .cpu_setup = __setup_cpu_pa6t, .cpu_restore = __restore_cpu_pa6t, - .oprofile_cpu_type = "ppc64/pa6t", .platform = "pa6t", }, { /* default match */ @@ -734,7 +707,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -765,7 +737,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750FX rev 2.0 must disable HID0[DPM] */ .pvr_mask = 0xffffffff, @@ -781,7 +752,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750FX (All revs except 2.0) */ .pvr_mask = 0xffff0000, @@ -797,7 +767,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750GX */ .pvr_mask = 0xffff0000, @@ -813,7 +782,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 740/750 (L2CR bit need fixup for 740) */ .pvr_mask = 0xffff0000, @@ -891,7 +859,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -908,7 +875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -925,7 +891,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -942,7 +907,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -959,7 +923,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -976,7 +939,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -993,7 +955,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1010,7 +971,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1026,7 +986,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1043,7 +1002,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1060,7 +1018,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1172,7 +1129,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, { /* e300c4 (e300c1, plus one IU) */ @@ -1188,7 +1144,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, #endif @@ -1884,7 +1839,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = __setup_cpu_e500v1, .machine_check = machine_check_e500, .platform = "ppc8540", @@ -1903,7 +1857,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", @@ -1922,7 +1875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", @@ -1943,7 +1895,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e5500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, @@ -1965,7 +1916,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, - .oprofile_cpu_type = "ppc/e6500", .cpu_setup = __setup_cpu_e6500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e6500, @@ -2033,23 +1983,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t->pmc_type = old.pmc_type; /* - * If we have passed through this logic once before and - * have pulled the default case because the real PVR was - * not found inside cpu_specs[], then we are possibly - * running in compatibility mode. In that case, let the - * oprofiler know which set of compatibility counters to - * pull from by making sure the oprofile_cpu_type string - * is set to that of compatibility mode. If the - * oprofile_cpu_type already has a value, then we are - * possibly overriding a real PVR with a logical one, - * and, in that case, keep the current value for - * oprofile_cpu_type. Furthermore, let's ensure that the + * Let's ensure that the * fix for the PMAO bug is enabled on compatibility mode. */ - if (old.oprofile_cpu_type != NULL) { - t->oprofile_cpu_type = old.oprofile_cpu_type; - t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; - } + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2ad365c21afa..fc800a9fb2c4 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,7 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .dcache_bsize = 32, /* cache info init. */ .num_pmcs = 0, .pmc_type = PPC_PMC_DEFAULT, - .oprofile_cpu_type = NULL, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, .machine_check_early = NULL, @@ -387,7 +386,6 @@ static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power8"; return 1; } @@ -423,7 +421,6 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power9"; return 1; } @@ -449,7 +446,6 @@ static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; return 1; } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index afc1d6604d12..23c6799cfa5a 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -76,7 +76,7 @@ struct spu_context { struct address_space *mss; /* 'mss' area mappings. */ struct address_space *psmap; /* 'psmap' area mappings. */ struct mutex mapping_lock; - u64 object_id; /* user space pointer for oprofile */ + u64 object_id; /* user space pointer for GNU Debugger */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; struct mutex state_mutex; diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 610682caabc4..a44869e5ea70 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -165,7 +165,7 @@ config PS3_LPM If you intend to use the advanced performance monitoring and profiling support of the Cell processor with programs like - oprofile and perfmon2, then say Y or M, otherwise say N. + perfmon2, then say Y or M, otherwise say N. config PS3GELIC_UDBG bool "PS3 udbg output via UDP broadcasts on Ethernet" -- cgit From 7b48377e1d9f68a1b58dfd22d40b083f38ce76a0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jul 2022 16:55:14 +0200 Subject: powerpc/probes: Remove ppc_opcode_t ppc_opcode_t is just an u32. There is no point in hiding u32 behind such a typedef. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b2d762191b095530789ac8b71b167c6740bb6aed.1657205708.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kprobes.h | 2 +- arch/powerpc/include/asm/probes.h | 1 - arch/powerpc/include/asm/uprobes.h | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index bab364152b29..c8e4b4fd4e33 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -29,7 +29,7 @@ struct pt_regs; struct kprobe; -typedef ppc_opcode_t kprobe_opcode_t; +typedef u32 kprobe_opcode_t; extern kprobe_opcode_t optinsn_slot; diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 6f66e358aa37..00634e3145e7 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -10,7 +10,6 @@ #include #include -typedef u32 ppc_opcode_t; #define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ /* Trap definitions per ISA */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index a7ae1860115a..4fea116d3d37 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -12,7 +12,7 @@ #include #include -typedef ppc_opcode_t uprobe_opcode_t; +typedef u32 uprobe_opcode_t; #define MAX_UINSN_BYTES 8 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) -- cgit From d00d762daf1278641eec92d74011a7e625e84a68 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jul 2022 16:55:15 +0200 Subject: powerpc/ppc-opcode: Define and use PPC_RAW_TRAP() and PPC_RAW_TW() Add and use PPC_RAW_TRAP() instead of opencoding. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/52c7e522e56a38e3ff0363906919445920005a8f.1657205708.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/include/asm/probes.h | 3 ++- arch/powerpc/xmon/xmon.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 89beabf5325c..5527a955fb4a 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -581,6 +581,8 @@ #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) +#define PPC_RAW_TW(t0, a, b) (0x7f000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 00634e3145e7..e77a2ed7d938 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -9,8 +9,9 @@ */ #include #include +#include -#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ +#define BREAKPOINT_INSTRUCTION PPC_RAW_TRAP() /* trap */ /* Trap definitions per ISA */ #define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f80c714f1d49..26ef3388c24c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -116,7 +116,7 @@ struct bpt { static struct bpt bpts[NBPTS]; static struct bpt dabr[HBP_NUM_MAX]; static struct bpt *iabr; -static unsigned bpinstr = 0x7fe00008; /* trap */ +static unsigned int bpinstr = PPC_RAW_TRAP(); #define BP_NUM(bp) ((bp) - bpts + 1) -- cgit From de40303b54bc458d7df0d4b4ee1d296df7fe98c7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jul 2022 16:55:16 +0200 Subject: powerpc/ppc-opcode: Define and use PPC_RAW_SETB() We have PPC_INST_SETB then build the 'setb' instruction in the user. Instead, define PPC_RAW_SETB() and use it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b08a4f26919a8f8cdcf7544ab552d9c1c63418b5.1657205708.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc-opcode.h | 2 +- arch/powerpc/lib/test_emulate_step.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 5527a955fb4a..7b81b37a191e 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -290,7 +290,6 @@ #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e -#define PPC_INST_SETB 0x7c000100 #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_TRECHKPT 0x7c0007dd @@ -583,6 +582,7 @@ #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) #define PPC_RAW_TW(t0, a, b) (0x7f000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) +#define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index f2e47be05e8c..23c7805fb7b3 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,9 +53,6 @@ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ PPC_RAW_ADDI(t, a, i)) -#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2)) - - static void __init init_pt_regs(struct pt_regs *regs) { static unsigned long msr; @@ -935,21 +932,21 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "BFA = 1, CR = GT", - .instr = TEST_SETB(20, 1), + .instr = ppc_inst(PPC_RAW_SETB(20, 1)), .regs = { .ccr = 0x4000000, } }, { .descr = "BFA = 4, CR = LT", - .instr = TEST_SETB(20, 4), + .instr = ppc_inst(PPC_RAW_SETB(20, 4)), .regs = { .ccr = 0x8000, } }, { .descr = "BFA = 5, CR = EQ", - .instr = TEST_SETB(20, 5), + .instr = ppc_inst(PPC_RAW_SETB(20, 5)), .regs = { .ccr = 0x200, } -- cgit From 9be013b2a9ecb29b5168e4b9db0e48ed53acf37c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:29 +0200 Subject: powerpc/32: Do not allow selection of e5500 or e6500 CPUs on PPC32 Commit 0e00a8c9fd92 ("powerpc: Allow CPU selection also on PPC32") enlarged the CPU selection logic to PPC32 by removing depend to PPC64, and failed to restrict that depend to E5500_CPU and E6500_CPU. Fortunately that got unnoticed because -mcpu=8540 will override the -mcpu=e500mc64 or -mpcu=e6500 as they are ealier, but that's fragile and may no be right in the future. Add back the depend PPC64 on E5500_CPU and E6500_CPU. Fixes: 0e00a8c9fd92 ("powerpc: Allow CPU selection also on PPC32") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8abab4888da69ff78b73a56f64d9678a7bf684e9.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 383ed4fe6013..9805a2c717b9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -173,11 +173,11 @@ config POWER9_CPU config E5500_CPU bool "Freescale e5500" - depends on E500 + depends on PPC64 && E500 config E6500_CPU bool "Freescale e6500" - depends on E500 + depends on PPC64 && E500 config 860_CPU bool "8xx family" -- cgit From 446cda1b21d9a6b3697fe399c6a3a00ff4a285f5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:30 +0200 Subject: powerpc/32: Don't always pass -mcpu=powerpc to the compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 4bf4f42a2feb ("powerpc/kbuild: Set default generic machine type for 32-bit compile"), when building a 32 bits kernel with a bi-arch version of GCC, or when building a book3s/32 kernel, the option -mcpu=powerpc is passed to GCC at all time, relying on it being eventually overriden by a subsequent -mcpu=xxxx. But when building the same kernel with a 32 bits only version of GCC, that is not done, relying on gcc being built with the expected default CPU. This logic has two problems. First, it is a bit fragile to rely on whether the GCC version is bi-arch or not, because today we can have bi-arch versions of GCC configured with a 32 bits default. Second, there are some versions of GCC which don't support -mcpu=powerpc, for instance for e500 SPE-only versions. So, stop relying on this approximative logic and allow the user to decide whether he/she wants to use the toolchain's default CPU or if he/she wants to set one, and allow only possible CPUs based on the selected target. Reported-by: Pali Rohár Signed-off-by: Christophe Leroy Tested-by: Pali Rohár Reviewed-by: Arnd Bergmann Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d4df724691351531bf46d685d654689e5dfa0d74.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 26 +------------------------- arch/powerpc/platforms/Kconfig.cputype | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a0cd70712061..d54e1fe03551 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -15,23 +15,6 @@ HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= -ifeq ($(HAS_BIARCH),y) -ifeq ($(CROSS32_COMPILE),) -ifdef CONFIG_PPC32 -# These options will be overridden by any -mcpu option that the CPU -# or platform code sets later on the command line, but they are needed -# to set a sane 32-bit cpu target for the 64-bit cross compiler which -# may default to the wrong ISA. -KBUILD_CFLAGS += -mcpu=powerpc -KBUILD_AFLAGS += -mcpu=powerpc -endif -endif -endif - -ifdef CONFIG_PPC_BOOK3S_32 -KBUILD_CFLAGS += -mcpu=powerpc -endif - # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use # ppc64_defconfig because we have nothing better to go on. uname := $(shell uname -m) @@ -183,6 +166,7 @@ endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) +AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) # Altivec option not allowed with e500mc64 in GCC. ifdef CONFIG_ALTIVEC @@ -193,14 +177,6 @@ endif CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) -ifdef CONFIG_PPC32 -ifdef CONFIG_PPC_E500MC -CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc) -else -CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc) -endif -endif - asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9805a2c717b9..bfea5d8452f8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -135,9 +135,9 @@ config GENERIC_CPU select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU -config GENERIC_CPU +config POWERPC_CPU bool "Generic 32 bits powerpc" - depends on PPC32 && !PPC_8xx + depends on PPC32 && !PPC_8xx && !PPC_85xx config CELL_CPU bool "Cell Broadband Engine" @@ -196,11 +196,23 @@ config G4_CPU depends on PPC_BOOK3S_32 select ALTIVEC +config E500_CPU + bool "e500 (8540)" + depends on PPC_85xx && !PPC_E500MC + +config E500MC_CPU + bool "e500mc" + depends on PPC_85xx && PPC_E500MC + +config TOOLCHAIN_DEFAULT_CPU + bool "Rely on the toolchain's implicit default CPU" + depends on PPC32 + endchoice config TARGET_CPU_BOOL bool - default !GENERIC_CPU + default !GENERIC_CPU && !TOOLCHAIN_DEFAULT_CPU config TARGET_CPU string @@ -215,6 +227,9 @@ config TARGET_CPU default "e300c2" if E300C2_CPU default "e300c3" if E300C3_CPU default "G4" if G4_CPU + default "8540" if E500_CPU + default "e500mc" if E500MC_CPU + default "powerpc" if POWERPC_CPU config PPC_BOOK3S def_bool y -- cgit From ff27d9200a98757efc7c2cdf198904fd79cf4ffd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:31 +0200 Subject: powerpc/405: Fix build failure with GCC 12 (unrecognized opcode: `wrteei') Building ppc40x_defconfig leads to following error CC arch/powerpc/kernel/process.o {standard input}: Assembler messages: {standard input}:626: Error: unrecognized opcode: `wrteei' Add -mcpu=405 by default. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d344a42c99061cfe10a28e00de4e31a1363f4251.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index bfea5d8452f8..e14000557ebd 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -137,7 +137,7 @@ config GENERIC_CPU config POWERPC_CPU bool "Generic 32 bits powerpc" - depends on PPC32 && !PPC_8xx && !PPC_85xx + depends on PPC32 && !PPC_8xx && !PPC_85xx && !40x config CELL_CPU bool "Cell Broadband Engine" @@ -179,6 +179,10 @@ config E6500_CPU bool "Freescale e6500" depends on PPC64 && E500 +config 405_CPU + bool "40x family" + depends on 40x + config 860_CPU bool "8xx family" depends on PPC_8xx @@ -223,6 +227,7 @@ config TARGET_CPU default "power7" if POWER7_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU + default "405" if 405_CPU default "860" if 860_CPU default "e300c2" if E300C2_CPU default "e300c3" if E300C3_CPU -- cgit From 2255411d1d0f0661d1e5acd5f6edf4e6652a345a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:32 +0200 Subject: powerpc/44x: Fix build failure with GCC 12 (unrecognized opcode: `wrteei') Building ppc40x_defconfig leads to following error CC arch/powerpc/kernel/idle.o {standard input}: Assembler messages: {standard input}:67: Error: unrecognized opcode: `wrteei' {standard input}:78: Error: unrecognized opcode: `wrteei' Add -mcpu=440 by default and alternatively 464 and 476. Once that's done, -mcpu=powerpc is only for book3s/32 now. But then comes CC arch/powerpc/kernel/io.o {standard input}: Assembler messages: {standard input}:198: Error: unrecognized opcode: `eieio' {standard input}:230: Error: unrecognized opcode: `eieio' {standard input}:245: Error: unrecognized opcode: `eieio' {standard input}:254: Error: unrecognized opcode: `eieio' {standard input}:273: Error: unrecognized opcode: `eieio' {standard input}:396: Error: unrecognized opcode: `eieio' {standard input}:404: Error: unrecognized opcode: `eieio' {standard input}:423: Error: unrecognized opcode: `eieio' {standard input}:512: Error: unrecognized opcode: `eieio' {standard input}:520: Error: unrecognized opcode: `eieio' {standard input}:539: Error: unrecognized opcode: `eieio' {standard input}:628: Error: unrecognized opcode: `eieio' {standard input}:636: Error: unrecognized opcode: `eieio' {standard input}:655: Error: unrecognized opcode: `eieio' Fix it by replacing eieio by mbar on booke. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b0d982e223314ed82ab959f5d4ad2c4c00bedb99.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/barrier.h | 2 ++ arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/powerpc/include/asm/synch.h | 5 ++++- arch/powerpc/mm/nohash/tlb_low.S | 4 ++-- arch/powerpc/platforms/Kconfig.cputype | 17 ++++++++++++++++- arch/powerpc/sysdev/fsl_rio.c | 12 ++++++++++-- 6 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f0e687236484..ef2d8b15eaab 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -42,6 +42,8 @@ /* The sub-arch has lwsync */ #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC +#elif defined(CONFIG_BOOKE) +# define SMPWMB mbar #else # define SMPWMB eieio #endif diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index ac75f4ab0dba..b499da6c1a99 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -193,7 +193,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ stw%X0 %2,%0\n\ - eieio\n\ + mbar\n\ stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index 1d67bc8d7bc6..7130176d8cb8 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -14,7 +14,10 @@ extern void do_lwsync_fixups(unsigned long value, void *fixup_start, static inline void eieio(void) { - __asm__ __volatile__ ("eieio" : : : "memory"); + if (IS_ENABLED(CONFIG_BOOKE)) + __asm__ __volatile__ ("mbar" : : : "memory"); + else + __asm__ __volatile__ ("eieio" : : : "memory"); } static inline void isync(void) diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index dd39074de9af..d62b613a0d5d 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -186,7 +186,7 @@ _GLOBAL(_tlbivax_bcast) isync PPC_TLBIVAX(0, R3) isync - eieio + mbar tlbsync BEGIN_FTR_SECTION b 1f @@ -355,7 +355,7 @@ _GLOBAL(_tlbivax_bcast) rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND 1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ PPC_TLBIVAX(0,R3) - eieio + mbar tlbsync sync wrtee r10 diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e14000557ebd..3cc8452b8660 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -137,7 +137,7 @@ config GENERIC_CPU config POWERPC_CPU bool "Generic 32 bits powerpc" - depends on PPC32 && !PPC_8xx && !PPC_85xx && !40x + depends on PPC_BOOK3S_32 config CELL_CPU bool "Cell Broadband Engine" @@ -183,6 +183,18 @@ config 405_CPU bool "40x family" depends on 40x +config 440_CPU + bool "440 (44x family)" + depends on 44x + +config 464_CPU + bool "464 (44x family)" + depends on 44x + +config 476_CPU + bool "476 (47x family)" + depends on PPC_47x + config 860_CPU bool "8xx family" depends on PPC_8xx @@ -228,6 +240,9 @@ config TARGET_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU default "405" if 405_CPU + default "440" if 440_CPU + default "464" if 464_CPU + default "476" if 476_CPU default "860" if 860_CPU default "e300c2" if E300C2_CPU default "e300c3" if E300C3_CPU diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 1bfc9afa8a1a..4647c6074f3b 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -69,10 +69,10 @@ static DEFINE_SPINLOCK(fsl_rio_config_lock); -#define __fsl_read_rio_config(x, addr, err, op) \ +#define ___fsl_read_rio_config(x, addr, err, op, barrier) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ - " eieio\n" \ + " "barrier"\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %1,-1\n" \ @@ -83,6 +83,14 @@ static DEFINE_SPINLOCK(fsl_rio_config_lock); : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) +#ifdef CONFIG_BOOKE +#define __fsl_read_rio_config(x, addr, err, op) \ + ___fsl_read_rio_config(x, addr, err, op, "mbar") +#else +#define __fsl_read_rio_config(x, addr, err, op) \ + ___fsl_read_rio_config(x, addr, err, op, "eieio") +#endif + void __iomem *rio_regs_win; void __iomem *rmu_regs_win; resource_size_t rio_law_start; -- cgit From d6b551b8f90cc92c7d3c09cf38c748efe305ecb4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:33 +0200 Subject: powerpc/64e: Fix build failure with GCC 12 (unrecognized opcode: `wrteei') With GCC 12, corenet64_smp_defconfig leads to the following build errors: CC arch/powerpc/kernel/irq.o {standard input}: Assembler messages: {standard input}:3616: Error: unrecognized opcode: `wrteei' {standard input}:5689: Error: unrecognized opcode: `wrteei' CC arch/powerpc/kernel/pmc.o {standard input}: Assembler messages: {standard input}:42: Error: unrecognized opcode: `mfpmr' {standard input}:53: Error: unrecognized opcode: `mtpmr' CC arch/powerpc/kernel/io.o {standard input}: Assembler messages: {standard input}:376: Error: unrecognized opcode: `mbar' ... CC arch/powerpc/mm/nohash/book3e_hugetlbpage.o {standard input}: Assembler messages: {standard input}:291: Error: unrecognized opcode: `tlbsx' {standard input}:482: Error: unrecognized opcode: `tlbwe' {standard input}:608: Error: unrecognized opcode: `lbarx' {standard input}:608: Error: unrecognized opcode: `stbcx.' -mpcu=powerpc64 cannot be used anymore for book3e, it must be a booke CPU. But then we get: CC arch/powerpc/lib/xor_vmx.o cc1: error: AltiVec not supported in this target Altivec is not supported with -mcpu=e5500 so don't allow selection of altivec when e5500 is selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/77255a5a957967723b84d0356d9e5fb21569f4e8.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 8 +------- arch/powerpc/platforms/Kconfig.cputype | 8 ++++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index d54e1fe03551..02742facf895 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -168,13 +168,7 @@ endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) -# Altivec option not allowed with e500mc64 in GCC. -ifdef CONFIG_ALTIVEC -E5500_CPU := -mcpu=powerpc64 -else -E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) -endif -CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) +CFLAGS-$(CONFIG_E5500_CPU) += $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3cc8452b8660..5185d942b455 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -126,12 +126,12 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" - depends on PPC64 && !CPU_LITTLE_ENDIAN - select PPC_64S_HASH_MMU if PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config GENERIC_CPU bool "Generic (POWER8 and above)" - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU @@ -358,7 +358,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" - depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU) select PPC_FPU help This option enables kernel support for the Altivec extensions to the -- cgit From 4515862b66d3bdaf681cade1c72f047c93d94d01 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 28 Jul 2022 10:33:31 +0700 Subject: docs: powerpc: fix indentation warnings Sphinx reported unexpected indentation warnings: Documentation/powerpc/elf_hwcaps.rst:82: WARNING: Unexpected indentation. Documentation/powerpc/elf_hwcaps.rst:100: WARNING: Unexpected indentation. Documentation/powerpc/elf_hwcaps.rst:117: WARNING: Unexpected indentation. Documentation/powerpc/elf_hwcaps.rst:122: WARNING: Unexpected indentation. Documentation/powerpc/elf_hwcaps.rst:144: WARNING: Unexpected indentation. Fix these warnings by unindenting commit references and using literal code block for instructions list for PPC_FEATURE_ICACHE_SNOOP. Fixes: 3df1ff42e69e91 ("powerpc: add documentation for HWCAPs") Reported-by: Stephen Rothwell Signed-off-by: Bagas Sanjaya Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linuxppc-dev/20220727220050.549db613@canb.auug.org.au/ Link: https://lore.kernel.org/r/20220728033332.27836-2-bagasdotme@gmail.com --- Documentation/powerpc/elf_hwcaps.rst | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Documentation/powerpc/elf_hwcaps.rst b/Documentation/powerpc/elf_hwcaps.rst index 96d4fc4a3f91..bb7438cebf36 100644 --- a/Documentation/powerpc/elf_hwcaps.rst +++ b/Documentation/powerpc/elf_hwcaps.rst @@ -78,8 +78,7 @@ PPC_FEATURE_64 PPC_FEATURE_601_INSTR The processor is PowerPC 601. - Unused in the kernel since: - f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") PPC_FEATURE_HAS_ALTIVEC Vector (aka Altivec, VMX) facility is available. @@ -96,8 +95,7 @@ PPC_FEATURE_HAS_4xxMAC PPC_FEATURE_UNIFIED_CACHE The processor has a unified L1 cache for instructions and data, as found in NXP e200. - Unused in the kernel since: - 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") + Unused in the kernel since 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") PPC_FEATURE_HAS_SPE Signal Processing Engine facility is available. @@ -113,13 +111,11 @@ PPC_FEATURE_NO_TB This is a 601 specific HWCAP, so if it is known that the processor running is not a 601, via other HWCAPs or other means, it is not required to test this bit before using the timebase. - Unused in the kernel since: - f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") PPC_FEATURE_POWER4 The processor is POWER4 or PPC970/FX/MP. - POWER4 support dropped from the kernel since: - 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") + POWER4 support dropped from the kernel since 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") PPC_FEATURE_POWER5 The processor is POWER5. @@ -140,7 +136,8 @@ PPC_FEATURE_ICACHE_SNOOP The processor icache is coherent with the dcache, and instruction storage can be made consistent with data storage for the purpose of executing instructions with the sequence (as described in, e.g., POWER9 Processor - User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)): + User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)):: + sync icbi (to any address) isync -- cgit From 0595a216920cb035030c73cec3ab9fe413ef1d77 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 28 Jul 2022 10:33:32 +0700 Subject: docs: powerpc: use different label name for elf_hwcaps.rst Sphinx reported duplicate label warning: WARNING: duplicate label elf_hwcaps_index, other instance in Documentation/arm64/elf_hwcaps.rst The warning is caused by elf_hwcaps_index label name is already used for arm64 documentation, whileas powerpc use the same name. Disambiguate the label name for powerpc. Fixes: 3df1ff42e69e91 ("powerpc: add documentation for HWCAPs") Reported-by: Stephen Rothwell Signed-off-by: Bagas Sanjaya Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linuxppc-dev/20220727220050.549db613@canb.auug.org.au/ Link: https://lore.kernel.org/r/20220728033332.27836-3-bagasdotme@gmail.com --- Documentation/powerpc/elf_hwcaps.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/powerpc/elf_hwcaps.rst b/Documentation/powerpc/elf_hwcaps.rst index bb7438cebf36..3366e5b18e67 100644 --- a/Documentation/powerpc/elf_hwcaps.rst +++ b/Documentation/powerpc/elf_hwcaps.rst @@ -1,4 +1,4 @@ -.. _elf_hwcaps_index: +.. _elf_hwcaps_powerpc: ================== POWERPC ELF HWCAPs -- cgit From a05aae92f84ba6d2705f6dac206ef5dcf097ea96 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 28 Jul 2022 10:33:33 +0700 Subject: docs: powerpc: add elf_hwcaps to table of contents elf_hwcaps documentation is missing from table of contents at index.rst, hence triggers Sphinx warning: Documentation/powerpc/elf_hwcaps.rst: WARNING: document isn't included in any toctree Add the documentation to the index to fix the warning. Fixes: 3df1ff42e69e91 ("powerpc: add documentation for HWCAPs") Reported-by: Stephen Rothwell Signed-off-by: Bagas Sanjaya Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linuxppc-dev/20220727220050.549db613@canb.auug.org.au/ Link: https://lore.kernel.org/r/20220728033332.27836-4-bagasdotme@gmail.com --- Documentation/powerpc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index 0f7d3c495693..85e80e30160b 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -17,6 +17,7 @@ powerpc dawr-power9 dscr eeh-pci-error-recovery + elf_hwcaps elfnote firmware-assisted-dump hvcs -- cgit From ebef8abc963b9e537c0a0d619dd8faf1b8f2b183 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Jul 2022 09:11:05 +0200 Subject: video: fbdev: offb: Include missing linux/platform_device.h A lot of drivers were getting platform and of headers indirectly via headers like asm/pci.h or asm/prom.h Most of them were fixed during 5.19 cycle but a newissue was introduced by commit 52b1b46c39ae ("of: Create platform devices for OF framebuffers") Include missing platform_device.h to allow cleaning asm/pci.h Fixes: 52b1b46c39ae ("of: Create platform devices for OF framebuffers") Signed-off-by: Christophe Leroy Acked-by: Helge Deller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f75b383673663e27f6b57e50b4abfb9fe3780b00.1657264228.git.christophe.leroy@csgroup.eu --- drivers/video/fbdev/offb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index b1acb1ebebe9..91001990e351 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef CONFIG_PPC32 -- cgit From 61657dcd528b75cd196adaf56890124c13953c8d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Jul 2022 09:11:06 +0200 Subject: scsi: cxlflash: Include missing linux/irqdomain.h powerpc's asm/prom.h brings some headers that it doesn't need itself. Once those headers are removed from asm/prom.h, the following errors occur: CC [M] drivers/scsi/cxlflash/ocxl_hw.o drivers/scsi/cxlflash/ocxl_hw.c: In function 'afu_map_irq': drivers/scsi/cxlflash/ocxl_hw.c:195:16: error: implicit declaration of function 'irq_create_mapping' [-Werror=implicit-function-declaration] 195 | virq = irq_create_mapping(NULL, irq->hwirq); | ^~~~~~~~~~~~~~~~~~ drivers/scsi/cxlflash/ocxl_hw.c:222:9: error: implicit declaration of function 'irq_dispose_mapping' [-Werror=implicit-function-declaration] 222 | irq_dispose_mapping(virq); | ^~~~~~~~~~~~~~~~~~~ drivers/scsi/cxlflash/ocxl_hw.c: In function 'afu_unmap_irq': drivers/scsi/cxlflash/ocxl_hw.c:264:13: error: implicit declaration of function 'irq_find_mapping'; did you mean 'is_cow_mapping'? [-Werror=implicit-function-declaration] 264 | if (irq_find_mapping(NULL, irq->hwirq)) { | ^~~~~~~~~~~~~~~~ | is_cow_mapping cc1: some warnings being treated as errors Fix it by including linux/irqdomain.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c6c0cc5e9179a642370a61439f95158271a78c03.1657264228.git.christophe.leroy@csgroup.eu --- drivers/scsi/cxlflash/ocxl_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 244fc27215dc..631eda2d467e 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include -- cgit From 4177ab2283dcd98735572ebda56b9d479dc1d7f2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 27 Jul 2022 15:03:52 +1000 Subject: EDAC/mpc85xx: Include required of headers directly A subsequent commit to cleanup powerpc's asm/prom.h leads to build errors in mpc85xx_edac.c due to missing headers. Include all required headers directly to avoid the build failure. Reported-by: kernel test robot Signed-off-by: Michael Ellerman --- drivers/edac/mpc85xx_edac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 5bf92298554d..e50d7928bf8f 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "edac_module.h" #include "mpc85xx_edac.h" #include "fsl_ddr_edac.h" -- cgit From 4d5c5bad51935482437528f7fa4dffdcb3330d8b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Jul 2022 09:11:07 +0200 Subject: powerpc: Remove asm/prom.h from asm/mpc52xx.h and asm/pci.h asm/pci.h and asm/mpc52xx.h don't need asm/prom.h Declare struct device_node locally to avoid including of.h Signed-off-by: Christophe Leroy [mpe: Add missing include of prom.h to of_rtc.c] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf5243343e2364c2b40f22ee5ad9a6e2453d1121.1657264228.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mpc52xx.h | 3 ++- arch/powerpc/include/asm/pci.h | 1 - arch/powerpc/kernel/prom.c | 1 + arch/powerpc/sysdev/of_rtc.c | 2 ++ 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index ddd80aae1e32..5ea16a71c2f0 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -15,7 +15,6 @@ #ifndef __ASSEMBLY__ #include -#include #include #endif /* __ASSEMBLY__ */ @@ -268,6 +267,8 @@ struct mpc52xx_intr { #ifndef __ASSEMBLY__ +struct device_node; + /* mpc52xx_common.c */ extern void mpc5200_setup_xlb_arbiter(void); extern void mpc52xx_declare_of_platform_devices(void); diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 915d6ee4b40a..0f182074cdb7 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -14,7 +14,6 @@ #include #include -#include #include /* Return values for pci_controller_ops.probe_mode function */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1066b072db35..0a68c99da573 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -54,6 +54,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c index 1f408d34a6a7..420f949b7485 100644 --- a/arch/powerpc/sysdev/of_rtc.c +++ b/arch/powerpc/sysdev/of_rtc.c @@ -11,6 +11,8 @@ #include #include +#include + static __initdata struct { const char *compatible; char *plat_name; -- cgit From 36afe68714d45edf34430d28e3dc787425ad8b22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Jul 2022 09:11:08 +0200 Subject: powerpc: Finally remove unnecessary headers from asm/prom.h Remove all headers included from asm/prom.h which are not used by asm/prom.h itself. Declare struct device_node and struct property locally to avoid including of.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4be954abef978b34cff9193fc566ffefdd3517bb.1657264228.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/prom.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 6f109b5cb84e..2e82820fbd64 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -12,16 +12,10 @@ * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. */ #include -#include -#include #include -/* These includes should be removed once implicit includes are cleaned up. */ -#include -#include -#include -#include -#include +struct device_node; +struct property; #define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ #define OF_DT_END_NODE 0x2 /* End node */ -- cgit From 51ac6d4ceaa4f2e878c1aa399135f2514a6acc24 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Jun 2022 16:08:17 +1000 Subject: powerpc: Update reviewers Christophe and Nick have been active in recent years on the mailing list and making contributions, add them as reviewers. Paul and Ben are no longer actively reviewing powerpc patches, remove them from the reviewers, they're still on linuxppc-dev if needed. Acked-by: Benjamin Herrenschmidt Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220629060817.2943966-1-mpe@ellerman.id.au --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1fc9ead83d2a..af4cfeec9d0f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11345,8 +11345,8 @@ F: drivers/macintosh/ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman -R: Benjamin Herrenschmidt -R: Paul Mackerras +R: Nicholas Piggin +R: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki -- cgit From c7255058b5430b5c42932383bd8887d591e7973a Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 30 Jun 2022 12:19:42 +0530 Subject: powerpc/crash: save cpu register data in crash_smp_send_stop() During kdump, two set of NMI IPIs are sent to secondary CPUs, if 'crash_kexec_post_notifiers' option is set. The first set of NMI IPIs to stop the CPUs and the other set to collect register data. Instead, capture register data for secondary CPUs while stopping them itself. Also, fallback to smp_send_stop() in case the function gets called without kdump configured. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220630064942.192283-1-hbathini@linux.ibm.com --- arch/powerpc/include/asm/kexec.h | 1 + arch/powerpc/kernel/smp.c | 29 +++++---------- arch/powerpc/kexec/crash.c | 77 ++++++++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 2aefe14e1442..cce69101205e 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -83,6 +83,7 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); +extern void crash_kexec_prepare(void); extern void crash_kexec_secondary(struct pt_regs *regs); int __init overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bcefab484ea6..6b850c157a62 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,6 @@ #endif #include #include -#include #include #include #include @@ -619,20 +619,6 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif -#ifdef CONFIG_NMI_IPI -static void crash_stop_this_cpu(struct pt_regs *regs) -#else -static void crash_stop_this_cpu(void *dummy) -#endif -{ - /* - * Just busy wait here and avoid marking CPU as offline to ensure - * register data is captured appropriately. - */ - while (1) - cpu_relax(); -} - void crash_smp_send_stop(void) { static bool stopped = false; @@ -651,11 +637,14 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); -#else - smp_call_function(crash_stop_this_cpu, NULL, 0); -#endif /* CONFIG_NMI_IPI */ +#ifdef CONFIG_KEXEC_CORE + if (kexec_crash_image) { + crash_kexec_prepare(); + return; + } +#endif + + smp_send_stop(); } #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 80f54723cf6d..252724ed666a 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -40,6 +40,14 @@ #define REAL_MODE_TIMEOUT 10000 static int time_to_dump; + +/* + * In case of system reset, secondary CPUs enter crash_kexec_secondary with out + * having to send an IPI explicitly. So, indicate if the crash is via + * system reset to avoid sending another IPI. + */ +static int is_via_system_reset; + /* * crash_wake_offline should be set to 1 by platforms that intend to wake * up offline cpus prior to jumping to a kdump kernel. Currently powernv @@ -101,7 +109,7 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { unsigned int msecs; volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ @@ -113,7 +121,15 @@ static void crash_kexec_prepare_cpus(int cpu) if (crash_wake_offline) ncpus = num_present_cpus() - 1; - crash_send_ipi(crash_ipi_callback); + /* + * If we came in via system reset, secondaries enter via crash_kexec_secondary(). + * So, wait a while for the secondary CPUs to enter for that case. + * Else, send IPI to all other CPUs. + */ + if (is_via_system_reset) + mdelay(PRIMARY_TIMEOUT); + else + crash_send_ipi(crash_ipi_callback); smp_wmb(); again: @@ -202,7 +218,7 @@ void crash_kexec_secondary(struct pt_regs *regs) #else /* ! CONFIG_SMP */ -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { /* * move the secondaries to us so that we can copy @@ -248,6 +264,32 @@ noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu) static inline void crash_kexec_wait_realmode(int cpu) {} #endif /* CONFIG_SMP && CONFIG_PPC64 */ +void crash_kexec_prepare(void) +{ + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ + printk_deferred_enter(); + + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + hard_irq_disable(); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + + crash_kexec_prepare_cpus(); +} + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. @@ -311,35 +353,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ - printk_deferred_enter(); - - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means stopping other cpus in - * an SMP system. - * The kernel is broken so disable interrupts. - */ - hard_irq_disable(); - - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - - /* - * If we came in via system reset, wait a while for the secondary - * CPUs to enter. - */ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) - mdelay(PRIMARY_TIMEOUT); + is_via_system_reset = 1; - crash_kexec_prepare_cpus(crashing_cpu); + crash_smp_send_stop(); crash_save_cpu(regs, crashing_cpu); -- cgit From b1fc44eaa9ba31e28c4125d6b9205a3582b47b5d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 29 Jun 2022 16:06:14 +1000 Subject: pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window The pseries platform uses 32bit default DMA window (always 4K pages) and optional 64bit DMA window available via DDW ("Dynamic DMA Windows"), 64K or 2M pages. For ages the default one was not removed and a huge window was created in addition. Things changed with SRIOV-enabled PowerVM which creates a default-and-bigger DMA window in 64bit space (still using 4K pages) for IOV VFs so certain OSes do not need to use the DDW API in order to utilize all available TCE budget. Linux on the other hand removes the default window and creates a bigger one (with more TCEs or/and a bigger page size - 64K/2M) in a bid to map the entire RAM, and if the new window size is smaller than that - it still uses this new bigger window. The result is that the default window is removed but the "ibm,dma-window" property is not. When kdump is invoked, the existing code tries reusing the existing 64bit DMA window which location and parameters are stored in the device tree but this fails as the new property does not make it to the kdump device tree blob. So the code falls back to the default window which does not exist anymore although the device tree says that it does. The result of that is that PCI devices become unusable and cannot be used for kdumping. This preserves the DMA64 and DIRECT64 properties in the device tree blob for the crash kernel. Since the crash kernel setup is done after device drivers are loaded and probed, the proper DMA config is stored at least for boot time devices. Because DDW window is optional and the code configures the default window first, the existing code creates an IOMMU table descriptor for the non-existing default DMA window. It is harmless for kdump as it does not touch the actual window (only reads what is mapped and marks those IO pages as used) but it is bad for kexec which clears it thinking it is a smaller default window rather than a bigger DDW window. This removes the "ibm,dma-window" property from the device tree after a bigger window is created and the crash kernel setup picks it up. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Acked-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220629060614.1680476-1-aik@ozlabs.ru --- arch/powerpc/kexec/file_load_64.c | 54 +++++++++++++++++++++ arch/powerpc/platforms/pseries/iommu.c | 89 ++++++++++++++++++---------------- 2 files changed, 102 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index b4981b651d9a..5d2c22aa34fb 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -1038,6 +1038,48 @@ out: return ret; } +static int copy_property(void *fdt, int node_offset, const struct device_node *dn, + const char *propname) +{ + const void *prop, *fdtprop; + int len = 0, fdtlen = 0, ret; + + prop = of_get_property(dn, propname, &len); + fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen); + + if (fdtprop && !prop) + ret = fdt_delprop(fdt, node_offset, propname); + else if (prop) + ret = fdt_setprop(fdt, node_offset, propname, prop, len); + + return ret; +} + +static int update_pci_dma_nodes(void *fdt, const char *dmapropname) +{ + struct device_node *dn; + int pci_offset, root_offset, ret = 0; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + root_offset = fdt_path_offset(fdt, "/"); + for_each_node_with_property(dn, dmapropname) { + pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn)); + if (pci_offset < 0) + continue; + + ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window"); + if (ret < 0) + break; + ret = copy_property(fdt, pci_offset, dn, dmapropname); + if (ret < 0) + break; + } + + return ret; +} + /** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. @@ -1099,6 +1141,18 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, if (ret < 0) goto out; +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" + ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME); + if (ret < 0) + goto out; + + ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME); + if (ret < 0) + goto out; +#undef DMA64_PROPNAME +#undef DIRECT64_PROPNAME + /* Update memory reserve map */ ret = get_reserved_memory_ranges(&rmem); if (ret) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 2fb10ef6dee8..561adac69022 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -700,6 +700,33 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { .get = tce_get_pSeriesLP }; +/* + * Find nearest ibm,dma-window (default DMA window) or direct DMA window or + * dynamic 64bit DMA window, walking up the device tree. + */ +static struct device_node *pci_dma_find(struct device_node *dn, + const __be32 **dma_window) +{ + const __be32 *dw = NULL; + + for ( ; dn && PCI_DN(dn); dn = dn->parent) { + dw = of_get_property(dn, "ibm,dma-window", NULL); + if (dw) { + if (dma_window) + *dma_window = dw; + return dn; + } + dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (dw) + return dn; + dw = of_get_property(dn, DMA64_PROPNAME, NULL); + if (dw) + return dn; + } + + return NULL; +} + static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; @@ -712,20 +739,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - /* - * Find nearest ibm,dma-window (default DMA window), walking up the - * device tree - */ - for (pdn = dn; pdn != NULL; pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window != NULL) - break; - } + pdn = pci_dma_find(dn, &dma_window); - if (dma_window == NULL) { + if (dma_window == NULL) pr_debug(" no ibm,dma-window property !\n"); - return; - } ppci = PCI_DN(pdn); @@ -735,11 +752,13 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); + if (dma_window) { + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, + ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + } iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -1234,7 +1253,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) bool default_win_removed = false, direct_mapping = false; bool pmem_present; struct pci_dn *pci = PCI_DN(pdn); - struct iommu_table *tbl = pci->table_group->tables[0]; + struct property *default_win = NULL; dn = of_find_node_by_type(NULL, "ibm,pmemory"); pmem_present = dn != NULL; @@ -1291,11 +1310,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for extensions presence. */ if (query.windows_available == 0) { - struct property *default_win; int reset_win_ext; /* DDW + IOMMU on single window may fail if there is any allocation */ - if (iommu_table_in_use(tbl)) { + if (iommu_table_in_use(pci->table_group->tables[0])) { dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); goto out_failed; } @@ -1431,16 +1449,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) pci->table_group->tables[1] = newtbl; - /* Keep default DMA window struct if removed */ - if (default_win_removed) { - tbl->it_size = 0; - vfree(tbl->it_map); - tbl->it_map = NULL; - } - set_iommu_table_base(&dev->dev, newtbl); } + if (default_win_removed) { + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + + /* default_win is valid here because default_win_removed == true */ + of_remove_property(pdn, default_win); + dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); + } + spin_lock(&dma_win_list_lock); list_add(&window->list, &dma_win_list); spin_unlock(&dma_win_list_lock); @@ -1505,13 +1525,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, &dma_window); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1542,7 +1556,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) { struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; - const __be32 *dma_window = NULL; /* only attempt to use a new window if 64-bit DMA is requested */ if (dma_mask < DMA_BIT_MASK(64)) @@ -1556,13 +1569,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, NULL); if (pdn && PCI_DN(pdn)) return enable_ddw(pdev, pdn); -- cgit From d73b46c3c1449bf27f793b9d9ee86ed70c7a7163 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 14 Jul 2022 18:08:00 +1000 Subject: powerpc/ioda/iommu/debugfs: Generate unique debugfs entries The iommu_table::it_index is a LIOBN which is not initialized on PowerNV as it is not used except IOMMU debugfs where it is used for a node name. This initializes it_index witn a unique number to avoid warnings and have a node for every iommu_table. This should not cause any behavioral change without CONFIG_IOMMU_DEBUGFS. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220714080800.3712998-1-aik@ozlabs.ru --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c8cf2728031a..9de9b2fb163d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1609,6 +1609,7 @@ found: tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; + tbl->it_index = (phb->hose->global_number << 16) | pe->pe_number; if (!iommu_init_table(tbl, phb->hose->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -1779,6 +1780,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; } + tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number; if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); else -- cgit From d80f6de9d601c30b53c17f00cb7cfe3169f2ddad Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 14 Jul 2022 18:11:19 +1000 Subject: powerpc/iommu: Fix iommu_table_in_use for a small default DMA window case The existing iommu_table_in_use() helper checks if the kernel is using any of TCEs. There are some reserved TCEs: 1) the very first one if DMA window starts from 0 to avoid having a zero but still valid DMA handle; 2) it_reserved_start..it_reserved_end to exclude MMIO32 window in case the default window spans across that - this is the default for the first DMA window on PowerNV. When 1) is the case and 2) is not the helper does not skip 1) and returns wrong status. This only seems occurring when passing through a PCI device to a nested guest (not something we support really well) so it has not been seen before. This fixes the bug by adding a special case for no MMIO32 reservation. Fixes: 3c33066a2190 ("powerpc/kernel/iommu: Add new iommu_table_in_use() helper") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220714081119.3714605-1-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..caebe1431596 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -775,6 +775,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; -- cgit From 2454a7af0f2a42918aa972147a0bec38e6656cd8 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sat, 23 Jul 2022 07:30:46 -0400 Subject: powerpc/pseries: define driver for Platform KeyStore PowerVM provides an isolated Platform Keystore(PKS) storage allocation for each LPAR with individually managed access controls to store sensitive information securely. It provides a new set of hypervisor calls for Linux kernel to access PKS storage. Define POWER LPAR Platform KeyStore(PLPKS) driver using H_CALL interface to access PKS storage. Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220723113048.521744-2-nayna@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 11 + arch/powerpc/platforms/pseries/Kconfig | 13 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/plpks.c | 460 ++++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/plpks.h | 71 +++++ 5 files changed, 556 insertions(+) create mode 100644 arch/powerpc/platforms/pseries/plpks.c create mode 100644 arch/powerpc/platforms/pseries/plpks.h diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4457abe31e6e..8abae463f6c1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -79,6 +79,7 @@ #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDED -46 +#define H_P1 -54 #define H_P2 -55 #define H_P3 -56 #define H_P4 -57 @@ -98,6 +99,8 @@ #define H_OP_MODE -73 #define H_COP_HW -74 #define H_STATE -75 +#define H_IN_USE -77 +#define H_ABORTED -78 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 @@ -322,6 +325,14 @@ #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 +#define H_PKS_GET_CONFIG 0x41C +#define H_PKS_SET_PASSWORD 0x420 +#define H_PKS_GEN_PASSWORD 0x424 +#define H_PKS_WRITE_OBJECT 0x42C +#define H_PKS_GEN_KEY 0x430 +#define H_PKS_READ_OBJECT 0x434 +#define H_PKS_REMOVE_OBJECT 0x438 +#define H_PKS_CONFIRM_OBJECT_FLUSHED 0x43C #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C #define H_GET_ENERGY_SCALE_INFO 0x450 diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index f7fd91d153a4..c4a6d4083a7a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -142,6 +142,19 @@ config IBMEBUS help Bus device driver for GX bus based adapters. +config PSERIES_PLPKS + depends on PPC_PSERIES + bool "Support for the Platform Key Storage" + help + PowerVM provides an isolated Platform Keystore(PKS) storage + allocation for each LPAR with individually managed access + controls to store sensitive information securely. It can be + used to store asymmetric public keys or secrets as required + by different usecases. Select this config to enable + operating system interface to hypervisor to access this space. + + If unsure, select N. + config PAPR_SCM depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM tristate "Support for the PAPR Storage Class Memory interface" diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 7aaff5323544..14e143b946a3 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_PAPR_SCM) += papr_scm.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o +obj-$(CONFIG_PSERIES_PLPKS) += plpks.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c new file mode 100644 index 000000000000..52aaa2894606 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * POWER LPAR Platform KeyStore(PLPKS) + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS). + */ + +#define pr_fmt(fmt) "plpks: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "plpks.h" + +#define PKS_FW_OWNER 0x1 +#define PKS_BOOTLOADER_OWNER 0x2 +#define PKS_OS_OWNER 0x3 + +#define LABEL_VERSION 0 +#define MAX_LABEL_ATTR_SIZE 16 +#define MAX_NAME_SIZE 239 +#define MAX_DATA_SIZE 4000 + +#define PKS_FLUSH_MAX_TIMEOUT 5000 //msec +#define PKS_FLUSH_SLEEP 10 //msec +#define PKS_FLUSH_SLEEP_RANGE 400 + +static u8 *ospassword; +static u16 ospasswordlength; + +// Retrieved with H_PKS_GET_CONFIG +static u16 maxpwsize; +static u16 maxobjsize; + +struct plpks_auth { + u8 version; + u8 consumer; + __be64 rsvd0; + __be32 rsvd1; + __be16 passwordlength; + u8 password[]; +} __packed __aligned(16); + +struct label_attr { + u8 prefix[8]; + u8 version; + u8 os; + u8 length; + u8 reserved[5]; +}; + +struct label { + struct label_attr attr; + u8 name[MAX_NAME_SIZE]; + size_t size; +}; + +static int pseries_status_to_err(int rc) +{ + int err; + + switch (rc) { + case H_SUCCESS: + err = 0; + break; + case H_FUNCTION: + err = -ENXIO; + break; + case H_P1: + case H_P2: + case H_P3: + case H_P4: + case H_P5: + case H_P6: + err = -EINVAL; + break; + case H_NOT_FOUND: + err = -ENOENT; + break; + case H_BUSY: + err = -EBUSY; + break; + case H_AUTHORITY: + err = -EPERM; + break; + case H_NO_MEM: + err = -ENOMEM; + break; + case H_RESOURCE: + err = -EEXIST; + break; + case H_TOO_BIG: + err = -EFBIG; + break; + case H_STATE: + err = -EIO; + break; + case H_R_STATE: + err = -EIO; + break; + case H_IN_USE: + err = -EEXIST; + break; + case H_ABORTED: + err = -EINTR; + break; + default: + err = -EINVAL; + } + + return err; +} + +static int plpks_gen_password(void) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + u8 *password, consumer = PKS_OS_OWNER; + int rc; + + password = kzalloc(maxpwsize, GFP_KERNEL); + if (!password) + return -ENOMEM; + + rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0, + virt_to_phys(password), maxpwsize); + + if (!rc) { + ospasswordlength = maxpwsize; + ospassword = kzalloc(maxpwsize, GFP_KERNEL); + if (!ospassword) { + kfree(password); + return -ENOMEM; + } + memcpy(ospassword, password, ospasswordlength); + } else { + if (rc == H_IN_USE) { + pr_warn("Password is already set for POWER LPAR Platform KeyStore\n"); + rc = 0; + } else { + goto out; + } + } +out: + kfree(password); + + return pseries_status_to_err(rc); +} + +static struct plpks_auth *construct_auth(u8 consumer) +{ + struct plpks_auth *auth; + + if (consumer > PKS_OS_OWNER) + return ERR_PTR(-EINVAL); + + auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + if (!auth) + return ERR_PTR(-ENOMEM); + + auth->version = 1; + auth->consumer = consumer; + auth->rsvd0 = 0; + auth->rsvd1 = 0; + + if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) { + auth->passwordlength = 0; + return auth; + } + + memcpy(auth->password, ospassword, ospasswordlength); + + auth->passwordlength = cpu_to_be16(ospasswordlength); + + return auth; +} + +/** + * Label is combination of label attributes + name. + * Label attributes are used internally by kernel and not exposed to the user. + */ +static struct label *construct_label(char *component, u8 varos, u8 *name, + u16 namelen) +{ + struct label *label; + size_t slen; + + if (!name || namelen > MAX_NAME_SIZE) + return ERR_PTR(-EINVAL); + + slen = strlen(component); + if (component && slen > sizeof(label->attr.prefix)) + return ERR_PTR(-EINVAL); + + label = kzalloc(sizeof(*label), GFP_KERNEL); + if (!label) + return ERR_PTR(-ENOMEM); + + if (component) + memcpy(&label->attr.prefix, component, slen); + + label->attr.version = LABEL_VERSION; + label->attr.os = varos; + label->attr.length = MAX_LABEL_ATTR_SIZE; + memcpy(&label->name, name, namelen); + + label->size = sizeof(struct label_attr) + namelen; + + return label; +} + +static int _plpks_get_config(void) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct { + u8 version; + u8 flags; + __be32 rsvd0; + __be16 maxpwsize; + __be16 maxobjlabelsize; + __be16 maxobjsize; + __be32 totalsize; + __be32 usedspace; + __be32 supportedpolicies; + __be64 rsvd1; + } __packed config; + size_t size; + int rc; + + size = sizeof(config); + + rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(&config), size); + + if (rc != H_SUCCESS) + return pseries_status_to_err(rc); + + maxpwsize = be16_to_cpu(config.maxpwsize); + maxobjsize = be16_to_cpu(config.maxobjsize); + + return 0; +} + +static int plpks_confirm_object_flushed(struct label *label, + struct plpks_auth *auth) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + u64 timeout = 0; + u8 status; + int rc; + + do { + rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size); + + status = retbuf[0]; + if (rc) { + if (rc == H_NOT_FOUND && status == 1) + rc = 0; + break; + } + + if (!rc && status == 1) + break; + + usleep_range(PKS_FLUSH_SLEEP, + PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE); + timeout = timeout + PKS_FLUSH_SLEEP; + } while (timeout < PKS_FLUSH_MAX_TIMEOUT); + + rc = pseries_status_to_err(rc); + + return rc; +} + +int plpks_write_var(struct plpks_var var) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + int rc; + + if (!var.component || !var.data || var.datalen <= 0 || + var.namelen > MAX_NAME_SIZE || var.datalen > MAX_DATA_SIZE) + return -EINVAL; + + if (var.policy & SIGNEDUPDATE) + return -EINVAL; + + auth = construct_auth(PKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var.component, var.os, var.name, var.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, var.policy, + virt_to_phys(var.data), var.datalen); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + if (rc) + pr_err("Failed to write variable %s for component %s with error %d\n", + var.name, var.component, rc); + + rc = pseries_status_to_err(rc); + kfree(label); +out: + kfree(auth); + + return rc; +} + +int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + int rc; + + if (!component || vname.namelen > MAX_NAME_SIZE) + return -EINVAL; + + auth = construct_auth(PKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(component, varos, vname.name, vname.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + if (rc) + pr_err("Failed to remove variable %s for component %s with error %d\n", + vname.name, component, rc); + + rc = pseries_status_to_err(rc); + kfree(label); +out: + kfree(auth); + + return rc; +} + +static int plpks_read_var(u8 consumer, struct plpks_var *var) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + u8 *output; + int rc; + + if (var->namelen > MAX_NAME_SIZE) + return -EINVAL; + + auth = construct_auth(PKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var->component, var->os, var->name, + var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out_free_auth; + } + + output = kzalloc(maxobjsize, GFP_KERNEL); + if (!output) { + rc = -ENOMEM; + goto out_free_label; + } + + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, virt_to_phys(output), + maxobjsize); + + if (rc != H_SUCCESS) { + pr_err("Failed to read variable %s for component %s with error %d\n", + var->name, var->component, rc); + rc = pseries_status_to_err(rc); + goto out_free_output; + } + + if (var->datalen == 0 || var->datalen > retbuf[0]) + var->datalen = retbuf[0]; + + var->data = kzalloc(var->datalen, GFP_KERNEL); + if (!var->data) { + rc = -ENOMEM; + goto out_free_output; + } + var->policy = retbuf[1]; + + memcpy(var->data, output, var->datalen); + rc = 0; + +out_free_output: + kfree(output); +out_free_label: + kfree(label); +out_free_auth: + kfree(auth); + + return rc; +} + +int plpks_read_os_var(struct plpks_var *var) +{ + return plpks_read_var(PKS_OS_OWNER, var); +} + +int plpks_read_fw_var(struct plpks_var *var) +{ + return plpks_read_var(PKS_FW_OWNER, var); +} + +int plpks_read_bootloader_var(struct plpks_var *var) +{ + return plpks_read_var(PKS_BOOTLOADER_OWNER, var); +} + +static __init int pseries_plpks_init(void) +{ + int rc; + + rc = _plpks_get_config(); + + if (rc) { + pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n"); + return rc; + } + + rc = plpks_gen_password(); + if (rc) + pr_err("Failed setting POWER LPAR Platform KeyStore Password\n"); + else + pr_info("POWER LPAR Platform KeyStore initialized successfully\n"); + + return rc; +} +arch_initcall(pseries_plpks_init); diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h new file mode 100644 index 000000000000..c6a291367bb1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Platform keystore for pseries LPAR(PLPKS). + */ + +#ifndef _PSERIES_PLPKS_H +#define _PSERIES_PLPKS_H + +#include +#include + +#define OSSECBOOTAUDIT 0x40000000 +#define OSSECBOOTENFORCE 0x20000000 +#define WORLDREADABLE 0x08000000 +#define SIGNEDUPDATE 0x01000000 + +#define PLPKS_VAR_LINUX 0x01 +#define PLPKS_VAR_COMMON 0x04 + +struct plpks_var { + char *component; + u8 *name; + u8 *data; + u32 policy; + u16 namelen; + u16 datalen; + u8 os; +}; + +struct plpks_var_name { + u8 *name; + u16 namelen; +}; + +struct plpks_var_name_list { + u32 varcount; + struct plpks_var_name varlist[]; +}; + +/** + * Writes the specified var and its data to PKS. + * Any caller of PKS driver should present a valid component type for + * their variable. + */ +int plpks_write_var(struct plpks_var var); + +/** + * Removes the specified var and its data from PKS. + */ +int plpks_remove_var(char *component, u8 varos, + struct plpks_var_name vname); + +/** + * Returns the data for the specified os variable. + */ +int plpks_read_os_var(struct plpks_var *var); + +/** + * Returns the data for the specified firmware variable. + */ +int plpks_read_fw_var(struct plpks_var *var); + +/** + * Returns the data for the specified bootloader variable. + */ +int plpks_read_bootloader_var(struct plpks_var *var); + +#endif -- cgit From d20c96deb3e2c1cedc47d2be9fc110ffed81b1af Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 9 Jul 2022 14:43:05 +0200 Subject: powerpc/85xx: Fix description of MPC85xx and P1/P2 boards options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More MPC85xx and P1/P2 boards options have incorrect description. Fix them to include list of all boards for which they enable/disable support. Signed-off-by: Pali Rohár Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220709124305.17559-1-pali@kernel.org --- arch/powerpc/platforms/85xx/Kconfig | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 2be17ffe8714..be16eba0f704 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -62,13 +62,13 @@ config MPC85xx_CDS This option enables support for the MPC85xx CDS board config MPC85xx_MDS - bool "Freescale MPC85xx MDS" + bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" select DEFAULT_UIMAGE select PHYLIB if NETDEVICES select HAVE_RAPIDIO select SWIOTLB help - This option enables support for the MPC85xx MDS board + This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards config MPC8536_DS bool "Freescale MPC8536 DS" @@ -78,28 +78,30 @@ config MPC8536_DS This option enables support for the MPC8536 DS board config MPC85xx_DS - bool "Freescale MPC85xx DS" + bool "Freescale MPC8544 DS / MPC8572 DS / P2020 DS" select PPC_I8259 select DEFAULT_UIMAGE select FSL_ULI1575 if PCI select SWIOTLB help - This option enables support for the MPC85xx DS (MPC8544 DS) board + This option enables support for the MPC8544 DS, MPC8572 DS and P2020 DS boards config MPC85xx_RDB - bool "Freescale MPC85xx RDB" + bool "Freescale P102x MBG/UTM/RDB and P2020 RDB" select PPC_I8259 select DEFAULT_UIMAGE select FSL_ULI1575 if PCI select SWIOTLB help - This option enables support for the MPC85xx RDB (P2020 RDB) board + This option enables support for the P1020 MBG PC, P1020 UTM PC, + P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB, + P1025 RDB, P2020 RDB and P2020 RDB PC boards config P1010_RDB - bool "Freescale P1010RDB" + bool "Freescale P1010 RDB" select DEFAULT_UIMAGE help - This option enables support for the MPC85xx RDB (P1010 RDB) board + This option enables support for the P1010 RDB board P1010RDB contains P1010Si, which provides CPU performance up to 800 MHz and 1600 DMIPS, additional functionality and faster interfaces -- cgit From 0fe1e96fef0a5c53b4c0d1500d356f3906000f81 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:21:48 +0200 Subject: powerpc/pci: Prefer PCI domain assignment via DT 'linux,pci-domain' and alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other Linux architectures use DT property 'linux,pci-domain' for specifying fixed PCI domain of PCI controller specified in Device-Tree. And lot of Freescale powerpc boards have defined numbered pci alias in Device-Tree for every PCIe controller which number specify preferred PCI domain. So prefer usage of DT property 'linux,pci-domain' (via function of_get_pci_domain_nr()) and DT pci alias (via function of_alias_get_id()) on powerpc architecture for assigning PCI domain to PCI controller. Fixes: 63a72284b159 ("powerpc/pci: Assign fixed PHB number based on device-tree properties") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706102148.5060-2-pali@kernel.org --- arch/powerpc/kernel/pci-common.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cbb912ee92fa..8ce36aba42da 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -75,16 +75,30 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try device-tree + * alias and as the last try to use lower bits of "reg" property. */ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -96,10 +110,7 @@ static int get_phb_number(struct device_node *dn) if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) return phb_id; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); -- cgit From 1f00b5ab992c122c51bc37662b3b4df5963462f3 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 4 May 2022 20:08:22 +0200 Subject: powerpc/85xx: P2020: Add law_trgt_if property to PCIe DT nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DT law_trgt_if property defines Local Access Window Target Interface. Local Access Window Target Interface is used for identifying individual peripheral and mapping its memory to CPU. Interface id is defined by hardware itself. U-Boot uses law_trgt_if DT property in PCIe nodes for configuring memory mapping of individual PCIe controllers. Linux kernel fsl_pci.c driver currently does not touch Local Access Window and expects that U-Boot configures it properly. Add law_trgt_if property to PCIe DT nodes for P2020. This allows usage of kernel P2020 PCIe DT nodes in U-Boot. And therefore allows to share P2020 DTS files between Linux kernel and U-Boot. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220504180822.29782-1-pali@kernel.org --- arch/powerpc/boot/dts/fsl/p2020si-post.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi index 7a590c92fe56..81b9ab2119be 100644 --- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi @@ -48,6 +48,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <26 2 0 0>; + law_trgt_if = <2>; pcie@0 { reg = <0 0 0 0 0>; @@ -76,6 +77,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <25 2 0 0>; + law_trgt_if = <1>; pcie@0 { reg = <0 0 0 0 0>; @@ -105,6 +107,7 @@ bus-range = <0 255>; clock-frequency = <33333333>; interrupts = <24 2 0 0>; + law_trgt_if = <0>; pcie@0 { reg = <0 0 0 0 0>; -- cgit From 901a30cf5f765a26f1308701d9df9e7f3d0023a5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 18 Jul 2022 15:55:53 +0800 Subject: powerpc/pseries/vas: Fix comment typo The double `the' in line 807 is duplicated, remove one. Signed-off-by: Jason Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220718075553.70897-1-wangborong@cdjrlc.com --- arch/powerpc/platforms/pseries/vas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 91e7eda0606c..7e6e6dd2e33e 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -804,7 +804,7 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds) * The total number of available credits may be decreased or * increased with DLPAR operation. Means some windows have to be * closed / reopened. Hold the vas_pseries_mutex so that the - * the user space can not open new windows. + * user space can not open new windows. */ if (old_nr_creds < new_nr_creds) { /* -- cgit From 738f9dca0df3bb630e6f06a19573ab4e31bd443a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 15 Jul 2022 11:52:50 +0800 Subject: powerpc/sysdev: Fix comment typo The double `is' is duplicated in line 110, remove one. Signed-off-by: Jason Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220715035250.5978-1-wangborong@cdjrlc.com --- arch/powerpc/sysdev/cpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 3f130312b6e9..915f4d3991c3 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -107,7 +107,7 @@ EXPORT_SYMBOL(cpm_command); * memory mapped space. * The baud rate clock is the system clock divided by something. * It was set up long ago during the initial boot phase and is - * is given to us. + * given to us. * Baud rate clocks are zero-based in the driver code (as that maps * to port numbers). Documentation uses 1-based numbering. */ -- cgit From fde345e4d39a4f16697a8060564fff1dbac05035 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 7 Jul 2022 08:14:39 +0200 Subject: powerpc/platforms/83xx/suspend: Reorder to get rid of a forward declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By moving up pmc_types and pmc_match, the forward declaration for pmc_match can be dropped. Signed-off-by: Uwe Kleine-König Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707061441.193869-1-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/83xx/suspend.c | 43 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 6d47a5b81485..30b7700a2c98 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -319,7 +319,27 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static const struct of_device_id pmc_match[]; +static struct pmc_type pmc_types[] = { + { + .has_deep_sleep = 1, + }, + { + .has_deep_sleep = 0, + } +}; + +static const struct of_device_id pmc_match[] = { + { + .compatible = "fsl,mpc8313-pmc", + .data = &pmc_types[0], + }, + { + .compatible = "fsl,mpc8349-pmc", + .data = &pmc_types[1], + }, + {} +}; + static int pmc_probe(struct platform_device *ofdev) { struct device_node *np = ofdev->dev.of_node; @@ -406,27 +426,6 @@ static int pmc_remove(struct platform_device *ofdev) return -EPERM; }; -static struct pmc_type pmc_types[] = { - { - .has_deep_sleep = 1, - }, - { - .has_deep_sleep = 0, - } -}; - -static const struct of_device_id pmc_match[] = { - { - .compatible = "fsl,mpc8313-pmc", - .data = &pmc_types[0], - }, - { - .compatible = "fsl,mpc8349-pmc", - .data = &pmc_types[1], - }, - {} -}; - static struct platform_driver pmc_driver = { .driver = { .name = "mpc83xx-pmc", -- cgit From ccc1439b924bca5d5a5d81cf6b0d4b10b321282e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 7 Jul 2022 08:14:40 +0200 Subject: powerpc/platforms/83xx/suspend: Prevent unloading the driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error in .remove() doesn't prevent a driver from being unloaded. On unbind this only results in an error message, but the device is remove anyhow. I guess the author's idea of just returning -EPERM in .remove() was to prevent unbinding a device. To achieve that set the suppress_bind_attrs driver property and drop the useless .remove callback. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Reviewed-by: Christophe Leroy Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707061441.193869-2-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/83xx/suspend.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 30b7700a2c98..309f42ab63d4 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -421,18 +421,13 @@ out: return ret; } -static int pmc_remove(struct platform_device *ofdev) -{ - return -EPERM; -}; - static struct platform_driver pmc_driver = { .driver = { .name = "mpc83xx-pmc", .of_match_table = pmc_match, + .suppress_bind_attrs = true, }, .probe = pmc_probe, - .remove = pmc_remove }; builtin_platform_driver(pmc_driver); -- cgit From 95b002e4e47a36d88deec70808ef36674fb33cf5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 7 Jul 2022 08:14:41 +0200 Subject: powerpc/platforms/83xx/suspend: Remove write-only global variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pmc_dev is only assigned in .probe(), otherwise the variable is unused. So drop this pointer that serves no purpose. Signed-off-by: Uwe Kleine-König Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707061441.193869-3-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/83xx/suspend.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 309f42ab63d4..3fa8979ac8a6 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -100,7 +100,6 @@ struct pmc_type { int has_deep_sleep; }; -static struct platform_device *pmc_dev; static int has_deep_sleep, deep_sleeping; static int pmc_irq; static struct mpc83xx_pmc __iomem *pmc_regs; @@ -356,7 +355,6 @@ static int pmc_probe(struct platform_device *ofdev) has_deep_sleep = type->has_deep_sleep; immrbase = get_immrbase(); - pmc_dev = ofdev; is_pci_agent = mpc83xx_is_pci_agent(); if (is_pci_agent < 0) -- cgit From fcdb758ce113c5d1b2b7034a058a9c472e42415e Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Jun 2022 14:28:05 +1000 Subject: powerpc: make facility_unavailable_exception 64s The facility unavailable exception is only available on ppc book3s machines so use CONFIG_PPC_BOOK3S_64 rather than CONFIG_PPC64. tm_unavailable is only called from facility_unavailable_exception so can also be under this Kconfig symbol. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617042805.426231-1-rashmica@linux.ibm.com --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3aaa50e5c72f..dadfcef5d6db 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1676,7 +1676,7 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 static void tm_unavailable(struct pt_regs *regs) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -- cgit From e4787e71ae2de3f60bc04fe09d1be4ef628b6c68 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Jun 2022 14:31:35 +1000 Subject: powerpc/signal: Update comment for clarity The comment being referred to was deleted in commit af1bbc3dd3d5 ("powerpc: Remove UP only lazy floating point and vector optimisations"). Add a bit more detail so it's clear why we need to clear the FP/VEC/VSX bits here. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617043135.426897-1-rashmica@linux.ibm.com --- arch/powerpc/kernel/signal_64.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 472596a109e2..86bb5bb4c143 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -377,9 +377,12 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* - * Force reload of FP/VEC. - * This has to be done before copying stuff into tsk->thread.fpr/vr - * for the reasons explained in the previous comment. + * Force reload of FP/VEC/VSX so userspace sees any changes. + * Clear these bits from the user process' MSR before copying into the + * thread struct. If we are rescheduled or preempted and another task + * uses FP/VEC/VSX, and this process has the MSR bits set, then the + * context switch code will save the current CPU state into the + * thread_struct - possibly overwriting the data we are updating here. */ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); -- cgit From cd1e64935f79e31d666172c52c951ca97152b783 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Jun 2022 14:39:35 +1000 Subject: selftests/powerpc: Fix matrix multiply assist test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ISA states: "when ACC[i] contains defined data, the contents of VSRs 4×i to 4×i+3 are undefined until either a VSX Move From ACC instruction is used to copy the contents of ACC[i] to VSRs 4×i to 4×i+3 or some other instruction directly writes to one of these VSRs." We aren't doing this. This test only works on Power10 because the hardware implementation happens to map ACC0 to VSRs 0-3, but will fail on any other implementation that doesn't do this. So add xxmfacc between writing to the accumulator and accessing the VSRs. Fixes: 3527e1ab9a79 ("selftests/powerpc: Add matrix multiply assist (MMA) test") Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617043935.428083-1-rashmica@linux.ibm.com --- tools/testing/selftests/powerpc/math/mma.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S index 8528c9849565..61cc88b1b26b 100644 --- a/tools/testing/selftests/powerpc/math/mma.S +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -20,6 +20,9 @@ test_mma: /* xvi16ger2s */ .long 0xec042958 + /* Deprime the accumulator - xxmfacc 0 */ + .long 0x7c000162 + /* Store result in image passed in r5 */ stxvw4x 0,0,5 addi 5,5,16 -- cgit From 90b5d4fe0b3ba7f589c6723c6bfb559d9e83956a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 28 Jul 2022 00:32:17 +1000 Subject: powerpc/powernv: Avoid crashing if rng is NULL On a bare-metal Power8 system that doesn't have an "ibm,power-rng", a malicious QEMU and guest that ignore the absence of the KVM_CAP_PPC_HWRNG flag, and calls H_RANDOM anyway, will dereference a NULL pointer. In practice all Power8 machines have an "ibm,power-rng", but let's not rely on that, add a NULL check and early return in powernv_get_random_real_mode(). Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.") Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Jason A. Donenfeld Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220727143219.2684192-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/rng.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 3805ad13b8f3..2287c9cd0cd5 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -63,6 +63,8 @@ int powernv_get_random_real_mode(unsigned long *v) struct powernv_rng *rng; rng = raw_cpu_read(powernv_rng); + if (!rng) + return 0; *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); -- cgit From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 28 Jul 2022 00:32:18 +1000 Subject: powerpc/powernv/kvm: Use darn for H_RANDOM on Power9 The existing logic in KVM to support guests calling H_RANDOM only works on Power8, because it looks for an RNG in the device tree, but on Power9 we just use darn. In addition the existing code needs to work in real mode, so we have the special cased powernv_get_random_real_mode() to deal with that. Instead just have KVM call ppc_md.get_random_seed(), and do the real mode check inside of there, that way we use whatever RNG is available, including darn on Power9. Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.") Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Jason A. Donenfeld Tested-by: Sachin Sant [mpe: Rebase on previous commit, update change log appropriately] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/archrandom.h | 5 ----- arch/powerpc/kvm/book3s_hv_builtin.c | 7 ++++--- arch/powerpc/platforms/powernv/rng.c | 36 ++++++++--------------------------- 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9a53e29680f4..258174304904 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV -int powernv_hwrng_present(void); int powernv_get_random_long(unsigned long *v); -int powernv_get_random_real_mode(unsigned long *v); -#else -static inline int powernv_hwrng_present(void) { return 0; } -static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; } #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 88a8f6473c4e..3abaef5f9ac2 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); int kvmppc_hwrng_present(void) { - return powernv_hwrng_present(); + return ppc_md.get_random_seed != NULL; } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) + if (ppc_md.get_random_seed && + ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 2287c9cd0cd5..d19305292e1e 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -29,15 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); -int powernv_hwrng_present(void) -{ - struct powernv_rng *rng; - - rng = get_cpu_var(powernv_rng); - put_cpu_var(rng); - return rng != NULL; -} - static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) { unsigned long parity; @@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) return val; } -int powernv_get_random_real_mode(unsigned long *v) -{ - struct powernv_rng *rng; - - rng = raw_cpu_read(powernv_rng); - if (!rng) - return 0; - - *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); - - return 1; -} - static int powernv_get_random_darn(unsigned long *v) { unsigned long val; @@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v) { struct powernv_rng *rng; - rng = get_cpu_var(powernv_rng); - - *v = rng_whiten(rng, in_be64(rng->regs)); - - put_cpu_var(rng); - + if (mfmsr() & MSR_DR) { + rng = get_cpu_var(powernv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } else { + rng = raw_cpu_read(powernv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } return 1; } EXPORT_SYMBOL_GPL(powernv_get_random_long); -- cgit From 978030f054ff97d9079b35f0178e2013918fb316 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 28 Jul 2022 00:32:19 +1000 Subject: powerpc/powernv: rename remaining rng powernv_ functions to pnv_ The preferred nomenclature is pnv_, not powernv_, but rng.c used powernv_ for some reason, which isn't consistent with the rest. A recent commit added a few pnv_ functions to rng.c, making the file a bit of a mishmash. This commit just replaces the rest of them. Fixes: f3eac426657d ("powerpc/powernv: wire up rng during setup_arch") Signed-off-by: Jason A. Donenfeld Tested-by: Sachin Sant [mpe: Reorder after bug fix commits] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220727143219.2684192-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/archrandom.h | 2 +- arch/powerpc/platforms/powernv/rng.c | 34 +++++++++++++++++----------------- drivers/char/hw_random/powernv-rng.c | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 258174304904..3af27bb84a3d 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -38,7 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV -int powernv_get_random_long(unsigned long *v); +int pnv_get_random_long(unsigned long *v); #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index d19305292e1e..196aa70fe043 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -21,15 +21,15 @@ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul -struct powernv_rng { +struct pnv_rng { void __iomem *regs; void __iomem *regs_real; unsigned long mask; }; -static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); +static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng); -static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) +static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) { unsigned long parity; @@ -49,7 +49,7 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) return val; } -static int powernv_get_random_darn(unsigned long *v) +static int pnv_get_random_darn(unsigned long *v) { unsigned long val; @@ -73,31 +73,31 @@ static int __init initialise_darn(void) return -ENODEV; for (i = 0; i < 10; i++) { - if (powernv_get_random_darn(&val)) { - ppc_md.get_random_seed = powernv_get_random_darn; + if (pnv_get_random_darn(&val)) { + ppc_md.get_random_seed = pnv_get_random_darn; return 0; } } return -EIO; } -int powernv_get_random_long(unsigned long *v) +int pnv_get_random_long(unsigned long *v) { - struct powernv_rng *rng; + struct pnv_rng *rng; if (mfmsr() & MSR_DR) { - rng = get_cpu_var(powernv_rng); + rng = get_cpu_var(pnv_rng); *v = rng_whiten(rng, in_be64(rng->regs)); put_cpu_var(rng); } else { - rng = raw_cpu_read(powernv_rng); + rng = raw_cpu_read(pnv_rng); *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); } return 1; } -EXPORT_SYMBOL_GPL(powernv_get_random_long); +EXPORT_SYMBOL_GPL(pnv_get_random_long); -static __init void rng_init_per_cpu(struct powernv_rng *rng, +static __init void rng_init_per_cpu(struct pnv_rng *rng, struct device_node *dn) { int chip_id, cpu; @@ -107,16 +107,16 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng, pr_warn("No ibm,chip-id found for %pOF.\n", dn); for_each_possible_cpu(cpu) { - if (per_cpu(powernv_rng, cpu) == NULL || + if (per_cpu(pnv_rng, cpu) == NULL || cpu_to_chip_id(cpu) == chip_id) { - per_cpu(powernv_rng, cpu) = rng; + per_cpu(pnv_rng, cpu) = rng; } } } static __init int rng_create(struct device_node *dn) { - struct powernv_rng *rng; + struct pnv_rng *rng; struct resource res; unsigned long val; @@ -142,7 +142,7 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - ppc_md.get_random_seed = powernv_get_random_long; + ppc_md.get_random_seed = pnv_get_random_long; return 0; } @@ -190,7 +190,7 @@ static int __init pnv_rng_late_init(void) if (ppc_md.get_random_seed == pnv_get_random_long_early) pnv_get_random_long_early(&v); - if (ppc_md.get_random_seed == powernv_get_random_long) { + if (ppc_md.get_random_seed == pnv_get_random_long) { for_each_compatible_node(dn, NULL, "ibm,power-rng") of_platform_device_create(dn, NULL, NULL); } diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c index 8da1d7917bdc..429e956f34e1 100644 --- a/drivers/char/hw_random/powernv-rng.c +++ b/drivers/char/hw_random/powernv-rng.c @@ -23,7 +23,7 @@ static int powernv_rng_read(struct hwrng *rng, void *data, size_t max, bool wait buf = (unsigned long *)data; for (i = 0; i < len; i++) - powernv_get_random_long(buf++); + pnv_get_random_long(buf++); return len * sizeof(unsigned long); } -- cgit From 1547db7d1f4481c1f3ec731f3edc724ef3026ede Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 1 Jul 2022 16:24:34 +0800 Subject: powerpc: Move system_call_exception() to syscall.c This is a lead-up patch to enable syscall stack randomization, which uses alloca() and makes the compiler add unconditional stack canaries on syscall entry. In order to avoid triggering needless checks and slowing down the entry path, the feature needs to disable stack protector at the compilation unit level as there is no general way to control stack protector coverage with a function attribute. So move system_call_exception() to syscall.c to avoid affecting other functions in interrupt.c. Suggested-by: Michael Ellerman Signed-off-by: Xiu Jianfeng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701082435.126596-2-xiujianfeng@huawei.com --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/interrupt.c | 161 ------------------------------------- arch/powerpc/kernel/syscall.c | 173 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 162 deletions(-) create mode 100644 arch/powerpc/kernel/syscall.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index de954fe106c5..0963d39464c8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -62,7 +62,7 @@ obj-y := cputable.o syscalls.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ - kdebugfs.o stacktrace.o + kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 784ea3289c84..0e75cb03244a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -24,8 +24,6 @@ unsigned long global_dbcr0[NR_CPUS]; #endif -typedef long (*syscall_fn)(long, long, long, long, long, long); - #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -73,165 +71,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -/* Has to run notrace because it is entered not completely "reconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, - long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) -{ - syscall_fn f; - - kuap_lock(); - - regs->orig_gpr3 = r3; - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. - */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - r3 = regs->gpr[3]; - r4 = regs->gpr[4]; - r5 = regs->gpr[5]; - r6 = regs->gpr[6]; - r7 = regs->gpr[7]; - r8 = regs->gpr[8]; - - } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - return -ENOSYS; - } - - /* May be faster to do array_index_nospec? */ - barrier_nospec(); - - if (unlikely(is_compat_task())) { - f = (void *)compat_sys_call_table[r0]; - - r3 &= 0x00000000ffffffffULL; - r4 &= 0x00000000ffffffffULL; - r5 &= 0x00000000ffffffffULL; - r6 &= 0x00000000ffffffffULL; - r7 &= 0x00000000ffffffffULL; - r8 &= 0x00000000ffffffffULL; - - } else { - f = (void *)sys_call_table[r0]; - } - - return f(r3, r4, r5, r6, r7, r8); -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c new file mode 100644 index 000000000000..4d5689eeaf25 --- /dev/null +++ b/arch/powerpc/kernel/syscall.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include +#include +#include +#include +#include +#include + + +typedef long (*syscall_fn)(long, long, long, long, long, long); + +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, + long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) +{ + syscall_fn f; + + kuap_lock(); + + regs->orig_gpr3 = r3; + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(arch_irq_disabled_regs(regs)); + +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. + */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_assert_locked(); + + booke_restore_dbcr0(); + + account_cpu_user_entry(); + + account_stolen_time(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + + local_irq_enable(); + + if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + r3 = regs->gpr[3]; + r4 = regs->gpr[4]; + r5 = regs->gpr[5]; + r6 = regs->gpr[6]; + r7 = regs->gpr[7]; + r8 = regs->gpr[8]; + + } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? */ + barrier_nospec(); + + if (unlikely(is_compat_task())) { + f = (void *)compat_sys_call_table[r0]; + + r3 &= 0x00000000ffffffffULL; + r4 &= 0x00000000ffffffffULL; + r5 &= 0x00000000ffffffffULL; + r6 &= 0x00000000ffffffffULL; + r7 &= 0x00000000ffffffffULL; + r8 &= 0x00000000ffffffffULL; + + } else { + f = (void *)sys_call_table[r0]; + } + + return f(r3, r4, r5, r6, r7, r8); +} -- cgit From f4a0318f278d98d9492916722e85f258c2221f88 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 1 Jul 2022 16:24:35 +0800 Subject: powerpc: add support for syscall stack randomization Add support for adding a random offset to the stack while handling syscalls. This patch uses mftb() instead of get_random_int() for better performance. In order to avoid unconditional stack canaries on syscall entry (due to the use of alloca()), also disable stack protector to avoid triggering needless checks and slowing down the entry path. As there is no general way to control stack protector coverage with a function attribute, this must be disabled at the compilation unit level. Signed-off-by: Xiu Jianfeng Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701082435.126596-3-xiujianfeng@huawei.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/Makefile | 7 +++++++ arch/powerpc/kernel/syscall.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 04499f22d06b..2daeaab21240 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -196,6 +196,7 @@ config PPC select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0963d39464c8..06d2d1f78f71 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET +# Remove stack protector to avoid triggering unneeded stack canary +# checks due to randomize_kstack_offset. +CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong +CFLAGS_syscall.o += -fno-stack-protector +#endif + obj-y := cputable.o syscalls.o \ irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 4d5689eeaf25..81ace9e8b72b 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -18,10 +19,12 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs) { + long ret; syscall_fn f; kuap_lock(); + add_random_kstack_offset(); regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -169,5 +172,19 @@ notrace long system_call_exception(long r3, long r4, long r5, f = (void *)sys_call_table[r0]; } - return f(r3, r4, r5, r6, r7, r8); + ret = f(r3, r4, r5, r6, r7, r8); + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: the powerpc architecture + * may have two kinds of stack alignment (16-bytes and 8-bytes). + * + * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3]. + */ + choose_random_kstack_offset(mftb()); + + return ret; } -- cgit From 8c9f37a78f70fad763f0d61f03245bb011765086 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 28 Jul 2022 22:07:46 +0530 Subject: powerpc/perf: Include caps feature for power10 DD1 version Commit 6320e693d98c ("powerpc/perf: Add support for caps under sysfs in powerpc") added support for caps under sysfs in powerpc. This added caps directory to: /sys/bus/event_source/devices/cpu/ for power8, power9, power10 and generic compat PMU in respective PMU driver code. For power10, it is added under "power10_pmu_attr_groups". But for DD1 version, attr_groups are defined under dd1 array: "power10_pmu_attr_groups_dd1". Since caps is not added for DD1, it fails to include "cpu/caps" in DD1 model. The issue was observed while booting power10 pseries with qemu version 6, but not observed with qemu version 7. This is because qemu version 7 uses a DD 2.0 CPU model. Below is the trace log: Can't update unknown attr grp name: cpu/caps^M ------------[ cut here ]------------^M Failed to register pmu: cpu, reason -22^M WARNING: CPU: 1 PID: 1 at kernel/events/core.c:13427 perf_event_sysfs_init+0xbc/0x108^M Modules linked in:^M CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc2-00111-g6320e693d98c #148^M NIP: c0000000020391f4 LR: c0000000020391f0 CTR: c0000000008c9c30^M REGS: c0000000044c38c0 TRAP: 0700 Not tainted (5.19.0-rc2-00111-g6320e693d98c)^M MSR: 8000000002029033 CR: 48000281 XER: 20040000^M CFAR: c00000000013feac IRQMASK: 0 ^M GPR00: c0000000020391f0 c0000000044c3b60 c00000000283db00 0000000000000027 ^M GPR04: 80000000ffffe0a8 0000000000000000 0000000000000004 00000000fdcd0000 ^M GPR08: 0000000000000027 c0000000ffe07e08 0000000000000001 0000000000000000 ^M GPR12: c00000000035dd90 c0000000fffff300 c000000000012478 0000000000000000 ^M GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ^M GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ^M GPR24: c000000002003480 0000000000000007 c0000000012a78d0 c000000001170a80 ^M GPR28: c0000000026c4df8 c0000000026c4e68 0000000000000000 c0000000025a8628 ^M NIP [c0000000020391f4] perf_event_sysfs_init+0xbc/0x108^M LR [c0000000020391f0] perf_event_sysfs_init+0xb8/0x108^M Call Trace:^M [c0000000044c3b60] [c0000000020391f0] perf_event_sysfs_init+0xb8/0x108 (unreliable)^M [c0000000044c3bf0] [c000000000011ec4] do_one_initcall+0x64/0x2d0^M [c0000000044c3cd0] [c0000000020049fc] kernel_init_freeable+0x338/0x3e0^M [c0000000044c3db0] [c0000000000124a0] kernel_init+0x30/0x1a0^M [c0000000044c3e10] [c00000000000cd54] ret_from_kernel_thread+0x5c/0x64^M Instruction dump:^M 813f0038 2c090000 4180002c 7fe3fb78 4a3280c5 2c030000 7c651b78 41820018 ^M e89f0030 7f63db78 4a106c59 60000000 <0fe00000> ebff0000 4bffffb4 39200001 ^M ---[ end trace 0000000000000000 ]---^M Fix it by adding caps for dd1 attr_groups in power10 PMU driver. Fixes: 6320e693d98c ("powerpc/perf: Add support for caps under sysfs in powerpc") Signed-off-by: Athira Rajeev [mpe: Update change log to mention qemu 7 DD2.0 CPU model] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220728163746.85062-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 403ba3a69512..9b5133e361a7 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -270,6 +270,7 @@ static struct attribute_group power10_pmu_caps_group = { static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { &power10_pmu_format_group, &power10_pmu_events_group_dd1, + &power10_pmu_caps_group, NULL, }; -- cgit From 6ac059dacffa8ab2f7798f20e4bd3333890c541c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 3 Jun 2022 16:15:42 +0400 Subject: powerpc/spufs: Fix refcount leak in spufs_init_isolated_loader of_find_node_by_path() returns remote device nodepointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: 0afacde3df4c ("[POWERPC] spufs: allow isolated mode apps by starting the SPE loader") Signed-off-by: Miaoqian Lin Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220603121543.22884-1-linmq006@gmail.com --- arch/powerpc/platforms/cell/spufs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 34334c32b7f5..320008528edd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -660,6 +660,7 @@ spufs_init_isolated_loader(void) return; loader = of_get_property(dn, "loader", &size); + of_node_put(dn); if (!loader) return; -- cgit From 255b650cbec6849443ce2e0cdd187fd5e61c218c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 09:32:23 +0400 Subject: powerpc/xive: Fix refcount leak in xive_get_max_prio of_find_node_by_path() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Signed-off-by: Miaoqian Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220605053225.56125-1-linmq006@gmail.com --- arch/powerpc/sysdev/xive/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d02911e78cfc..e2c8f93b535b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -718,6 +718,7 @@ static bool __init xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; -- cgit From df5d4b616ee76abc97e5bd348e22659c2b095b1c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 10:51:29 +0400 Subject: powerpc/cell/axon_msi: Fix refcount leak in setup_msi_msg_address of_get_next_parent() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() in the error path to avoid refcount leak. Fixes: ce21b3c9648a ("[CELL] add support for MSI on Axon-based Cell systems") Signed-off-by: Miaoqian Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220605065129.63906-1-linmq006@gmail.com --- arch/powerpc/platforms/cell/axon_msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index f3291e957a19..5b012abca773 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -223,6 +223,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) if (!prop) { dev_dbg(&dev->dev, "axon_msi: no msi-address-(32|64) properties found\n"); + of_node_put(dn); return -ENOENT; } -- cgit From ff446cd76854d47f451a84c26bb70934ae2ec5a7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Aug 2022 18:48:34 +1000 Subject: selftests/powerpc: Avoid GCC 12 uninitialised variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 12 thinks that `actual` might be used uninitialised. It's not, the use is guarded by `bad_mmcr2` which is only set to true at the same point where `actual` is initialised. cycles_with_mmcr2_test.c: In function ‘cycles_with_mmcr2’: cycles_with_mmcr2_test.c:81:17: error: ‘actual’ may be used uninitialized [-Werror=maybe-uninitialized] 81 | printf("Bad MMCR2 value seen is 0x%lx\n", actual); Silence the warning by initialising `actual` to zero. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220801113746.802046-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 4b45a2e70f62..fc32187d483d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -50,6 +50,7 @@ int cycles_with_mmcr2(void) expected[1] = MMCR2_EXPECTED_2; i = 0; bad_mmcr2 = false; + actual = 0; /* Make sure we loop until we take at least one EBB */ while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) || -- cgit From ca829e05d3d4f728810cc5e4b468d9ebc7745eb3 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Tue, 26 Jul 2022 09:57:47 +0800 Subject: powerpc/64: Init jump labels before parse_early_param() On 64-bit, calling jump_label_init() in setup_feature_keys() is too late because static keys may be used in subroutines of parse_early_param() which is again subroutine of early_init_devtree(). For example booting with "threadirqs": static_key_enable_cpuslocked(): static key '0xc000000002953260' used before call to jump_label_init() WARNING: CPU: 0 PID: 0 at kernel/jump_label.c:166 static_key_enable_cpuslocked+0xfc/0x120 ... NIP static_key_enable_cpuslocked+0xfc/0x120 LR static_key_enable_cpuslocked+0xf8/0x120 Call Trace: static_key_enable_cpuslocked+0xf8/0x120 (unreliable) static_key_enable+0x30/0x50 setup_forced_irqthreads+0x28/0x40 do_early_param+0xa0/0x108 parse_args+0x290/0x4e0 parse_early_options+0x48/0x5c parse_early_param+0x58/0x84 early_init_devtree+0xd4/0x518 early_setup+0xb4/0x214 So call jump_label_init() just before parse_early_param() in early_init_devtree(). Suggested-by: Michael Ellerman Signed-off-by: Zhouyi Zhou [mpe: Add call trace to change log and minor wording edits.] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220726015747.11754-1-zhouzhouyi@gmail.com --- arch/powerpc/kernel/prom.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0a68c99da573..a730b951b64b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -752,6 +752,13 @@ void __init early_init_devtree(void *params) early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ -- cgit From f4b39e88b42d13366b831270306326b5c20971ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Aug 2022 20:38:32 +1000 Subject: powerpc/pci: Fix PHB numbering when using opal-phbid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent change to the PHB numbering logic has a logic error in the handling of "ibm,opal-phbid". When an "ibm,opal-phbid" property is present, &prop is written to and ret is set to zero. The following call to of_alias_get_id() is skipped because ret == 0. But then the if (ret >= 0) is true, and the body of that if statement sets prop = ret which throws away the value that was just read from "ibm,opal-phbid". Fix the logic by only doing the ret >= 0 check in the of_alias_get_id() case. Fixes: 0fe1e96fef0a ("powerpc/pci: Prefer PCI domain assignment via DT 'linux,pci-domain' and alias") Reviewed-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220802105723.1055178-1-mpe@ellerman.id.au --- arch/powerpc/kernel/pci-common.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8ce36aba42da..bdd3332200c5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -91,11 +91,13 @@ static int get_phb_number(struct device_node *dn) } if (ret) ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); - if (ret) + + if (ret) { ret = of_alias_get_id(dn, "pci"); - if (ret >= 0) { - prop = ret; - ret = 0; + if (ret >= 0) { + prop = ret; + ret = 0; + } } if (ret) { u32 prop_32; -- cgit From bce02f71e48f7b89b9b52424ad2df4e5bc87a8ec Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Aug 2022 12:30:38 +0200 Subject: EDAC/ppc_4xx: Include required of_irq header directly Commit 4d5c5bad5193 ("powerpc: Remove asm/prom.h from asm/mpc52xx.h and asm/pci.h") that cleans up powerpc's asm/prom.h leads to build errors in ppc4xx_edac.c due to missing header. Include required header directly to avoid the build failure. Fixes: 4d5c5bad5193 ("powerpc: Remove asm/prom.h from asm/mpc52xx.h and asm/pci.h") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/993f5a7da371458cb819b5f3f569073c78523b01.1659436180.git.christophe.leroy@csgroup.eu --- drivers/edac/ppc4xx_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 6793f6d799e7..0bc670778c99 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- cgit From 4cfa6ff24a9744ba484521c38bea613134fbfcb3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2022 16:29:41 +1000 Subject: powerpc/64e: Fix kexec build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building ppc64_book3e_allmodconfig the build fails with: arch/powerpc/kexec/file_load_64.c:1063:14: error: implicit declaration of function ‘firmware_has_feature’ 1063 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ^~~~~~~~~~~~~~~~~~~~ Add a direct include of asm/firmware.h to fix the error. Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220803063152.1249270-1-mpe@ellerman.id.au --- arch/powerpc/kexec/file_load_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 5d2c22aa34fb..683462e4556b 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include -- cgit