From 469bcef53c546bb792aa66303933272991b7831d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:39 +0200 Subject: irqchip/atmel-aic: Fix unbalanced of_node_put() in aic_common_irq_fixup() aic_common_irq_fixup() is calling twice of_node_put() on the same node thus leading to an unbalanced refcount on the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: b2f579b58e93 ("irqchip: atmel-aic: Add irq fixup infrastructure") Cc: Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 28b26c80f4cf..7c5a43488d27 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -196,7 +196,6 @@ static void __init aic_common_irq_fixup(const struct of_device_id *matches) return; match = of_match_node(matches, root); - of_node_put(root); if (match) { void (*fixup)(struct device_node *) = match->data; -- cgit From 277867ade8262583f4280cadbe90e0031a3706a7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:40 +0200 Subject: irqchip/atmel-aic: Fix unbalanced refcount in aic_common_rtc_irq_fixup() of_find_compatible_node() is calling of_node_put() on its first argument thus leading to an unbalanced of_node_get/put() issue if the node has not been retained before that. Instead of passing the root node, pass NULL, which does exactly the same: iterate over all DT nodes, starting from the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: 3d61467f9bab ("irqchip: atmel-aic: Implement RTC irq fixup") Cc: Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 7c5a43488d27..056507099725 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -142,9 +142,9 @@ void __init aic_common_rtc_irq_fixup(struct device_node *root) struct device_node *np; void __iomem *regs; - np = of_find_compatible_node(root, NULL, "atmel,at91rm9200-rtc"); + np = of_find_compatible_node(NULL, NULL, "atmel,at91rm9200-rtc"); if (!np) - np = of_find_compatible_node(root, NULL, + np = of_find_compatible_node(NULL, NULL, "atmel,at91sam9x5-rtc"); if (!np) -- cgit From 0a46230bf03549435156b36dee9e7489b8270be7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:41 +0200 Subject: irqchip/atmel-aic: Remove root argument from ->fixup() prototype We are no longer using the root argument passed to the ->fixup() hooks. Remove it. Signed-off-by: Boris Brezillon Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 8 ++++---- drivers/irqchip/irq-atmel-aic-common.h | 4 ++-- drivers/irqchip/irq-atmel-aic.c | 14 +++++++------- drivers/irqchip/irq-atmel-aic5.c | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 056507099725..072bd227b6c6 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -137,7 +137,7 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) #define AT91_RTC_IMR 0x28 #define AT91_RTC_IRQ_MASK 0x1f -void __init aic_common_rtc_irq_fixup(struct device_node *root) +void __init aic_common_rtc_irq_fixup(void) { struct device_node *np; void __iomem *regs; @@ -165,7 +165,7 @@ void __init aic_common_rtc_irq_fixup(struct device_node *root) #define AT91_RTT_ALMIEN (1 << 16) /* Alarm Interrupt Enable */ #define AT91_RTT_RTTINCIEN (1 << 17) /* Real Time Timer Increment Interrupt Enable */ -void __init aic_common_rtt_irq_fixup(struct device_node *root) +void __init aic_common_rtt_irq_fixup(void) { struct device_node *np; void __iomem *regs; @@ -198,8 +198,8 @@ static void __init aic_common_irq_fixup(const struct of_device_id *matches) match = of_match_node(matches, root); if (match) { - void (*fixup)(struct device_node *) = match->data; - fixup(root); + void (*fixup)(void) = match->data; + fixup(); } of_node_put(root); diff --git a/drivers/irqchip/irq-atmel-aic-common.h b/drivers/irqchip/irq-atmel-aic-common.h index af60376d50de..242e62c1851e 100644 --- a/drivers/irqchip/irq-atmel-aic-common.h +++ b/drivers/irqchip/irq-atmel-aic-common.h @@ -33,8 +33,8 @@ struct irq_domain *__init aic_common_of_init(struct device_node *node, const char *name, int nirqs, const struct of_device_id *matches); -void __init aic_common_rtc_irq_fixup(struct device_node *root); +void __init aic_common_rtc_irq_fixup(void); -void __init aic_common_rtt_irq_fixup(struct device_node *root); +void __init aic_common_rtt_irq_fixup(void); #endif /* __IRQ_ATMEL_AIC_COMMON_H */ diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 37f952dd9fc9..bb1ad451392f 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -209,20 +209,20 @@ static const struct irq_domain_ops aic_irq_ops = { .xlate = aic_irq_domain_xlate, }; -static void __init at91rm9200_aic_irq_fixup(struct device_node *root) +static void __init at91rm9200_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); + aic_common_rtc_irq_fixup(); } -static void __init at91sam9260_aic_irq_fixup(struct device_node *root) +static void __init at91sam9260_aic_irq_fixup(void) { - aic_common_rtt_irq_fixup(root); + aic_common_rtt_irq_fixup(); } -static void __init at91sam9g45_aic_irq_fixup(struct device_node *root) +static void __init at91sam9g45_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); - aic_common_rtt_irq_fixup(root); + aic_common_rtc_irq_fixup(); + aic_common_rtt_irq_fixup(); } static const struct of_device_id aic_irq_fixups[] __initconst = { diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index c04ee9a23d09..6acad2ea0fb3 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -305,9 +305,9 @@ static const struct irq_domain_ops aic5_irq_ops = { .xlate = aic5_irq_domain_xlate, }; -static void __init sama5d3_aic_irq_fixup(struct device_node *root) +static void __init sama5d3_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); + aic_common_rtc_irq_fixup(); } static const struct of_device_id aic5_irq_fixups[] __initconst = { -- cgit From 456c59c31c5126fe31c64956c43670060ea9debd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:34 +0100 Subject: irqchip/gic-v2: Report failures in gic_irq_domain_alloc If the GIC cannot map an IRQ via irq_domain_ops->alloc(), it doesn't return an error code. This can cause a problem with drivers, where it thinks it has successfully got an IRQ for the device, but requesting the same ends up failure with -ENOSYS (as the IRQ's chip is not set). Fixes: commit 9a1091ef0017c ("irqchip: gic: Support hierarchy irq domain.") Cc: Yingjoe Chen Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 1b1df4f770bd..940c16278758 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1027,8 +1027,11 @@ static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - for (i = 0; i < nr_irqs; i++) - gic_irq_domain_map(domain, virq + i, hwirq + i); + for (i = 0; i < nr_irqs; i++) { + ret = gic_irq_domain_map(domain, virq + i, hwirq + i); + if (ret) + return ret; + } return 0; } -- cgit From 63c16c6eacb69d0cbdaee5dea0dd56d238375fe6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:33 +0100 Subject: irqchip/gic-v3: Report failures in gic_irq_domain_alloc If the GIC cannot map an IRQ via irq_domain_ops->alloc(), it doesn't return an error code. This can cause a problem with drivers, where it thinks it has successfully got an IRQ for the device, but requesting the same ends up failure with -ENOSYS (as the IRQ's chip is not set). Fixes: commit 443acc4f37f6 ("irqchip: GICv3: Convert to domain hierarchy") Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index dbffb7ab6203..47630e9998b3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -831,8 +831,11 @@ static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - for (i = 0; i < nr_irqs; i++) - gic_irq_domain_map(domain, virq + i, hwirq + i); + for (i = 0; i < nr_irqs; i++) { + ret = gic_irq_domain_map(domain, virq + i, hwirq + i); + if (ret) + return ret; + } return 0; } -- cgit From 65a30f8b300107266f316d550f060ccc186201a3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:35 +0100 Subject: irqchip/gic-v3: Honor forced affinity setting Honor the 'force' flag for set_affinity, by selecting a CPU from the given mask (which may not be reported "online" by the cpu_online_mask). Some drivers, like ARM PMU, rely on it. Cc: Marc Zyngier Reported-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 47630e9998b3..5ba64a7584a3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -640,11 +640,16 @@ static void gic_smp_init(void) static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { - unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); + unsigned int cpu; void __iomem *reg; int enabled; u64 val; + if (force) + cpu = cpumask_first(mask_val); + else + cpu = cpumask_any_and(mask_val, cpu_online_mask); + if (cpu >= nr_cpu_ids) return -EINVAL; -- cgit From 05969566e6d64113a861adc6c17cbba685c640b3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 30 Jun 2017 17:43:02 -0300 Subject: ARM: dts: imx7d-sdb: Put pinctrl_spi4 in the correct location pinctrl_spi4 pin group is not part of the low power iomux controller, so move it under the normal iomuxc node. Fixes: 184f39b57cab6 ("ARM: dts: imx7d-sdb: Add GPIO expander node") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-sdb.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 54c45402286b..0a24d1bf3c39 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -557,6 +557,14 @@ >; }; + pinctrl_spi4: spi4grp { + fsl,pins = < + MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59 + MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59 + MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59 + >; + }; + pinctrl_tsc2046_pendown: tsc2046_pendown { fsl,pins = < MX7D_PAD_EPDC_BDR1__GPIO2_IO29 0x59 @@ -697,13 +705,5 @@ fsl,pins = < MX7D_PAD_LPSR_GPIO1_IO01__PWM1_OUT 0x110b0 >; - - pinctrl_spi4: spi4grp { - fsl,pins = < - MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59 - MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59 - MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59 - >; - }; }; }; -- cgit From 2a596fc9d974bb040eda9ab70bf8756fcaaa6afe Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 10 Jul 2017 16:55:04 +1000 Subject: drm/sun4i: Implement drm_driver lastclose to restore fbdev console The drm_driver lastclose callback is called when the last userspace DRM client has closed. Call drm_fbdev_cma_restore_mode to restore the fbdev console otherwise the fbdev console will stop working. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Cc: stable@vger.kernel.org Tested-by: Olliver Schinagl Reviewed-by: Chen-Yu Tsai Signed-off-by: Jonathan Liu Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index abc7d8fe06b4..a45a627283a1 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -25,12 +25,20 @@ #include "sun4i_framebuffer.h" #include "sun4i_tcon.h" +static void sun4i_drv_lastclose(struct drm_device *dev) +{ + struct sun4i_drv *drv = dev->dev_private; + + drm_fbdev_cma_restore_mode(drv->fbdev); +} + DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops); static struct drm_driver sun4i_drv_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, /* Generic Operations */ + .lastclose = sun4i_drv_lastclose, .fops = &sun4i_drv_fops, .name = "sun4i-drm", .desc = "Allwinner sun4i Display Engine", -- cgit From fa405fd9dd7b0a5367fb5a773e93ac59efb98f44 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 11 Jul 2017 09:40:15 +0200 Subject: ARM: dts: at91: sama5d2: use sama5d2 compatible string for SMC A new compatible string has been introduced for sama5d2 SMC to allow to manage the registers mapping change. Signed-off-by: Ludovic Desroches Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index cc06da394366..3e6e2dbc2595 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1048,7 +1048,7 @@ }; hsmc: hsmc@f8014000 { - compatible = "atmel,sama5d3-smc", "syscon", "simple-mfd"; + compatible = "atmel,sama5d2-smc", "syscon", "simple-mfd"; reg = <0xf8014000 0x1000>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>; clocks = <&hsmc_clk>; -- cgit From 8ff235fe7a8c4a5824c223b9996457174442e73a Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 7 Jul 2017 15:33:10 +0200 Subject: ARM: dts: at91: sama5d2: fix EBI/NAND controllers declaration Fix HSMC interrupt ID, PMECC registers and EBI ones. Fixes: d9c41bf30cf8 ("ARM: dts: at91: Declare EBI/NAND controllers") Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 3e6e2dbc2595..60e69aeacbdb 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -303,7 +303,7 @@ #size-cells = <1>; atmel,smc = <&hsmc>; reg = <0x10000000 0x10000000 - 0x40000000 0x30000000>; + 0x60000000 0x30000000>; ranges = <0x0 0x0 0x10000000 0x10000000 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 @@ -1050,16 +1050,16 @@ hsmc: hsmc@f8014000 { compatible = "atmel,sama5d2-smc", "syscon", "simple-mfd"; reg = <0xf8014000 0x1000>; - interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH 6>; clocks = <&hsmc_clk>; #address-cells = <1>; #size-cells = <1>; ranges; - pmecc: ecc-engine@ffffc070 { + pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; - reg = <0xffffc070 0x490>, - <0xffffc500 0x100>; + reg = <0xf8014070 0x490>, + <0xf8014500 0x100>; }; }; -- cgit From bc240eec4b074f5dc2753f295e980e66b72c90fb Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 26 Jun 2017 13:50:41 -0600 Subject: ntb: use correct mw_count function in ntb_tool and ntb_transport After converting to the new API, both ntb_tool and ntb_transport are using ntb_mw_count to iterate through ntb_peer_get_addr when they should be using ntb_peer_mw_count. This probably isn't an issue with the Intel and AMD drivers but this will matter for any future driver with asymetric memory window counts. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: 443b9a14ecbe ("NTB: Alter MW API to support multi-ports devices") --- drivers/ntb/ntb_transport.c | 2 +- drivers/ntb/test/ntb_tool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 9a03c5871efe..b29558ddfe95 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1059,7 +1059,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; - mw_count = ntb_mw_count(ndev, PIDX); + mw_count = ntb_peer_mw_count(ndev); if (!ndev->ops->mw_set_trans) { dev_err(&ndev->dev, "Inbound MW based NTB API is required\n"); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index f002bf48a08d..a69815c45ce6 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -959,7 +959,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) tc->ntb = ntb; init_waitqueue_head(&tc->link_wq); - tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS); + tc->mw_count = min(ntb_peer_mw_count(tc->ntb), MAX_MWS); for (i = 0; i < tc->mw_count; i++) { rc = tool_init_mw(tc, i); if (rc) -- cgit From 29178c1473fd4ad9c523b41bb18a047749c66d11 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 8 Jun 2017 18:10:28 +0200 Subject: ARC: defconfig: Cleanup from old Kconfig options Remove old, dead Kconfig option INET_LRO. It is gone since commit 7bbf3cae65b6 ("ipv4: Remove inet_lro library"). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Vineet Gupta --- arch/arc/configs/nps_defconfig | 1 - arch/arc/configs/tb10x_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index ede625c76216..7c9c706ae7f6 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -39,7 +39,6 @@ CONFIG_IP_PNP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 4c5118384eb5..f30182549395 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -38,7 +38,6 @@ CONFIG_IP_MULTICAST=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set -- cgit From 293b915fd9bebf33cdc906516fb28d54649a25ac Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Tue, 18 Jul 2017 17:20:36 -0700 Subject: Input: trackpoint - assume 3 buttons when buttons detection fails Trackpoint buttons detection fails on ThinkPad 570 and 470 series, this makes the middle button of the trackpoint to not being recogized. As I don't believe there is any trackpoint with less than 3 buttons this patch just assumes three buttons when the extended button information read fails. Signed-off-by: Oscar Campos Acked-by: Peter Hutterer Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 922ea02edcc3..20b5b21c1bba 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -380,8 +380,8 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) return 0; if (trackpoint_read(ps2dev, TP_EXT_BTN, &button_info)) { - psmouse_warn(psmouse, "failed to get extended button data\n"); - button_info = 0; + psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); -- cgit From bc9b934b2fbbd51008a1b52c0cd1b457e6440736 Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Tue, 25 Jul 2017 14:00:22 +0200 Subject: memory: atmel-ebi: Fix smc timing return value evaluation Setting optional EBI/SMC properties through device tree always fails due to wrong evaluation of the return value of atmel_ebi_xslate_smc_timings(). If you put some of those properties in your dts file, but not 'atmel,smc-tdf-ns' the local variable 'required' in atmel_ebi_xslate_smc_timings() stays on 'false' after the first 'if' block. This leads to setting 'ret' to -EINVAL in the first run of the following 'for' loop which is then the return value of this function. However if you set 'atmel,smc-tdf-ns' in the dts file and everything in atmel_ebi_xslate_smc_timings() works well, it returns the content of 'required' which is 'true' then. So the function atmel_ebi_xslate_smc_timings() always returns non-zero which lets its call in atmel_ebi_xslate_smc_config() always fail and thus returning -EINVAL, so the EBI configuration for this node fails. Judging from the following code evaluating the local 'required' variable in atmel_ebi_xslate_smc_config() and the call of caps->xlate_config in atmel_ebi_dev_setup() it's probably right to only let the call fail if a negative error code is returned. Signed-off-by: Alexander Dahl Acked-by: Boris Brezillon Signed-off-by: Alexandre Belloni --- drivers/memory/atmel-ebi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index 99e644cda4d1..1cf34d24d4e0 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -263,7 +263,7 @@ static int atmel_ebi_xslate_smc_config(struct atmel_ebi_dev *ebid, } ret = atmel_ebi_xslate_smc_timings(ebid, np, &conf->smcconf); - if (ret) + if (ret < 0) return -EINVAL; if ((ret > 0 && !required) || (!ret && required)) { -- cgit From 1f6b53901f5bffbbb0464c732e83bb2a3a784ff7 Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Tue, 25 Jul 2017 14:00:23 +0200 Subject: memory: atmel-ebi: Allow t_DF timings of zero ns As reported in [1] and in [2] it's not possible to set the device tree property 'atmel,smc-tdf-ns' to zero, although the SoC allows a setting of 0ns for the t_DF time. Allow this setting by doing the same thing as in the atmel nand controller driver by setting ncycles to ATMEL_SMC_MODE_TDF_MIN if zero is set in the dts. [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/490966.html [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-July/520652.html Suggested-by: Boris Brezillon Signed-off-by: Alexander Dahl Acked-by: Boris Brezillon Signed-off-by: Alexandre Belloni --- drivers/memory/atmel-ebi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index 1cf34d24d4e0..f8a01aea422e 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -120,12 +120,14 @@ static int atmel_ebi_xslate_smc_timings(struct atmel_ebi_dev *ebid, if (!ret) { required = true; ncycles = DIV_ROUND_UP(val, clk_period_ns); - if (ncycles > ATMEL_SMC_MODE_TDF_MAX || - ncycles < ATMEL_SMC_MODE_TDF_MIN) { + if (ncycles > ATMEL_SMC_MODE_TDF_MAX) { ret = -EINVAL; goto out; } + if (ncycles < ATMEL_SMC_MODE_TDF_MIN) + ncycles = ATMEL_SMC_MODE_TDF_MIN; + smcconf->mode |= ATMEL_SMC_MODE_TDF(ncycles); } -- cgit From 3fb3b3c4b68e33bb4acf42361b6a0db96115de35 Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Tue, 25 Jul 2017 14:00:24 +0200 Subject: memory: atmel-ebi: Fix smc cycle xlate converter The converter function for translating ns timings in register values was initialized with a wrong function pointer. This resulted in wrong register values also for the setup and pulse registers when configuring the EBI interface trough dts. Includes a small fix in a comment of the smc driver, which was probably just a copy'n'paste mistake. Signed-off-by: Alexander Dahl Acked-by: Boris Brezillon Acked-by: Lee Jones Signed-off-by: Alexandre Belloni --- drivers/memory/atmel-ebi.c | 2 +- drivers/mfd/atmel-smc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index f8a01aea422e..ebf69ff48ae2 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -72,7 +72,7 @@ struct atmel_smc_timing_xlate { { .name = nm, .converter = atmel_smc_cs_conf_set_pulse, .shift = pos} #define ATMEL_SMC_CYCLE_XLATE(nm, pos) \ - { .name = nm, .converter = atmel_smc_cs_conf_set_setup, .shift = pos} + { .name = nm, .converter = atmel_smc_cs_conf_set_cycle, .shift = pos} static void at91sam9_ebi_get_config(struct atmel_ebi_dev *ebid, struct atmel_ebi_dev_config *conf) diff --git a/drivers/mfd/atmel-smc.c b/drivers/mfd/atmel-smc.c index 954cf0f66a31..20cc0ea470fa 100644 --- a/drivers/mfd/atmel-smc.c +++ b/drivers/mfd/atmel-smc.c @@ -206,7 +206,7 @@ EXPORT_SYMBOL_GPL(atmel_smc_cs_conf_set_pulse); * parameter * * This function encodes the @ncycles value as described in the datasheet - * (section "SMC Pulse Register"), and then stores the result in the + * (section "SMC Cycle Register"), and then stores the result in the * @conf->setup field at @shift position. * * Returns -EINVAL if @shift is invalid, -ERANGE if @ncycles does not fit in -- cgit From eb92b4183d93a6f101a6bd3aaae651de404c119a Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Tue, 25 Jul 2017 11:11:14 +0200 Subject: iio: bmp280: properly initialize device for humidity reading If the device is not initialized at least once it happens that the humidity reading is skipped, which means the special value 0x8000 is delivered. For omitting this case the oversampling of the humidity must be set before the oversampling of the temperature und pressure is set as written in the datasheet of the BME280. Furthermore proper error detection is added in case a skipped value is read from the device. This is done also for pressure and temperature reading. Especially it don't make sense to compensate this value and treat it as regular value. Signed-off-by: Andreas Klinger Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 27 ++++++++++++++++++++++++--- drivers/iio/pressure/bmp280.h | 5 +++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index d82b788374b6..0d2ea3ee371b 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -282,6 +282,11 @@ static int bmp280_read_temp(struct bmp280_data *data, } adc_temp = be32_to_cpu(tmp) >> 12; + if (adc_temp == BMP280_TEMP_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading temperature skipped\n"); + return -EIO; + } comp_temp = bmp280_compensate_temp(data, adc_temp); /* @@ -317,6 +322,11 @@ static int bmp280_read_press(struct bmp280_data *data, } adc_press = be32_to_cpu(tmp) >> 12; + if (adc_press == BMP280_PRESS_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading pressure skipped\n"); + return -EIO; + } comp_press = bmp280_compensate_press(data, adc_press); *val = comp_press; @@ -345,6 +355,11 @@ static int bmp280_read_humid(struct bmp280_data *data, int *val, int *val2) } adc_humidity = be16_to_cpu(tmp); + if (adc_humidity == BMP280_HUMIDITY_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading humidity skipped\n"); + return -EIO; + } comp_humidity = bmp280_compensate_humidity(data, adc_humidity); *val = comp_humidity; @@ -597,14 +612,20 @@ static const struct bmp280_chip_info bmp280_chip_info = { static int bme280_chip_config(struct bmp280_data *data) { - int ret = bmp280_chip_config(data); + int ret; u8 osrs = BMP280_OSRS_HUMIDITIY_X(data->oversampling_humid + 1); + /* + * Oversampling of humidity must be set before oversampling of + * temperature/pressure is set to become effective. + */ + ret = regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY, + BMP280_OSRS_HUMIDITY_MASK, osrs); + if (ret < 0) return ret; - return regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY, - BMP280_OSRS_HUMIDITY_MASK, osrs); + return bmp280_chip_config(data); } static const struct bmp280_chip_info bme280_chip_info = { diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h index 2c770e13be0e..61347438b779 100644 --- a/drivers/iio/pressure/bmp280.h +++ b/drivers/iio/pressure/bmp280.h @@ -96,6 +96,11 @@ #define BME280_CHIP_ID 0x60 #define BMP280_SOFT_RESET_VAL 0xB6 +/* BMP280 register skipped special values */ +#define BMP280_TEMP_SKIPPED 0x80000 +#define BMP280_PRESS_SKIPPED 0x80000 +#define BMP280_HUMIDITY_SKIPPED 0x8000 + /* Regmap configurations */ extern const struct regmap_config bmp180_regmap_config; extern const struct regmap_config bmp280_regmap_config; -- cgit From 50b39608efb1e60f334f9b59128fd6d970bfd5a6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:57 +0200 Subject: iio: trigger: stm32-timer: fix quadrature mode get routine Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") SMS bitfiled is mode + 1. After reset, upon boot, SMS = 0. When reading from sysfs, stm32_get_quadrature_mode() returns -1 (e.g. -EPERM) which is wrong error code here. So, check SMS bitfiled matches valid encoder mode, or return -EINVAL. Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index d22bc56dd9fc..6aa73d6b2882 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -571,11 +571,14 @@ static int stm32_get_quadrature_mode(struct iio_dev *indio_dev, { struct stm32_timer_trigger *priv = iio_priv(indio_dev); u32 smcr; + int mode; regmap_read(priv->regmap, TIM_SMCR, &smcr); - smcr &= TIM_SMCR_SMS; + mode = (smcr & TIM_SMCR_SMS) - 1; + if ((mode < 0) || (mode > ARRAY_SIZE(stm32_quadrature_modes))) + return -EINVAL; - return smcr - 1; + return mode; } static const struct iio_enum stm32_quadrature_mode_enum = { -- cgit From 1987a08cd989fd9e5690e90a04e70046e93315f4 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:58 +0200 Subject: iio: trigger: stm32-timer: fix write_raw return value Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") IIO core expects zero as return value for write_raw() callback in case of success. Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 6aa73d6b2882..107918b3a90b 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -406,9 +406,8 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - regmap_write(priv->regmap, TIM_CNT, val); + return regmap_write(priv->regmap, TIM_CNT, val); - return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: /* fixed scale */ return -EINVAL; -- cgit From 06e3fe89988b1c99a3d9953b1d3b1faf3f047017 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:59 +0200 Subject: iio: trigger: stm32-timer: fix get/set down count direction Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") This fixes two issues: - stm32_set_count_direction: to set down direction - stm32_get_count_direction: to get down direction IIO core provides/expects value to be an index of iio_enum items array. This needs to be turned by these routines into TIM_CR1_DIR (e.g. BIT(4)) value. Also, report error when attempting to write direction, when in encoder mode: in this case, direction is read only (given by encoder inputs). Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 107918b3a90b..d28aa02b85e8 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -594,13 +594,20 @@ static const char *const stm32_count_direction_states[] = { static int stm32_set_count_direction(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, - unsigned int mode) + unsigned int dir) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 val; + int mode; - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR, mode); + /* In encoder mode, direction is RO (given by TI1/TI2 signals) */ + regmap_read(priv->regmap, TIM_SMCR, &val); + mode = (val & TIM_SMCR_SMS) - 1; + if ((mode >= 0) || (mode < ARRAY_SIZE(stm32_quadrature_modes))) + return -EBUSY; - return 0; + return regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR, + dir ? TIM_CR1_DIR : 0); } static int stm32_get_count_direction(struct iio_dev *indio_dev, @@ -611,7 +618,7 @@ static int stm32_get_count_direction(struct iio_dev *indio_dev, regmap_read(priv->regmap, TIM_CR1, &cr1); - return (cr1 & TIM_CR1_DIR); + return ((cr1 & TIM_CR1_DIR) ? 1 : 0); } static const struct iio_enum stm32_count_direction_enum = { -- cgit From 90938ca432e6b8f6bb1c22a24984738fc3d906ed Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:19:00 +0200 Subject: iio: trigger: stm32-timer: add enable attribute In order to use encoder mode, timers needs to be enabled (e.g. CEN bit) along with peripheral clock. Add IIO_CHAN_INFO_ENABLE attribute to handle this. Also, in triggered mode, CEN bit is set automatically in hardware. Then clock must be enabled before starting triggered mode. Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 55 +++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index d28aa02b85e8..14e6eb04bbb0 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -366,34 +366,32 @@ static int stm32_counter_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 dat; switch (mask) { case IIO_CHAN_INFO_RAW: - { - u32 cnt; - - regmap_read(priv->regmap, TIM_CNT, &cnt); - *val = cnt; + regmap_read(priv->regmap, TIM_CNT, &dat); + *val = dat; + return IIO_VAL_INT; + case IIO_CHAN_INFO_ENABLE: + regmap_read(priv->regmap, TIM_CR1, &dat); + *val = (dat & TIM_CR1_CEN) ? 1 : 0; return IIO_VAL_INT; - } - case IIO_CHAN_INFO_SCALE: - { - u32 smcr; - regmap_read(priv->regmap, TIM_SMCR, &smcr); - smcr &= TIM_SMCR_SMS; + case IIO_CHAN_INFO_SCALE: + regmap_read(priv->regmap, TIM_SMCR, &dat); + dat &= TIM_SMCR_SMS; *val = 1; *val2 = 0; /* in quadrature case scale = 0.25 */ - if (smcr == 3) + if (dat == 3) *val2 = 2; return IIO_VAL_FRACTIONAL_LOG2; } - } return -EINVAL; } @@ -403,6 +401,7 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 dat; switch (mask) { case IIO_CHAN_INFO_RAW: @@ -411,6 +410,22 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: /* fixed scale */ return -EINVAL; + + case IIO_CHAN_INFO_ENABLE: + if (val) { + regmap_read(priv->regmap, TIM_CR1, &dat); + if (!(dat & TIM_CR1_CEN)) + clk_enable(priv->clk); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, + TIM_CR1_CEN); + } else { + regmap_read(priv->regmap, TIM_CR1, &dat); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, + 0); + if (dat & TIM_CR1_CEN) + clk_disable(priv->clk); + } + return 0; } return -EINVAL; @@ -506,9 +521,19 @@ static int stm32_set_enable_mode(struct iio_dev *indio_dev, { struct stm32_timer_trigger *priv = iio_priv(indio_dev); int sms = stm32_enable_mode2sms(mode); + u32 val; if (sms < 0) return sms; + /* + * Triggered mode sets CEN bit automatically by hardware. So, first + * enable counter clock, so it can use it. Keeps it in sync with CEN. + */ + if (sms == 6) { + regmap_read(priv->regmap, TIM_CR1, &val); + if (!(val & TIM_CR1_CEN)) + clk_enable(priv->clk); + } regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms); @@ -681,7 +706,9 @@ static const struct iio_chan_spec_ext_info stm32_trigger_count_info[] = { static const struct iio_chan_spec stm32_trigger_channel = { .type = IIO_COUNT, .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_ENABLE) | + BIT(IIO_CHAN_INFO_SCALE), .ext_info = stm32_trigger_count_info, .indexed = 1 }; -- cgit From ff3aa88a4d61468baece3fc2bb54e2a3bea6360f Mon Sep 17 00:00:00 2001 From: Stefan Brüns Date: Wed, 26 Jul 2017 23:32:06 +0200 Subject: iio: adc: ina219: Avoid underflow for sleeping time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Proper support for the INA219 lowered the minimum sampling period from 2*140us to 2*84us. Subtracting 200us later leads to an underflow and an almost infinite udelay later. Using a signed int for the sampling period provides sufficient range (at most 2*8640*1024us), but catches the underflow when comparing with buffer_us. Fixes: 18edac2e22f4 ("iio: adc: Fix integration time/averaging for INA219/220") Signed-off-by: Stefan Brüns Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ina2xx-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 232c0b80d658..c3f86138cb55 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -644,7 +644,7 @@ static int ina2xx_capture_thread(void *data) { struct iio_dev *indio_dev = data; struct ina2xx_chip_info *chip = iio_priv(indio_dev); - unsigned int sampling_us = SAMPLING_PERIOD(chip); + int sampling_us = SAMPLING_PERIOD(chip); int buffer_us; /* -- cgit From 50dbe1f4b453b2860ef0e3d48054b9fd24d5ae97 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 24 Jul 2017 18:10:38 +0200 Subject: iio: adc: stm32: fix common clock rate ADC clock input is provided to internal prescaler (that decreases its frequency). It's then used as reference clock for conversions. - Fix common clock rate used then by stm32-adc sub-devices. Take common prescaler into account. Currently, rate is used to set "boost" mode. It may unnecessarily be set. This impacts power consumption. - Fix ADC max clock rate on STM32H7 (fADC from datasheet). Currently, prescaler may be set too low. This can result in ADC reference clock used for conversion to exceed max allowed clock frequency. Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7") Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index e09233b03c05..609676384f5e 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -64,7 +64,7 @@ #define STM32H7_CKMODE_MASK GENMASK(17, 16) /* STM32 H7 maximum analog clock rate (from datasheet) */ -#define STM32H7_ADC_MAX_CLK_RATE 72000000 +#define STM32H7_ADC_MAX_CLK_RATE 36000000 /** * stm32_adc_common_regs - stm32 common registers, compatible dependent data @@ -148,14 +148,14 @@ static int stm32f4_adc_clk_sel(struct platform_device *pdev, return -EINVAL; } - priv->common.rate = rate; + priv->common.rate = rate / stm32f4_pclk_div[i]; val = readl_relaxed(priv->common.base + STM32F4_ADC_CCR); val &= ~STM32F4_ADC_ADCPRE_MASK; val |= i << STM32F4_ADC_ADCPRE_SHIFT; writel_relaxed(val, priv->common.base + STM32F4_ADC_CCR); dev_dbg(&pdev->dev, "Using analog clock source at %ld kHz\n", - rate / (stm32f4_pclk_div[i] * 1000)); + priv->common.rate / 1000); return 0; } @@ -250,7 +250,7 @@ static int stm32h7_adc_clk_sel(struct platform_device *pdev, out: /* rate used later by each ADC instance to control BOOST mode */ - priv->common.rate = rate; + priv->common.rate = rate / div; /* Set common clock mode and prescaler */ val = readl_relaxed(priv->common.base + STM32H7_ADC_CCR); @@ -260,7 +260,7 @@ out: writel_relaxed(val, priv->common.base + STM32H7_ADC_CCR); dev_dbg(&pdev->dev, "Using %s clock/%d source at %ld kHz\n", - ckmode ? "bus" : "adc", div, rate / (div * 1000)); + ckmode ? "bus" : "adc", div, priv->common.rate / 1000); return 0; } -- cgit From 3f25bb4b7f7718d391321608f947840a79934568 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 21 Jun 2017 10:35:09 +0300 Subject: iwlwifi: mvm: fix TCP CSUM offload with WEP and A000 series When we enabled TCP checksum offload, we need to tell the firmware where the IP header starts. If we have an IV, then we need to adapt that value since the IV is placed before the SNAP header. This is true only for cases where the driver adds the IV, not the WEP case in which the IV is added by the firmware itself. On A000 devices series, the IV is always added by the device. Fix this. Fixes: 5e6a98dc4863 ("iwlwifi: mvm: enable TCP/UDP checksum support for 9000 family") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 60360ed73f26..e5d3eba2e82a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -185,8 +185,14 @@ static u16 iwl_mvm_tx_csum(struct iwl_mvm *mvm, struct sk_buff *skb, else udp_hdr(skb)->check = 0; - /* mac header len should include IV, size is in words */ - if (info->control.hw_key) + /* + * mac header len should include IV, size is in words unless + * the IV is added by the firmware like in WEP. + * In new Tx API, the IV is always added by the firmware. + */ + if (!iwl_mvm_has_new_tx_api(mvm) && info->control.hw_key && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP40 && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP104) mh_len += info->control.hw_key->iv_len; mh_len /= 2; offload_assist |= mh_len << TX_CMD_OFFLD_MH_SIZE; -- cgit From 58877d7428b0747134cb65096d51dfabdc62000d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 29 Jun 2017 16:18:21 +0300 Subject: iwlwifi: add TLV for MLME offload firmware capability The firmware now adds a new DWORD for the MLME offload's capability even on firmware versions that don't support it. Add the TLV bit to avoid getting the print: capa flags index 3 larger than supported by driver. This fixes the bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196195 Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/file.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 0fa8c473f1e2..c73a6438ce8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -328,6 +328,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_TX_POWER_ACK: reduced TX power API has larger * command size (command version 4) that supports toggling ACK TX * power reduction. + * @IWL_UCODE_TLV_CAPA_MLME_OFFLOAD: supports MLME offload * * @NUM_IWL_UCODE_TLV_CAPA: number of bits used */ @@ -373,6 +374,7 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG = (__force iwl_ucode_tlv_capa_t)80, IWL_UCODE_TLV_CAPA_LQM_SUPPORT = (__force iwl_ucode_tlv_capa_t)81, IWL_UCODE_TLV_CAPA_TX_POWER_ACK = (__force iwl_ucode_tlv_capa_t)84, + IWL_UCODE_TLV_CAPA_MLME_OFFLOAD = (__force iwl_ucode_tlv_capa_t)96, NUM_IWL_UCODE_TLV_CAPA #ifdef __CHECKER__ -- cgit From 92b0f7b26b313b23cc9bef0bd406607f4566c0c0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 3 Jul 2017 16:25:33 +0300 Subject: iwlwifi: split the regulatory rules when the bandwidth flags require it When we create a regulatory domain out of an MCC notification, we need to make sure that all the channels in the rule have the exact same properties. The current code mixes channel 36 and 40 although 36 can be a control channel with HT40+ (36, 40) whereas 40 can't be a control channel with HT40+ since (40, 44) is invalid. Because of that, cfg80211 would allow to connect in 40MHz to APs that are configured to channel 40 HT40+ and that made our firmware assert. Fix this by checking the bandwidth flags before taking the decision if the rule should be split. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=195299 partly. Fixes: af45a9003f1f ("iwlwifi: create regdomain from mcc_update_cmd response") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 5c08f4d40f6a..3ee6767392b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -785,7 +785,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc) { int ch_idx; - u16 ch_flags, prev_ch_flags = 0; + u16 ch_flags; + u32 reg_rule_flags, prev_reg_rule_flags = 0; const u8 *nvm_chan = cfg->ext_nvm ? iwl_ext_nvm_channels : iwl_nvm_channels; struct ieee80211_regdomain *regd; @@ -834,8 +835,11 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, continue; } + reg_rule_flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx, + ch_flags, cfg); + /* we can't continue the same rule */ - if (ch_idx == 0 || prev_ch_flags != ch_flags || + if (ch_idx == 0 || prev_reg_rule_flags != reg_rule_flags || center_freq - prev_center_freq > 20) { valid_rules++; new_rule = true; @@ -854,18 +858,17 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, rule->power_rule.max_eirp = DBM_TO_MBM(IWL_DEFAULT_MAX_TX_POWER); - rule->flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx, - ch_flags, cfg); + rule->flags = reg_rule_flags; /* rely on auto-calculation to merge BW of contiguous chans */ rule->flags |= NL80211_RRF_AUTO_BW; rule->freq_range.max_bandwidth_khz = 0; - prev_ch_flags = ch_flags; prev_center_freq = center_freq; + prev_reg_rule_flags = reg_rule_flags; IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x): Ad-Hoc %ssupported\n", + "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x) reg_flags 0x%x: %s\n", center_freq, band == NL80211_BAND_5GHZ ? "5.2" : "2.4", CHECK_AND_PRINT_I(VALID), @@ -877,10 +880,10 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, CHECK_AND_PRINT_I(160MHZ), CHECK_AND_PRINT_I(INDOOR_ONLY), CHECK_AND_PRINT_I(GO_CONCURRENT), - ch_flags, + ch_flags, reg_rule_flags, ((ch_flags & NVM_CHANNEL_ACTIVE) && !(ch_flags & NVM_CHANNEL_RADAR)) - ? "" : "not "); + ? "Ad-Hoc" : ""); } regd->n_reg_rules = valid_rules; -- cgit From 9465c3f8ba67cff697c0529de5a03cc5e1509d41 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Thu, 6 Jul 2017 05:07:33 +0300 Subject: iwlwifi: mvm: set A-MPDU bit upon empty BA notification from FW The bit was set only if there was at least one reclaimed frame in an aggregation. It's important to set it also in the case that the whole A-MPDU was lost, otherwise rate scaling statistics will not be updated correctly. Thus, set it always in ba notification handler. This fixes a throughput degradation of about 20% in certain scenarios with multiple streams on 11ac. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e5d3eba2e82a..5fcc9dd6be56 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1821,6 +1821,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) struct iwl_mvm_tid_data *tid_data; struct iwl_mvm_sta *mvmsta; + ba_info.flags = IEEE80211_TX_STAT_AMPDU; + if (iwl_mvm_has_new_tx_api(mvm)) { struct iwl_mvm_compressed_ba_notif *ba_res = (void *)pkt->data; -- cgit From 87f55616f81bf6c82f0e94cf4661151399d2a7b6 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Thu, 6 Jul 2017 05:27:55 +0300 Subject: iwlwifi: mvm: rs: fix TLC statistics collection Statistics should be collected according to the actual rate a frame/aggregation was transmitted and not according to the initial rate from the last LQ command (these rates are different if the frames were retransmitted at a lower rate from the rate scale table). This is needed to remove throughput degradation. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 65beca3a457a..8999a1199d60 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1291,7 +1291,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, * first index into rate scale table. */ if (info->flags & IEEE80211_TX_STAT_AMPDU) { - rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index, + rs_collect_tpc_data(mvm, lq_sta, curr_tbl, tx_resp_rate.index, info->status.ampdu_len, info->status.ampdu_ack_len, reduced_txp); @@ -1312,7 +1312,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (info->status.ampdu_ack_len == 0) info->status.ampdu_len = 1; - rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index, + rs_collect_tlc_data(mvm, lq_sta, curr_tbl, tx_resp_rate.index, info->status.ampdu_len, info->status.ampdu_ack_len); @@ -1348,11 +1348,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, continue; rs_collect_tpc_data(mvm, lq_sta, tmp_tbl, - lq_rate.index, 1, + tx_resp_rate.index, 1, i < retries ? 0 : legacy_success, reduced_txp); rs_collect_tlc_data(mvm, lq_sta, tmp_tbl, - lq_rate.index, 1, + tx_resp_rate.index, 1, i < retries ? 0 : legacy_success); } -- cgit From e9fb92e13d5e743a6ff13d3206c0f9e5e8cbc1f4 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 9 Jul 2017 15:51:44 +0300 Subject: iwlwifi: fix fw_pre_next_step to apply also for C step C step NICs should use the latest FW (currently B step). Correct the condition to make C step NICs advanced its default FW name to the latest one. Also rename _next_ to b_or_c to avoid confusion. Fixes: 5da083d1922c ("iwlwifi: add support for 9000 HW B-step NICs") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 14 +++++++------- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 8 ++++---- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 5 +++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index b4ecd1fe1374..97208ce19f92 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -154,7 +154,7 @@ static const struct iwl_tt_params iwl9000_tt_params = { const struct iwl_cfg iwl9160_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9160", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -165,7 +165,7 @@ const struct iwl_cfg iwl9160_2ac_cfg = { const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -176,7 +176,7 @@ const struct iwl_cfg iwl9260_2ac_cfg = { const struct iwl_cfg iwl9270_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9270", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -186,8 +186,8 @@ const struct iwl_cfg iwl9270_2ac_cfg = { const struct iwl_cfg iwl9460_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9460", - .fw_name_pre = IWL9000_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -198,8 +198,8 @@ const struct iwl_cfg iwl9460_2ac_cfg = { const struct iwl_cfg iwl9560_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9560", - .fw_name_pre = IWL9000_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c52623cb7c2a..d19c74827fbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -276,10 +276,10 @@ struct iwl_pwr_tx_backoff { * @fw_name_pre: Firmware filename prefix. The api version and extension * (.ucode) will be added to filename before loading from disk. The * filename is constructed as fw_name_pre.ucode. - * @fw_name_pre_next_step: same as @fw_name_pre, only for next step + * @fw_name_pre_b_or_c_step: same as @fw_name_pre, only for b or c steps * (if supported) - * @fw_name_pre_rf_next_step: same as @fw_name_pre_next_step, only for rf next - * step. Supported only in integrated solutions. + * @fw_name_pre_rf_next_step: same as @fw_name_pre_b_or_c_step, only for rf + * next step. Supported only in integrated solutions. * @ucode_api_max: Highest version of uCode API supported by driver. * @ucode_api_min: Lowest version of uCode API supported by driver. * @max_inst_size: The maximal length of the fw inst section @@ -330,7 +330,7 @@ struct iwl_cfg { /* params specific to an individual device within a device family */ const char *name; const char *fw_name_pre; - const char *fw_name_pre_next_step; + const char *fw_name_pre_b_or_c_step; const char *fw_name_pre_rf_next_step; /* params not likely to change within a device family */ const struct iwl_base_params *base_params; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 6fdb5921e17f..4e0f86fe0a6f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -216,8 +216,9 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) const char *fw_pre_name; if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_9000 && - CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_B_STEP) - fw_pre_name = cfg->fw_name_pre_next_step; + (CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_B_STEP || + CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_C_STEP)) + fw_pre_name = cfg->fw_name_pre_b_or_c_step; else if (drv->trans->cfg->integrated && CSR_HW_RFID_STEP(drv->trans->hw_rf_id) == SILICON_B_STEP && cfg->fw_name_pre_rf_next_step) -- cgit From 8addabf8e6e299f790038fdc92ddceaaf76adab8 Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Thu, 27 Jul 2017 04:53:55 +0300 Subject: iwlwifi: mvm: set the RTS_MIMO_PROT bit in flag mask when sending sta to fw Set the STA_FLG_RTS_MIMO_PROT bit in station_flags_msk of the add sta command, so that when smps mode changes, the FW will know about it. In particular, in AP mode, clients are added upon receival of an auth request, at which point there's no knowledge of the client's smps mode. When the assoc request arrives, the add_sta command is resent to modify the station parameters. At this point the driver knows the smps mode, but since the corresponding bit in the mask is not set, the fw doesn't update this field so there's no rts protection for mimo. Fixes: 5bc5aaad407c ("iwlwifi: mvm: set up initial SMPS/NSS station info") Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index ab66b4394dfc..dcaef7c043ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -121,7 +121,8 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, .mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color), .add_modify = update ? 1 : 0, .station_flags_msk = cpu_to_le32(STA_FLG_FAT_EN_MSK | - STA_FLG_MIMO_EN_MSK), + STA_FLG_MIMO_EN_MSK | + STA_FLG_RTS_MIMO_PROT), .tid_disable_tx = cpu_to_le16(mvm_sta->tid_disable_agg), }; int ret; -- cgit From 558f479f687aca6a336e13309424a7c3afd32721 Mon Sep 17 00:00:00 2001 From: Tzipi Peres Date: Sun, 30 Jul 2017 13:29:30 +0300 Subject: iwlwifi: add the new 9000 series PCI IDs Add two PCI IDs for the 9160 series. Add five PCI IDs for the 9260 series. Add one PCI IDs for the 9270 series. Add seven PCI IDs for the 9460 series. Add five PCI IDs for the 9560 series. Signed-off-by: Tzipi Peres Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f16c1bb9bf94..84f4ba01e14f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -510,9 +510,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, @@ -527,10 +535,22 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, -- cgit From f3fd2afed8eee91620d05b69ab94c14793c849d7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 28 Jul 2017 15:10:48 -0700 Subject: ntb: transport shouldn't disable link due to bogus values in SPADs It seems that under certain scenarios the SPAD can have bogus values caused by an agent (i.e. BIOS or other software) that is not the kernel driver, and that causes memory window setup failure. This should not cause the link to be disabled because if we do that, the driver will never recover again. We have verified in testing that this issue happens and prevents proper link recovery. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: 84f766855f61 ("ntb: stop link work when we do not have memory") --- drivers/ntb/ntb_transport.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index b29558ddfe95..f58d8e305323 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -924,10 +924,8 @@ out1: ntb_free_mw(nt, i); /* if there's an actual failure, we should just bail */ - if (rc < 0) { - ntb_link_disable(ndev); + if (rc < 0) return; - } out: if (ntb_link_is_up(ndev, NULL, NULL) == 1) -- cgit From 0eb46345364d7318b11068c46e8a68d5dc10f65e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 25 Jul 2017 14:57:42 -0600 Subject: ntb: ntb_test: ensure the link is up before trying to configure the mws After the link tests, there is a race on one side of the test for the link coming up. It's possible, in some cases, for the test script to write to the 'peer_trans' files before the link has come up. To fix this, we simply use the link event file to ensure both sides see the link as up before continuning. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") --- tools/testing/selftests/ntb/ntb_test.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 1c12b5855e4f..5fc7ad359e21 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -333,6 +333,10 @@ function ntb_tool_tests() link_test $LOCAL_TOOL $REMOTE_TOOL link_test $REMOTE_TOOL $LOCAL_TOOL + #Ensure the link is up on both sides before continuing + write_file Y $LOCAL_TOOL/link_event + write_file Y $REMOTE_TOOL/link_event + for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE $LOCAL_TOOL/$PT -- cgit From d1ce263feb40e6b3208f3e1ebec6dbe86df6f522 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 12 Jul 2017 15:25:09 +0200 Subject: irqchip/gic-v3-its: Remove ACPICA version check for ACPI NUMA The version check was added due to dependency to a618c7f89a02 ACPICA: Add support for new SRAT subtable Now, that this code is in the kernel, remove the check. This is esp. useful to enable backports. Signed-off-by: Robert Richter Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fed99c55e2f4..3bfbf8d96a0e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1835,7 +1835,7 @@ static int __init its_of_probe(struct device_node *node) #define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K) -#if defined(CONFIG_ACPI_NUMA) && (ACPI_CA_VERSION >= 0x20170531) +#ifdef CONFIG_ACPI_NUMA struct its_srat_map { /* numa node id */ u32 numa_node; -- cgit From 39a06b67c2c1256bcf2361a1f67d2529f70ab206 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Jul 2017 18:37:55 +0100 Subject: irqchip/gic: Ensure we have an ISB between ack and ->handle_irq Devices that expose their interrupt status registers via system registers (e.g. Statistical profiling, CPU PMU, DynamIQ PMU, arch timer, vgic (although unused by Linux), ...) rely on a context synchronising operation on the CPU to ensure that the updated status register is visible to the CPU when handling the interrupt. This usually happens as a result of taking the IRQ exception in the first place, but there are two race scenarios where this isn't the case. For example, let's say we have two peripherals (X and Y), where Y uses a system register for its interrupt status. Case 1: 1. CPU takes an IRQ exception as a result of X raising an interrupt 2. Y then raises its interrupt line, but the update to its system register is not yet visible to the CPU 3. The GIC decides to expose Y's interrupt number first in the Ack register 4. The CPU runs the IRQ handler for Y, but the status register is stale Case 2: 1. CPU takes an IRQ exception as a result of X raising an interrupt 2. CPU reads the interrupt number for X from the Ack register and runs its IRQ handler 3. Y raises its interrupt line and the Ack register is updated, but again, the update to its system register is not yet visible to the CPU. 4. Since the GIC drivers poll the Ack register, we read Y's interrupt number and run its handler without a context synchronisation operation, therefore seeing the stale register value. In either case, we run the risk of missing an IRQ. This patch solves the problem by ensuring that we execute an ISB in the GIC drivers prior to invoking the interrupt handler. This is already the case for GICv3 and EOIMode 1 (the usual case for the host). Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 2 ++ drivers/irqchip/irq-gic.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5ba64a7584a3..984c3ecfd22c 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -353,6 +353,8 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs if (static_key_true(&supports_deactivate)) gic_write_eoir(irqnr); + else + isb(); err = handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 940c16278758..d3e7c43718b8 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -361,6 +361,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) if (likely(irqnr > 15 && irqnr < 1020)) { if (static_key_true(&supports_deactivate)) writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + isb(); handle_domain_irq(gic->domain, irqnr, regs); continue; } @@ -401,10 +402,12 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) goto out; cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); - if (unlikely(gic_irq < 32 || gic_irq > 1020)) + if (unlikely(gic_irq < 32 || gic_irq > 1020)) { handle_bad_irq(desc); - else + } else { + isb(); generic_handle_irq(cascade_irq); + } out: chained_irq_exit(chip, desc); -- cgit From 4bb0f0e73c8c30917d169c4a0f1ac083690c545b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 1 Aug 2017 12:01:52 -0400 Subject: tracing: Call clear_boot_tracer() at lateinit_sync The clear_boot_tracer function is used to reset the default_bootup_tracer string to prevent it from being accessed after boot, as it originally points to init data. But since clear_boot_tracer() is called via the init_lateinit() call, it races with the initcall for registering the hwlat tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending on how the linker sets up the text, the saved command line may be cleared, and the hwlat tracer never is initialized. Simply have the clear_boot_tracer() be called by initcall_lateinit_sync() as that's for tasks to be called after lateinit. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551 Cc: stable@vger.kernel.org Fixes: e7c15cd8a ("tracing: Added hardware latency tracer") Reported-by: Zamir SUN Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 42b9355033d4..784fb43b2abe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8407,4 +8407,4 @@ __init static int clear_boot_tracer(void) } fs_initcall(tracer_init_tracefs); -late_initcall(clear_boot_tracer); +late_initcall_sync(clear_boot_tracer); -- cgit From 147d88e0b5eb90191bc5c12ca0a3c410b75a13d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Aug 2017 14:02:01 +0300 Subject: tracing: Missing error code in tracer_alloc_buffers() If ring_buffer_alloc() or one of the next couple function calls fail then we should return -ENOMEM but the current code returns success. Link: http://lkml.kernel.org/r/20170801110201.ajdkct7vwzixahvx@mwanda Cc: Sebastian Andrzej Siewior Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: b32614c03413 ('tracing/rb: Convert to hotplug state machine') Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 784fb43b2abe..d815fc317e9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8293,6 +8293,7 @@ __init static int tracer_alloc_buffers(void) if (ret < 0) goto out_free_cpumask; /* Used for event triggers */ + ret = -ENOMEM; temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); if (!temp_buffer) goto out_rm_hp_state; -- cgit From a7e52ad7ed82e21273eccff93d1477a7b313aabb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 2 Aug 2017 14:20:54 -0400 Subject: ring-buffer: Have ring_buffer_alloc_read_page() return error on offline CPU Chunyu Hu reported: "per_cpu trace directories and files are created for all possible cpus, but only the cpus which have ever been on-lined have their own per cpu ring buffer (allocated by cpuhp threads). While trace_buffers_open, the open handler for trace file 'trace_pipe_raw' is always trying to access field of ring_buffer_per_cpu, and would panic with the NULL pointer. Align the behavior of trace_pipe_raw with trace_pipe, that returns -NODEV when openning it if that cpu does not have trace ring buffer. Reproduce: cat /sys/kernel/debug/tracing/per_cpu/cpu31/trace_pipe_raw (cpu31 is never on-lined, this is a 16 cores x86_64 box) Tested with: 1) boot with maxcpus=14, read trace_pipe_raw of cpu15. Got -NODEV. 2) oneline cpu15, read trace_pipe_raw of cpu15. Get the raw trace data. Call trace: [ 5760.950995] RIP: 0010:ring_buffer_alloc_read_page+0x32/0xe0 [ 5760.961678] tracing_buffers_read+0x1f6/0x230 [ 5760.962695] __vfs_read+0x37/0x160 [ 5760.963498] ? __vfs_read+0x5/0x160 [ 5760.964339] ? security_file_permission+0x9d/0xc0 [ 5760.965451] ? __vfs_read+0x5/0x160 [ 5760.966280] vfs_read+0x8c/0x130 [ 5760.967070] SyS_read+0x55/0xc0 [ 5760.967779] do_syscall_64+0x67/0x150 [ 5760.968687] entry_SYSCALL64_slow_path+0x25/0x25" This was introduced by the addition of the feature to reuse reader pages instead of re-allocating them. The problem is that the allocation of a reader page (which is per cpu) does not check if the cpu is online and set up for the ring buffer. Link: http://lkml.kernel.org/r/1500880866-1177-1-git-send-email-chuhu@redhat.com Cc: stable@vger.kernel.org Fixes: 73a757e63114 ("ring-buffer: Return reader page back into existing ring buffer") Reported-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 14 +++++++++----- kernel/trace/ring_buffer_benchmark.c | 2 +- kernel/trace/trace.c | 16 +++++++++++----- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 529cc50d7243..81279c6602ff 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4386,15 +4386,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * the page that was allocated, with the read page of the buffer. * * Returns: - * The page allocated, or NULL on error. + * The page allocated, or ERR_PTR */ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) { - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = NULL; unsigned long flags; struct page *page; + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return ERR_PTR(-ENODEV); + + cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); @@ -4412,7 +4416,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); if (!page) - return NULL; + return ERR_PTR(-ENOMEM); bpage = page_address(page); @@ -4467,8 +4471,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * * for example: * rpage = ring_buffer_alloc_read_page(buffer, cpu); - * if (!rpage) - * return error; + * if (IS_ERR(rpage)) + * return PTR_ERR(rpage); * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); * if (ret >= 0) * process_page(rpage, ret); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 9fbcaf567886..68ee79afe31c 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -113,7 +113,7 @@ static enum event_status read_page(int cpu) int i; bpage = ring_buffer_alloc_read_page(buffer, cpu); - if (!bpage) + if (IS_ERR(bpage)) return EVENT_DROPPED; ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d815fc317e9d..44004d8aa3b3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6598,7 +6598,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; - ssize_t ret; + ssize_t ret = 0; ssize_t size; if (!count) @@ -6612,10 +6612,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, iter->cpu_file); - info->spare_cpu = iter->cpu_file; + if (IS_ERR(info->spare)) { + ret = PTR_ERR(info->spare); + info->spare = NULL; + } else { + info->spare_cpu = iter->cpu_file; + } } if (!info->spare) - return -ENOMEM; + return ret; /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) @@ -6790,8 +6795,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ref->ref = 1; ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); - if (!ref->page) { - ret = -ENOMEM; + if (IS_ERR(ref->page)) { + ret = PTR_ERR(ref->page); + ref->page = NULL; kfree(ref); break; } -- cgit From f862b31514bad66e48d9d4ff6036ee051cf36a6f Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 26 Jun 2017 14:47:25 +0300 Subject: ARC: [plat-axs10x]: prepare dts files for enabling PAE40 on axs103 Enable 64bit adressing, where it needed, to make possible enabling PAE40 on axs103. This patch doesn't affect on any functionality. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axc001.dtsi | 20 +++++++++----------- arch/arc/boot/dts/axc003.dtsi | 21 ++++++++++----------- arch/arc/boot/dts/axc003_idu.dtsi | 21 ++++++++++----------- arch/arc/boot/dts/axs10x_mb.dtsi | 2 +- 4 files changed, 30 insertions(+), 34 deletions(-) diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 53ce226f77a5..a380ffa1a458 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -15,15 +15,15 @@ / { compatible = "snps,arc"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; cpu_card { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; core_clk: core_clk { #clock-cells = <0>; @@ -91,23 +91,21 @@ mb_intc: dw-apb-ictl@0xe0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; - reg = < 0xe0012000 0x200 >; + reg = < 0x0 0xe0012000 0x0 0x200 >; interrupt-controller; interrupt-parent = <&core_intc>; interrupts = < 7 >; }; memory { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x00000000 0x80000000 0x20000000>; device_type = "memory"; - reg = <0x80000000 0x1b000000>; /* (512 - 32) MiB */ + /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x1b000000>; /* (512 - 32) MiB */ }; reserved-memory { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; ranges; /* * We just move frame buffer area to the very end of @@ -118,7 +116,7 @@ */ frame_buffer: frame_buffer@9e000000 { compatible = "shared-dma-pool"; - reg = <0x9e000000 0x2000000>; + reg = <0x0 0x9e000000 0x0 0x2000000>; no-map; }; }; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 14df46f141bf..cc9239ef8d08 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -14,15 +14,15 @@ / { compatible = "snps,arc"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; cpu_card { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; core_clk: core_clk { #clock-cells = <0>; @@ -94,30 +94,29 @@ mb_intc: dw-apb-ictl@0xe0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; - reg = < 0xe0012000 0x200 >; + reg = < 0x0 0xe0012000 0x0 0x200 >; interrupt-controller; interrupt-parent = <&core_intc>; interrupts = < 24 >; }; memory { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x80000000 0x20000000>; /* 512MiB */ + /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ + 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ }; reserved-memory { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; ranges; /* * Move frame buffer out of IOC aperture (0x8z-0xAz). */ frame_buffer: frame_buffer@be000000 { compatible = "shared-dma-pool"; - reg = <0xbe000000 0x2000000>; + reg = <0x0 0xbe000000 0x0 0x2000000>; no-map; }; }; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 695f9fa1996b..4ebb2170abec 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -14,15 +14,15 @@ / { compatible = "snps,arc"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; cpu_card { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; core_clk: core_clk { #clock-cells = <0>; @@ -100,30 +100,29 @@ mb_intc: dw-apb-ictl@0xe0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; - reg = < 0xe0012000 0x200 >; + reg = < 0x0 0xe0012000 0x0 0x200 >; interrupt-controller; interrupt-parent = <&idu_intc>; interrupts = <0>; }; memory { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x80000000 0x20000000>; /* 512MiB */ + /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ + 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ }; reserved-memory { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; ranges; /* * Move frame buffer out of IOC aperture (0x8z-0xAz). */ frame_buffer: frame_buffer@be000000 { compatible = "shared-dma-pool"; - reg = <0xbe000000 0x2000000>; + reg = <0x0 0xbe000000 0x0 0x2000000>; no-map; }; }; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 41cfb29b62c1..0ff7e07edcd4 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -13,7 +13,7 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x00000000 0xe0000000 0x10000000>; + ranges = <0x00000000 0x0 0xe0000000 0x10000000>; interrupt-parent = <&mb_intc>; i2sclk: i2sclk@100a0 { -- cgit From 33460f86ad2c982f3172a10b17948ccaf923f07f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 28 Jul 2017 16:53:50 +0530 Subject: ARC: [plat-sim] Include this platform unconditionally Essentially remove CONFIG_ARC_PLAT_SIM There is no need for any platform specific code, just the board DTS match strings which we can include unconditionally Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 - arch/arc/Makefile | 2 +- arch/arc/configs/haps_hs_defconfig | 1 - arch/arc/configs/haps_hs_smp_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - arch/arc/plat-sim/Kconfig | 13 ------------- arch/arc/plat-sim/platform.c | 5 ++++- 12 files changed, 5 insertions(+), 24 deletions(-) delete mode 100644 arch/arc/plat-sim/Kconfig diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index a5459698f0ee..7db85ab00c52 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -96,7 +96,6 @@ menu "ARC Architecture Configuration" menu "ARC Platform/SoC/Board" -source "arch/arc/plat-sim/Kconfig" source "arch/arc/plat-tb10x/Kconfig" source "arch/arc/plat-axs10x/Kconfig" #New platform adds here diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 44ef35d33956..3a61cfcc38c0 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -107,7 +107,7 @@ core-y += arch/arc/ # w/o this dtb won't embed into kernel binary core-y += arch/arc/boot/dts/ -core-$(CONFIG_ARC_PLAT_SIM) += arch/arc/plat-sim/ +core-y += arch/arc/plat-sim/ core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ core-$(CONFIG_ARC_PLAT_EZNPS) += arch/arc/plat-eznps/ diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 57b3e599322f..db04ea4dd2d9 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -21,7 +21,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs" CONFIG_PREEMPT=y diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index f85985adebb2..821a2e562f3f 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu" diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index b0066a749d4c..6dff83a238b8 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index ebe9ebb92933..31ee51b987e7 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -26,7 +26,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs" CONFIG_PREEMPT=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 4bde43278be6..8d3b1f67cae4 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -24,7 +24,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu" diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index f6fb3d26557e..6168ce2ac2ef 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index b9f0fe00044b..a70bdeb2b3fd 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 155add7761ed..ef96406c446e 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -18,7 +18,6 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_SIM=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set diff --git a/arch/arc/plat-sim/Kconfig b/arch/arc/plat-sim/Kconfig deleted file mode 100644 index ac6af96a82f3..000000000000 --- a/arch/arc/plat-sim/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -# -# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# - -menuconfig ARC_PLAT_SIM - bool "ARC nSIM based simulation virtual platforms" - help - Support for nSIM based ARC simulation platforms - This includes the standalone nSIM (uart only) vs. System C OSCI VP diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index aea87389e44b..5cda56b1a2ea 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -20,11 +20,14 @@ */ static const char *simulation_compat[] __initconst = { +#ifdef CONFIG_ISA_ARCOMPACT "snps,nsim", - "snps,nsim_hs", "snps,nsimosci", +#else + "snps,nsim_hs", "snps,nsimosci_hs", "snps,zebu_hs", +#endif NULL, }; -- cgit From b37174d95b0251611a80ef60abf03752e9d66d67 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 7 Jul 2017 12:25:14 +0300 Subject: ARCv2: SLC: Make sure busy bit is set properly for region ops c70c473396cb "ARCv2: SLC: Make sure busy bit is set properly on SLC flushing" fixes problem for entire SLC operation where the problem was initially caught. But given a nature of the issue it is perfectly possible for busy bit to be read incorrectly even when region operation was started. So extending initial fix for regional operation as well. Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org #4.10 Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a867575a758b..bebc24cb7912 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -697,6 +697,9 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); spin_unlock_irqrestore(&lock, flags); -- cgit From 2e332fec2f2c996f8d5447b0946ca43bb0ae4b42 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 18 Jul 2017 12:14:09 -0700 Subject: ARC: dma: implement dma_unmap_page and sg variant Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..1d0326d874e7 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -153,6 +153,19 @@ static void _dma_cache_sync(phys_addr_t paddr, size_t size, } } +/* + * arc_dma_map_page - map a portion of a page for streaming DMA + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). + * + * Note: while it takes struct page as arg, caller can "abuse" it to pass + * a region larger than PAGE_SIZE, provided it is physically contiguous + * and this still works correctly + */ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -165,6 +178,24 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, return plat_phys_to_dma(dev, paddr); } +/* + * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + * + * Note: historically this routine was not implemented for ARC + */ +static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t paddr = plat_dma_to_phys(dev, handle); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + _dma_cache_sync(paddr, size, dir); +} + static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { @@ -178,6 +209,18 @@ static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, return nents; } +static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, + attrs); +} + static void arc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { @@ -223,7 +266,9 @@ const struct dma_map_ops arc_dma_ops = { .free = arc_dma_free, .mmap = arc_dma_mmap, .map_page = arc_dma_map_page, + .unmap_page = arc_dma_unmap_page, .map_sg = arc_dma_map_sg, + .unmap_sg = arc_dma_unmap_sg, .sync_single_for_device = arc_dma_sync_single_for_device, .sync_single_for_cpu = arc_dma_sync_single_for_cpu, .sync_sg_for_cpu = arc_dma_sync_sg_for_cpu, -- cgit From 7d79cee2c6540ea64dd917a14e2fd63d4ac3d3c0 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 1 Aug 2017 12:58:47 +0300 Subject: ARCv2: PAE40: Explicitly set MSB counterpart of SLC region ops addresses It is necessary to explicitly set both SLC_AUX_RGN_START1 and SLC_AUX_RGN_END1 which hold MSB bits of the physical address correspondingly of region start and end otherwise SLC region operation is executed in unpredictable manner Without this patch, SLC flushes on HSDK (IOC disabled) were taking seconds. Cc: stable@vger.kernel.org #4.4+ Reported-by: Vladimir Kondratiev Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: PAR40 regs only written if PAE40 exist] --- arch/arc/include/asm/cache.h | 2 ++ arch/arc/mm/cache.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 19ebddffb279..02fd1cece6ef 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -96,7 +96,9 @@ extern unsigned long perip_base, perip_end; #define ARC_REG_SLC_FLUSH 0x904 #define ARC_REG_SLC_INVALIDATE 0x905 #define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_START1 0x915 #define ARC_REG_SLC_RGN_END 0x916 +#define ARC_REG_SLC_RGN_END1 0x917 /* Bit val in SLC_CONTROL */ #define SLC_CTRL_DIS 0x001 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index bebc24cb7912..874913b3e826 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -665,6 +665,7 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; + phys_addr_t end; spin_lock_irqsave(&lock, flags); @@ -694,8 +695,16 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ - write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); - write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + end = paddr + sz + l2_line_sz - 1; + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end)); + + write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end)); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr)); + + write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr)); /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ read_aux_reg(ARC_REG_SLC_CTRL); -- cgit From b5ddb6d54729d814356937572d6c9b599f10c29f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 3 Aug 2017 17:45:44 +0530 Subject: ARCv2: PAE40: set MSB even if !CONFIG_ARC_HAS_PAE40 but PAE exists in SoC PAE40 confiuration in hardware extends some of the address registers for TLB/cache ops to 2 words. So far kernel was NOT setting the higher word if feature was not enabled in software which is wrong. Those need to be set to 0 in such case. Normally this would be done in the cache flush / tlb ops, however since these registers only exist conditionally, this would have to be conditional to a flag being set on boot which is expensive/ugly - specially for the more common case of PAE exists but not in use. Optimize that by zero'ing them once at boot - nobody will write to them afterwards Cc: stable@vger.kernel.org #4.4+ Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 2 ++ arch/arc/mm/cache.c | 34 ++++++++++++++++++++++++++++------ arch/arc/mm/tlb.c | 12 +++++++++++- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index db7319e9b506..efb79fafff1d 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -94,6 +94,8 @@ static inline int is_pae40_enabled(void) return IS_ENABLED(CONFIG_ARC_HAS_PAE40); } +extern int pae40_exist_but_not_enab(void); + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 874913b3e826..7db283b46ebd 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1123,6 +1123,13 @@ noinline void __init arc_ioc_setup(void) __dc_enable(); } +/* + * Cache related boot time checks/setups only needed on master CPU: + * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES) + * Assume SMP only, so all cores will have same cache config. A check on + * one core suffices for all + * - IOC setup / dma callbacks only need to be done once + */ void __init arc_cache_init_master(void) { unsigned int __maybe_unused cpu = smp_processor_id(); @@ -1202,12 +1209,27 @@ void __ref arc_cache_init(void) printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - /* - * Only master CPU needs to execute rest of function: - * - Assume SMP so all cores will have same cache config so - * any geomtry checks will be same for all - * - IOC setup / dma callbacks only need to be setup once - */ if (!cpu) arc_cache_init_master(); + + /* + * In PAE regime, TLB and cache maintenance ops take wider addresses + * And even if PAE is not enabled in kernel, the upper 32-bits still need + * to be zeroed to keep the ops sane. + * As an optimization for more common !PAE enabled case, zero them out + * once at init, rather than checking/setting to 0 for every runtime op + */ + if (is_isa_arcv2() && pae40_exist_but_not_enab()) { + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) + write_aux_reg(ARC_REG_IC_PTAG_HI, 0); + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) + write_aux_reg(ARC_REG_DC_PTAG_HI, 0); + + if (l2_line_sz) { + write_aux_reg(ARC_REG_SLC_RGN_END1, 0); + write_aux_reg(ARC_REG_SLC_RGN_START1, 0); + } + } } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index d0126fdfe2d8..b181f3ee38aa 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -104,6 +104,8 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; +static int __read_mostly pae_exists; + /* * Utility Routine to erase a J-TLB entry * Caller needs to setup Index Reg (manually or via getIndex) @@ -784,7 +786,7 @@ void read_decode_mmu_bcr(void) mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; mmu->sasid = mmu4->sasid; - mmu->pae = mmu4->pae; + pae_exists = mmu->pae = mmu4->pae; } } @@ -809,6 +811,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) return buf; } +int pae40_exist_but_not_enab(void) +{ + return pae_exists && !is_pae40_enabled(); +} + void arc_mmu_init(void) { char str[256]; @@ -859,6 +866,9 @@ void arc_mmu_init(void) /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); #endif + + if (pae40_exist_but_not_enab()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); } /* -- cgit From 9e01e2d56db23485a75864b6aeee8e443f024ddb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 2 Aug 2017 12:51:29 -0700 Subject: soc: imx: gpcv2: fix regulator deferred probe If a regulator requests a deferred probe, the power domain gets initialized twice. This leads to a list double add (without list debugging the kernel hangs due to the double add later): WARNING: CPU: 0 PID: 19 at lib/list_debug.c:31 __list_add_valid+0xbc/0xc4 list_add double add: new=c1229754, prev=c12383b4, next=c1229754. Initialize the power domain after we get the regulator. Also do not print an error in case the regulator defers probing. Cc: Fabio Estevam Cc: Andrey Smirnov Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver") Signed-off-by: Stefan Agner Acked-by: Andrey Smirnov Tested-by: Andrey Smirnov Signed-off-by: Shawn Guo --- drivers/soc/imx/gpcv2.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index 3039072911a5..afc7ecc3c187 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -200,16 +200,11 @@ static int imx7_pgc_domain_probe(struct platform_device *pdev) domain->dev = &pdev->dev; - ret = pm_genpd_init(&domain->genpd, NULL, true); - if (ret) { - dev_err(domain->dev, "Failed to init power domain\n"); - return ret; - } - domain->regulator = devm_regulator_get_optional(domain->dev, "power"); if (IS_ERR(domain->regulator)) { if (PTR_ERR(domain->regulator) != -ENODEV) { - dev_err(domain->dev, "Failed to get domain's regulator\n"); + if (PTR_ERR(domain->regulator) != -EPROBE_DEFER) + dev_err(domain->dev, "Failed to get domain's regulator\n"); return PTR_ERR(domain->regulator); } } else { @@ -217,6 +212,12 @@ static int imx7_pgc_domain_probe(struct platform_device *pdev) domain->voltage, domain->voltage); } + ret = pm_genpd_init(&domain->genpd, NULL, true); + if (ret) { + dev_err(domain->dev, "Failed to init power domain\n"); + return ret; + } + ret = of_genpd_add_provider_simple(domain->dev->of_node, &domain->genpd); if (ret) { -- cgit From 8317562097acec4c9e9750eb91115687931bca35 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 2 Aug 2017 22:06:11 +0200 Subject: ARM: dts: i.MX25: add ranges to tscadc Add a ranges; line to the tscadc node. This creates a 1:1 mapping between the addresses used by tscadc and those in its child nodes (adc, tsc). Without such a mapping, the reg = ... lines in the tsc and adc nodes do not create a resource. Probing the fsl-imx25-tcq and fsl-imx25-tsadc drivers will then fail since there's no IORESOURCE_MEM. Signed-off-by: Martin Kaiser Fixes: 92f651f39b42 ("ARM: dts: imx25: Add TSC and ADC support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx25.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index dfcc8e00cf1c..0ade3619f3c3 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -297,6 +297,7 @@ #address-cells = <1>; #size-cells = <1>; status = "disabled"; + ranges; adc: adc@50030800 { compatible = "fsl,imx25-gcq"; -- cgit From aae9d563230f974f2daa7135f911f021b2bba9e6 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 14 Jul 2017 12:06:59 +0200 Subject: iwlwifi: mvm: Fix a memory leak in an error handling path in 'iwl_mvm_sar_get_wgds_table()' We should free 'wgds.pointer' here as done a few lines above in another error handling path. It was allocated within 'acpi_evaluate_object()'. Fixes: c52030a01ccc ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Signed-off-by: Christophe JAILLET Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 79e7a7a285dc..82863e9273eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1275,8 +1275,10 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) entry = &wifi_pkg->package.elements[idx++]; if ((entry->type != ACPI_TYPE_INTEGER) || - (entry->integer.value > U8_MAX)) - return -EINVAL; + (entry->integer.value > U8_MAX)) { + ret = -EINVAL; + goto out_free; + } mvm->geo_profiles[i].values[j] = entry->integer.value; } -- cgit From 8cd7b51ff57c74260b20c97623b0e0d420c22be8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 2 Aug 2017 10:26:58 +0000 Subject: arm64: renesas: salvator-common: avoid audio_clkout naming conflict clock name of "audio_clkout" is used by Renesas sound driver. This duplicated naming breaks its clock registering/unregistering. Especially, when unbind/bind it can't handle clkout correctly. This patch renames "audio_clkout" to "audio-clkout" to avoid naming conflict. Fixes: 8a8f181d2cfd ("arm64: renesas: salvator-x: use CS2000 as AUDIO_CLK_B") Signed-off-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/salvator-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a451996f590a..f903957da504 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -45,7 +45,7 @@ stdout-path = "serial0:115200n8"; }; - audio_clkout: audio_clkout { + audio_clkout: audio-clkout { /* * This is same as <&rcar_sound 0> * but needed to avoid cs2000/rcar_sound probe dead-lock -- cgit From c017d21147848fe017772764a77a7f32c5b017f9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Jul 2017 15:38:17 -0700 Subject: irqchip: brcmstb-l2: Define an irq_pm_shutdown function The Broadcom STB platforms support S5 and we allow specific hardware wake-up events to take us out of this state. Because we were not defining an irq_pm_shutdown() function pointer, we would not be correctly masking non-wakeup events, which would result in spurious wake-ups from sources that were not explicitly configured for wake-up. Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Acked-by: Gregory Fong Signed-off-by: Florian Fainelli Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-brcmstb-l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index bddf169c4b37..b009b916a292 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -189,6 +189,7 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np, ct->chip.irq_suspend = brcmstb_l2_intc_suspend; ct->chip.irq_resume = brcmstb_l2_intc_resume; + ct->chip.irq_pm_shutdown = brcmstb_l2_intc_suspend; if (data->can_wake) { /* This IRQ chip can wake the system, set all child interrupts -- cgit From 41e327b586762833e48b3703d53312ac32f05f24 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Mon, 7 Aug 2017 21:35:05 +0800 Subject: quota: correct space limit check Currently we compare total space (curspace + rsvspace) with space limit in quota-tools when setting grace time and also in check_bdq(), but we missing rsvspace in somewhere else, correct them. This patch also fix incorrect zero dqb_btime and grace time updating failure when we use rsvspace(e.g. ext4 dalloc feature). Signed-off-by: zhangyi (F) Signed-off-by: Jan Kara --- fs/quota/dquot.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 53a17496c5c5..566e6ef99f07 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1124,6 +1124,10 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_btime = (time64_t) 0; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) @@ -1145,7 +1149,8 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; - if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1381,14 +1386,18 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) static int info_bdq_free(struct dquot *dquot, qsize_t space) { + qsize_t tspace; + + tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; - if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit) + if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; - if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit && - dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit) + if (tspace >= dquot->dq_dqb.dqb_bhardlimit && + tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } @@ -2681,7 +2690,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) if (check_blim) { if (!dm->dqb_bsoftlimit || - dm->dqb_curspace < dm->dqb_bsoftlimit) { + dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) -- cgit From 0a9c40cea70d3f2dfa401bc259b318cd9aec8613 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 3 Aug 2017 09:57:12 -0700 Subject: test_kmod: fix kmod.sh by making it executable We had just forgotten to do this. Without this if we run the following we get a permission denied: sudo make -C tools/testing/selftests/kmod/ run_tests make: Entering directory '/home/mcgrof/linux-next/tools/testing/selftests/kmod' /bin/sh: ./kmod.sh: Permission denied selftests: kmod.sh [FAIL] /home/mcgrof/linux-next/tools/testing/selftests/kmod make: Leaving directory '/home/mcgrof/linux-next/tools/testing/selftests/kmod Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Luis R. Rodriguez Signed-off-by: Shuah Khan --- tools/testing/selftests/kmod/kmod.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/kmod/kmod.sh diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh old mode 100644 new mode 100755 -- cgit From 8b0949d407431559f7c80a02f5382f5e1c77b8d1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 3 Aug 2017 09:57:13 -0700 Subject: test_sysctl: fix sysctl.sh by making it executable We had just forogtten to do this. Without this the following test fails: $ sudo make -C tools/testing/selftests/sysctl/ run_tests make: Entering directory '/home/mcgrof/linux-next/tools/testing/selftests/sysctl' /bin/sh: ./sysctl.sh: Permission denied selftests: sysctl.sh [FAIL] /home/mcgrof/linux-next/tools/testing/selftests/sysctl make: Leaving directory '/home/mcgrof/linux-next/tools/testing/selftests/sysctl' Fixes: 64b671204afd71 ("test_sysctl: add generic script to expand on tests") Signed-off-by: Luis R. Rodriguez Signed-off-by: Shuah Khan --- tools/testing/selftests/sysctl/sysctl.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/sysctl/sysctl.sh diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh old mode 100644 new mode 100755 -- cgit From 33182d15c6bf182f7ae32a66ea4a547d979cd6d7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Aug 2017 16:56:36 +1000 Subject: md: always clear ->safemode when md_check_recovery gets the mddev lock. If ->safemode == 1, md_check_recovery() will try to get the mddev lock and perform various other checks. If mddev->in_sync is zero, it will call set_in_sync, and clear ->safemode. However if mddev->in_sync is not zero, ->safemode will not be cleared. When md_check_recovery() drops the mddev lock, the thread is woken up again. Normally it would just check if there was anything else to do, find nothing, and go to sleep. However as ->safemode was not cleared, it will take the mddev lock again, then wake itself up when unlocking. This results in an infinite loop, repeatedly calling md_check_recovery(), which RCU or the soft-lockup detector will eventually complain about. Prior to commit 4ad23a976413 ("MD: use per-cpu counter for writes_pending"), safemode would only be set to one when the writes_pending counter reached zero, and would be cleared again when writes_pending is incremented. Since that patch, safemode is set more freely, but is not reliably cleared. So in md_check_recovery() clear ->safemode before checking ->in_sync. Fixes: 4ad23a976413 ("MD: use per-cpu counter for writes_pending") Cc: stable@vger.kernel.org (4.12+) Reported-by: Dominik Brodowski Reported-by: David R Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index c99634612fc4..d84aceede1cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8656,6 +8656,9 @@ void md_check_recovery(struct mddev *mddev) if (mddev_trylock(mddev)) { int spares = 0; + if (mddev->safemode == 1) + mddev->safemode = 0; + if (mddev->ro) { struct md_rdev *rdev; if (!mddev->external && mddev->in_sync) -- cgit From 81fe48e9aa00bdd509bd3c37a76d1132da6b9f09 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Aug 2017 16:56:36 +1000 Subject: md: fix test in md_write_start() md_write_start() needs to clear the in_sync flag is it is set, or if there might be a race with set_in_sync() such that the later will set it very soon. In the later case it is sufficient to take the spinlock to synchronize with set_in_sync(), and then set the flag if needed. The current test is incorrect. It should be: if "flag is set" or "race is possible" "flag is set" is trivially "mddev->in_sync". "race is possible" should be tested by "mddev->sync_checkers". If sync_checkers is 0, then there can be no race. set_in_sync() will wait in percpu_ref_switch_to_atomic_sync() for an RCU grace period, and as md_write_start() holds the rcu_read_lock(), set_in_sync() will be sure ot see the update to writes_pending. If sync_checkers is > 0, there could be race. If md_write_start() happened entirely between if (!mddev->in_sync && percpu_ref_is_zero(&mddev->writes_pending)) { and mddev->in_sync = 1; in set_in_sync(), then it would not see that is_sync had been set, and set_in_sync() would not see that writes_pending had been incremented. This bug means that in_sync is sometimes not set when it should be. Consequently there is a small chance that the array will be marked as "clean" when in fact it is inconsistent. Fixes: 4ad23a976413 ("MD: use per-cpu counter for writes_pending") cc: stable@vger.kernel.org (v4.12+) Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d84aceede1cb..e4ba95f6cd59 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7996,7 +7996,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) if (mddev->safemode == 1) mddev->safemode = 0; /* sync_checkers is always 0 when writes_pending is in per-cpu mode */ - if (mddev->in_sync || !mddev->sync_checkers) { + if (mddev->in_sync || mddev->sync_checkers) { spin_lock(&mddev->lock); if (mddev->in_sync) { mddev->in_sync = 0; -- cgit From b44886c54a999771060371c3a05d5fedfc7e2102 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 31 Jul 2017 14:52:26 -0700 Subject: md/r5cache: call mddev_lock/unlock() in r5c_journal_mode_set In r5c_journal_mode_set(), it is necessary to call mddev_lock() before accessing conf and conf->log. Otherwise, the conf->log may change (and become NULL). Shaohua: fix unlock in failure cases Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index bfa1e907c472..ce98414a6e34 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2540,23 +2540,32 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) */ int r5c_journal_mode_set(struct mddev *mddev, int mode) { - struct r5conf *conf = mddev->private; - struct r5l_log *log = conf->log; - - if (!log) - return -ENODEV; + struct r5conf *conf; + int err; if (mode < R5C_JOURNAL_MODE_WRITE_THROUGH || mode > R5C_JOURNAL_MODE_WRITE_BACK) return -EINVAL; + err = mddev_lock(mddev); + if (err) + return err; + conf = mddev->private; + if (!conf || !conf->log) { + mddev_unlock(mddev); + return -ENODEV; + } + if (raid5_calc_degraded(conf) > 0 && - mode == R5C_JOURNAL_MODE_WRITE_BACK) + mode == R5C_JOURNAL_MODE_WRITE_BACK) { + mddev_unlock(mddev); return -EINVAL; + } mddev_suspend(mddev); conf->log->r5c_journal_mode = mode; mddev_resume(mddev); + mddev_unlock(mddev); pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n", mdname(mddev), mode, r5c_journal_mode_str[mode]); -- cgit From a9501d742127e613d744e29814e9532bacb147e8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Aug 2017 10:03:17 -0700 Subject: md/r5cache: fix io_unit handling in r5l_log_endio() In r5l_log_endio(), once log->io_list_lock is released, the io unit may be accessed (or even freed) by other threads. Current code doesn't handle the io_unit properly, which leads to potential race conditions. This patch solves this race condition by: 1. Add a pending_stripe count flush_payload. Multiple flush_payloads are counted as only one pending_stripe. Flag has_flush_payload is added to show whether the io unit has flush_payload; 2. In r5l_log_endio(), check flags has_null_flush and has_flush_payload with log->io_list_lock held. After the lock is released, this IO unit is only accessed when we know the pending_stripe counter cannot be zeroed by other threads. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index ce98414a6e34..2dcbafa8e66c 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -236,9 +236,10 @@ struct r5l_io_unit { bool need_split_bio; struct bio *split_bio; - unsigned int has_flush:1; /* include flush request */ - unsigned int has_fua:1; /* include fua request */ - unsigned int has_null_flush:1; /* include empty flush request */ + unsigned int has_flush:1; /* include flush request */ + unsigned int has_fua:1; /* include fua request */ + unsigned int has_null_flush:1; /* include null flush request */ + unsigned int has_flush_payload:1; /* include flush payload */ /* * io isn't sent yet, flush/fua request can only be submitted till it's * the first IO in running_ios list @@ -571,6 +572,8 @@ static void r5l_log_endio(struct bio *bio) struct r5l_io_unit *io_deferred; struct r5l_log *log = io->log; unsigned long flags; + bool has_null_flush; + bool has_flush_payload; if (bio->bi_status) md_error(log->rdev->mddev, log->rdev); @@ -580,6 +583,16 @@ static void r5l_log_endio(struct bio *bio) spin_lock_irqsave(&log->io_list_lock, flags); __r5l_set_io_unit_state(io, IO_UNIT_IO_END); + + /* + * if the io doesn't not have null_flush or flush payload, + * it is not safe to access it after releasing io_list_lock. + * Therefore, it is necessary to check the condition with + * the lock held. + */ + has_null_flush = io->has_null_flush; + has_flush_payload = io->has_flush_payload; + if (log->need_cache_flush && !list_empty(&io->stripe_list)) r5l_move_to_end_ios(log); else @@ -600,19 +613,23 @@ static void r5l_log_endio(struct bio *bio) if (log->need_cache_flush) md_wakeup_thread(log->rdev->mddev->thread); - if (io->has_null_flush) { + /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */ + if (has_null_flush) { struct bio *bi; WARN_ON(bio_list_empty(&io->flush_barriers)); while ((bi = bio_list_pop(&io->flush_barriers)) != NULL) { bio_endio(bi); - atomic_dec(&io->pending_stripe); + if (atomic_dec_and_test(&io->pending_stripe)) { + __r5l_stripe_write_finished(io); + return; + } } } - - /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */ - if (atomic_read(&io->pending_stripe) == 0) - __r5l_stripe_write_finished(io); + /* decrease pending_stripe for flush payload */ + if (has_flush_payload) + if (atomic_dec_and_test(&io->pending_stripe)) + __r5l_stripe_write_finished(io); } static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io) @@ -881,6 +898,11 @@ static void r5l_append_flush_payload(struct r5l_log *log, sector_t sect) payload->size = cpu_to_le32(sizeof(__le64)); payload->flush_stripes[0] = cpu_to_le64(sect); io->meta_offset += meta_size; + /* multiple flush payloads count as one pending_stripe */ + if (!io->has_flush_payload) { + io->has_flush_payload = 1; + atomic_inc(&io->pending_stripe); + } mutex_unlock(&log->io_mutex); } -- cgit From be37aa4b993bd5d4191f76a7bd43be33f987b972 Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Mon, 31 Jul 2017 14:45:10 -0700 Subject: scsi: qla2xxx: Fix system crash while triggering FW dump This patch fixes system hang/crash while firmware dump is attempted with Block MQ enabled in qla2xxx driver. Fix is to remove check in fw dump template entries for existing request and response queues so that full buffer size is calculated during template size calculation. Following stack trace is seen during firmware dump capture process [ 694.390588] qla2xxx [0000:81:00.0]-5003:11: ISP System Error - mbx1=4b1fh mbx2=10h mbx3=2ah mbx7=0h. [ 694.402336] BUG: unable to handle kernel paging request at ffffc90008c7b000 [ 694.402372] IP: memcpy_erms+0x6/0x10 [ 694.402386] PGD 105f01a067 [ 694.402386] PUD 85f89c067 [ 694.402398] PMD 10490cb067 [ 694.402409] PTE 0 [ 694.402421] [ 694.402437] Oops: 0002 [#1] PREEMPT SMP [ 694.402452] Modules linked in: netconsole configfs qla2xxx scsi_transport_fc nvme_fc nvme_fabrics bnep bluetooth rfkill xt_tcpudp unix_diag xt_multiport ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet iscsi_ibft iscsi_boot_sysfs xfs libcrc32c ipmi_ssif sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass igb crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel iTCO_wdt aes_x86_64 crypto_simd ptp iTCO_vendor_support glue_helper cryptd lpc_ich joydev i2c_i801 pcspkr ioatdma mei_me pps_core tpm_tis mei mfd_core acpi_power_meter tpm_tis_core ipmi_si ipmi_devintf tpm ipmi_msghandler shpchp wmi dca button acpi_pad btrfs xor uas usb_storage hid_generic usbhid raid6_pq crc32c_intel ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect [ 694.402692] sysimgblt fb_sys_fops xhci_pci ttm ehci_pci sr_mod xhci_hcd cdrom ehci_hcd drm usbcore sg [ 694.402730] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-1-default+ #19 [ 694.402753] Hardware name: Supermicro X10DRi/X10DRi, BIOS 1.1a 10/16/2015 [ 694.402776] task: ffffffff81c0e4c0 task.stack: ffffffff81c00000 [ 694.402798] RIP: 0010:memcpy_erms+0x6/0x10 [ 694.402813] RSP: 0018:ffff88085fc03cd0 EFLAGS: 00210006 [ 694.402832] RAX: ffffc90008c7ae0c RBX: 0000000000000004 RCX: 000000000001fe0c [ 694.402856] RDX: 0000000000020000 RSI: ffff8810332c01f4 RDI: ffffc90008c7b000 [ 694.402879] RBP: ffff88085fc03d18 R08: 0000000000020000 R09: 0000000000279e0a [ 694.402903] R10: 0000000000000000 R11: f000000000000000 R12: ffff88085fc03d80 [ 694.402927] R13: ffffc90008a01000 R14: ffffc90008a056d4 R15: ffff881052ef17e0 [ 694.402951] FS: 0000000000000000(0000) GS:ffff88085fc00000(0000) knlGS:0000000000000000 [ 694.402977] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 694.403012] CR2: ffffc90008c7b000 CR3: 0000000001c09000 CR4: 00000000001406f0 [ 694.403036] Call Trace: [ 694.403047] [ 694.403072] ? qla27xx_fwdt_entry_t263+0x18e/0x380 [qla2xxx] [ 694.403099] qla27xx_walk_template+0x9d/0x1a0 [qla2xxx] [ 694.403124] qla27xx_fwdump+0x1f3/0x272 [qla2xxx] [ 694.403149] qla2x00_async_event+0xb08/0x1a50 [qla2xxx] [ 694.403169] ? enqueue_task_fair+0xa2/0x9d0 Signed-off-by: Mike Hernandez Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_tmpl.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 33142610882f..b18646d6057f 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -401,9 +401,6 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; - if (!test_bit(i, vha->hw->req_qid_map)) - continue; - if (req || !buf) { length = req ? req->length : REQUEST_ENTRY_CNT_24XX; @@ -418,9 +415,6 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; - if (!test_bit(i, vha->hw->rsp_qid_map)) - continue; - if (rsp || !buf) { length = rsp ? rsp->length : RESPONSE_ENTRY_CNT_MQ; @@ -660,9 +654,6 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; - if (!test_bit(i, vha->hw->req_qid_map)) - continue; - if (req || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); @@ -675,9 +666,6 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; - if (!test_bit(i, vha->hw->rsp_qid_map)) - continue; - if (rsp || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); -- cgit From 180efde0a3f43dbe533e4be203c2918793482d4e Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 1 Aug 2017 14:42:54 +0200 Subject: scsi: st: fix blk_get_queue usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If blk_queue_get() in st_probe fails, disk->queue must not be set to SDp->request_queue, as that would result in put_disk() dropping a not taken reference. Thus, disk->queue should be set only after a successful blk_queue_get(). Fixes: 2b5bebccd282 ("st: Take additional queue ref in st_probe") Signed-off-by: Bodo Stroesser Acked-by: Shirish Pargaonkar Signed-off-by: Hannes Reinecke Reviewed-by: Ewan D. Milne Acked-by: Kai Mäkisara Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 8e5013d9cad4..94e402ed30f6 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4299,11 +4299,11 @@ static int st_probe(struct device *dev) kref_init(&tpnt->kref); tpnt->disk = disk; disk->private_data = &tpnt->driver; - disk->queue = SDp->request_queue; /* SCSI tape doesn't register this gendisk via add_disk(). Manually * take queue reference that release_disk() expects. */ - if (!blk_get_queue(disk->queue)) + if (!blk_get_queue(SDp->request_queue)) goto out_put_disk; + disk->queue = SDp->request_queue; tpnt->driver = &st_template; tpnt->device = SDp; -- cgit From b0e17a9b0df29590c45dfb296f541270a5941f41 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 1 Aug 2017 10:21:30 -0500 Subject: scsi: ipr: Fix scsi-mq lockdep issue Fixes the following lockdep warning that can occur when scsi-mq is enabled with ipr due to ipr calling scsi_unblock_requests from irq context. The fix is to move the call to scsi_unblock_requests to ipr's existing workqueue. stack backtrace: CPU: 28 PID: 0 Comm: swapper/28 Not tainted 4.13.0-rc2-gcc6x-gf74c89b #1 Call Trace: [c000001fffe97550] [c000000000b50818] dump_stack+0xe8/0x160 (unreliable) [c000001fffe97590] [c0000000001586d0] print_usage_bug+0x2d0/0x390 [c000001fffe97640] [c000000000158f34] mark_lock+0x7a4/0x8e0 [c000001fffe976f0] [c00000000015a000] __lock_acquire+0x6a0/0x1a70 [c000001fffe97860] [c00000000015befc] lock_acquire+0xec/0x2e0 [c000001fffe97930] [c000000000b71514] _raw_spin_lock+0x44/0x70 [c000001fffe97960] [c0000000005b60f4] blk_mq_sched_dispatch_requests+0xa4/0x2a0 [c000001fffe979c0] [c0000000005acac0] __blk_mq_run_hw_queue+0x100/0x2c0 [c000001fffe97a00] [c0000000005ad478] __blk_mq_delay_run_hw_queue+0x118/0x130 [c000001fffe97a40] [c0000000005ad61c] blk_mq_start_hw_queues+0x6c/0xa0 [c000001fffe97a80] [c000000000797aac] scsi_kick_queue+0x2c/0x60 [c000001fffe97aa0] [c000000000797cf0] scsi_run_queue+0x210/0x360 [c000001fffe97b10] [c00000000079b888] scsi_run_host_queues+0x48/0x80 [c000001fffe97b40] [c0000000007b6090] ipr_ioa_bringdown_done+0x70/0x1e0 [c000001fffe97bc0] [c0000000007bc860] ipr_reset_ioa_job+0x80/0xf0 [c000001fffe97bf0] [c0000000007b4d50] ipr_reset_timer_done+0xd0/0x100 [c000001fffe97c30] [c0000000001937bc] call_timer_fn+0xdc/0x4b0 [c000001fffe97cf0] [c000000000193d08] expire_timers+0x178/0x330 [c000001fffe97d60] [c0000000001940c8] run_timer_softirq+0xb8/0x120 [c000001fffe97de0] [c000000000b726a8] __do_softirq+0x168/0x6d8 [c000001fffe97ef0] [c0000000000df2c8] irq_exit+0x108/0x150 [c000001fffe97f10] [c000000000017bf4] __do_irq+0x2a4/0x4a0 [c000001fffe97f90] [c00000000002da50] call_do_irq+0x14/0x24 [c0000007fad93aa0] [c000000000017e8c] do_IRQ+0x9c/0x140 [c0000007fad93af0] [c000000000008b98] hardware_interrupt_common+0x138/0x140 Reported-by: Michael Ellerman Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 33 +++++++++++++++++++-------------- drivers/scsi/ipr.h | 2 ++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b0c68d24db01..da5bdbdcce52 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3351,6 +3351,16 @@ static void ipr_worker_thread(struct work_struct *work) return; } + if (ioa_cfg->scsi_unblock) { + ioa_cfg->scsi_unblock = 0; + ioa_cfg->scsi_blocked = 0; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->scsi_blocked) + scsi_block_requests(ioa_cfg->host); + } + if (!ioa_cfg->scan_enabled) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; @@ -7211,9 +7221,8 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { ipr_trace; - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + ioa_cfg->scsi_unblock = 1; + schedule_work(&ioa_cfg->work_q); } ioa_cfg->in_reset_reload = 0; @@ -7287,13 +7296,7 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); - spin_unlock(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock(ioa_cfg->host->host_lock); - - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) - scsi_block_requests(ioa_cfg->host); - + ioa_cfg->scsi_unblock = 1; schedule_work(&ioa_cfg->work_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -9249,8 +9252,11 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, spin_unlock(&ioa_cfg->hrrq[i]._lock); } wmb(); - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + ioa_cfg->scsi_unblock = 0; + ioa_cfg->scsi_blocked = 1; scsi_block_requests(ioa_cfg->host); + } ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); ioa_cfg->reset_cmd = ipr_cmd; @@ -9306,9 +9312,8 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, wake_up_all(&ioa_cfg->reset_wait_q); if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + ioa_cfg->scsi_unblock = 1; + schedule_work(&ioa_cfg->work_q); } return; } else { diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index e98a87a65335..c7f0e9e3cd7d 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1488,6 +1488,8 @@ struct ipr_ioa_cfg { u8 cfg_locked:1; u8 clear_isr:1; u8 probe_done:1; + u8 scsi_unblock:1; + u8 scsi_blocked:1; u8 revid; -- cgit From 424f727b94132a4193af401dd823c44612d1d59f Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 1 Aug 2017 13:45:36 -0500 Subject: scsi: ses: Fix wrong page error If a SES device returns an error on a requested diagnostic page, we are currently printing an error indicating the wrong page was received. Fix this up to simply return a failure and only check the returned page when the diagnostic page buffer was populated by the device. Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index f1cdf32d7514..8927f9f54ad9 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -99,7 +99,7 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, NULL, SES_TIMEOUT, SES_RETRIES, NULL); - if (unlikely(!ret)) + if (unlikely(ret)) return ret; recv_page_code = ((unsigned char *)buf)[0]; -- cgit From 7e39a00d593133ca8fcd3eef0409685e7c895ee6 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 26 Jul 2017 15:08:45 +0300 Subject: iwlwifi: mvm: start mac queues when deferred tx frames are purged In AP mode, if a station is removed just as it is adding a new stream, the queue in question will remain stopped and no more TX will happen in this queue, leading to connection failures and other problems. This is because under DQA, when tx is deferred because a queue needs to be allocated, the mac queue for that TID is stopped until the new stream is added. If at this point the station that this stream belongs to is removed, all the deferred tx frames are purged, but the mac queue is not restarted. As a result, all following tx on this queue will not be transmitted. Fix this by starting the relevant mac queues when the deferred tx frames are purged. Fixes: 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index c7b1e58e3384..ce901be5fba8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2597,8 +2597,18 @@ static void iwl_mvm_purge_deferred_tx_frames(struct iwl_mvm *mvm, spin_lock_bh(&mvm_sta->lock); for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { tid_data = &mvm_sta->tid_data[i]; - while ((skb = __skb_dequeue(&tid_data->deferred_tx_frames))) + + while ((skb = __skb_dequeue(&tid_data->deferred_tx_frames))) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* + * The first deferred frame should've stopped the MAC + * queues, so we should never get a second deferred + * frame for the RA/TID. + */ + iwl_mvm_start_mac_queues(mvm, info->hw_queue); ieee80211_free_txskb(mvm->hw, skb); + } } spin_unlock_bh(&mvm_sta->lock); } -- cgit From a600852a9d00be08c539307a42729fd46b0a654e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 27 Jul 2017 15:34:12 +0300 Subject: iwlwifi: mvm: don't WARN when a legit race happens in A-MPDU When we start an Rx A-MPDU session, we first get the AddBA request, then we send an ADD_STA command to the firmware that will reply with a BAID which is a hardware resource that tracks the BA session. This BAID will appear on each and every frame that we get from the firwmare until the A-MPDU session is torn down. In the Rx path, we look at this BAID to manage the reordering buffer. This flow is inherently racy since the hardware will start to put the BAID in the frames it receives even if the firmware hasn't sent the response to the ADD_STA command. This basically means that the driver can get frames with a valid BAID that it doesn't know yet. When that happens, the driver used to WARN. Fix this by simply not WARN in this case. When the driver will know abou the BAID, it will initialise the relevant states and the next frame with a valid BAID will refresh them. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index f3e608196369..71c8b800ffa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -636,9 +636,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid_data = rcu_dereference(mvm->baid_map[baid]); if (!baid_data) { - WARN(!(reorder & IWL_RX_MPDU_REORDER_BA_OLD_SN), - "Received baid %d, but no data exists for this BAID\n", - baid); + IWL_DEBUG_RX(mvm, + "Got valid BAID but no baid allocated, bypass the re-ordering buffer. Baid %d reorder 0x%x\n", + baid, reorder); return false; } @@ -759,7 +759,9 @@ static void iwl_mvm_agg_rx_received(struct iwl_mvm *mvm, data = rcu_dereference(mvm->baid_map[baid]); if (!data) { - WARN_ON(!(reorder_data & IWL_RX_MPDU_REORDER_BA_OLD_SN)); + IWL_DEBUG_RX(mvm, + "Got valid BAID but no baid allocated, bypass the re-ordering buffer. Baid %d reorder 0x%x\n", + baid, reorder_data); goto out; } -- cgit From 7f5770678b2d0cc8f3ffbf7eb73410f2acba7925 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 30 Jul 2017 21:10:44 +0300 Subject: dmaengine: tegra210-adma: fix of_irq_get() error check of_irq_get() may return 0 as well as negative error number on failure, while the driver only checks for the negative values. The driver would then call request_irq(0, ...) in tegra_adma_alloc_chan_resources() and never get valid channel interrupt. Check for 'tdc->irq <= 0' instead and return -ENXIO from the driver's probe iff of_irq_get() returned 0. Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA") Signed-off-by: Sergei Shtylyov Acked-by: Thierry Reding Acked-by: Jon Hunter Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index b10cbaa82ff5..b26256f23d67 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -717,8 +717,8 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); tdc->irq = of_irq_get(pdev->dev.of_node, i); - if (tdc->irq < 0) { - ret = tdc->irq; + if (tdc->irq <= 0) { + ret = tdc->irq ?: -ENXIO; goto irq_dispose; } -- cgit From 04c2cf34362f133be09878bd752f8b014318b59a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:25 +0300 Subject: mac80211: add api to start ba session timer expired flow Some drivers handle rx buffer reordering internally (and by extension handle also the rx ba session timer internally), but do not ofload the addba/delba negotiation. Add an api for these drivers to properly tear-down the ba session, including sending a delba. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/agg-rx.c | 22 +++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b2b5419467cc..f8149ca192b4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5499,6 +5499,21 @@ static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); } +/** + * ieee80211_rx_ba_timer_expired - stop a Rx BA session due to timeout + * + * Some device drivers do not offload AddBa/DelBa negotiation, but handle rx + * buffer reording internally, and therefore also handle the session timer. + * + * Trigger the timeout flow, which sends a DelBa. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid); + /* Rate control API */ /** diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 8708cbe8af5b..2b36eff5d97e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -466,3 +466,23 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); + +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get_bss(sdata, addr); + if (!sta) + goto unlock; + + set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired); -- cgit From 20fc690f38d17b8f961101a477a9aa0841fb6e20 Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:32 +0300 Subject: iwlwifi: mvm: send delba upon rx ba session timeout When an RX block-ack session times out, the firmware, which offloads RX reordering but not the BA session negotiation, stops the session but doesn't send a DELBA. This causes the the session to remain active in the remote device, so no more BA sessions will be established, causing a severe throughput degradation due to the lack of aggregation. Use the new ieee80211_rx_ba_timer_expired API when the ba session timer expires, since this will tear down the ba session and also send a delba. The previous API used is intended for drivers that offload the addba/delba negotiation, but not the rx reordering, while our driver does the opposite. This patch depends on "mac80211: add api to start ba session timer expired flow". Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dcaef7c043ac..027ee5e72172 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -291,8 +291,8 @@ static void iwl_mvm_rx_agg_session_expired(unsigned long data) goto unlock; mvm_sta = iwl_mvm_sta_from_mac80211(sta); - ieee80211_stop_rx_ba_session_offl(mvm_sta->vif, - sta->addr, ba_data->tid); + ieee80211_rx_ba_timer_expired(mvm_sta->vif, + sta->addr, ba_data->tid); unlock: rcu_read_unlock(); } -- cgit From 28389575a8cf933a5f3c378556b9f4d3cce0efd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 2 Aug 2017 16:40:47 +0800 Subject: crypto: ixp4xx - Fix error handling path in 'aead_perform()' In commit 0f987e25cb8a, the source processing has been moved in front of the destination processing, but the error handling path has not been modified accordingly. Free resources in the correct order to avoid some leaks. Cc: Fixes: 0f987e25cb8a ("crypto: ixp4xx - Fix false lastlen uninitialised warning") Reported-by: Christophe JAILLET Signed-off-by: Herbert Xu Reviewed-by: Arnd Bergmann --- drivers/crypto/ixp4xx_crypto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 427cbe012729..dadc4a808df5 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -1073,7 +1073,7 @@ static int aead_perform(struct aead_request *req, int encrypt, req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags, &crypt->icv_rev_aes); if (unlikely(!req_ctx->hmac_virt)) - goto free_buf_src; + goto free_buf_dst; if (!encrypt) { scatterwalk_map_and_copy(req_ctx->hmac_virt, req->src, cryptlen, authsize, 0); @@ -1088,10 +1088,10 @@ static int aead_perform(struct aead_request *req, int encrypt, BUG_ON(qmgr_stat_overflow(SEND_QID)); return -EINPROGRESS; -free_buf_src: - free_buf_chain(dev, req_ctx->src, crypt->src_buf); free_buf_dst: free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); +free_buf_src: + free_buf_chain(dev, req_ctx->src, crypt->src_buf); crypt->ctl_flags = CTL_FLAG_UNUSED; return -ENOMEM; } -- cgit From 8861249c740fc4af9ddc5aee321eafefb960d7c6 Mon Sep 17 00:00:00 2001 From: "megha.dey@linux.intel.com" Date: Wed, 2 Aug 2017 13:49:09 -0700 Subject: crypto: x86/sha1 - Fix reads beyond the number of blocks passed It was reported that the sha1 AVX2 function(sha1_transform_avx2) is reading ahead beyond its intended data, and causing a crash if the next block is beyond page boundary: http://marc.info/?l=linux-crypto-vger&m=149373371023377 This patch makes sure that there is no overflow for any buffer length. It passes the tests written by Jan Stancek that revealed this problem: https://github.com/jstancek/sha1-avx2-crash I have re-enabled sha1-avx2 by reverting commit b82ce24426a4071da9529d726057e4e642948667 Cc: Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2") Originally-by: Ilya Albrekht Tested-by: Jan Stancek Signed-off-by: Megha Dey Reported-by: Jan Stancek Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++++++---------------- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1cd792db15ef..1eab79c9ac48 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -117,11 +117,10 @@ .set T1, REG_T1 .endm -#define K_BASE %r8 #define HASH_PTR %r9 +#define BLOCKS_CTR %r8 #define BUFFER_PTR %r10 #define BUFFER_PTR2 %r13 -#define BUFFER_END %r11 #define PRECALC_BUF %r14 #define WK_BUF %r15 @@ -205,14 +204,14 @@ * blended AVX2 and ALU instruction scheduling * 1 vector iteration per 8 rounds */ - vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP + vmovdqu (i * 2)(BUFFER_PTR), W_TMP .elseif ((i & 7) == 1) - vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ + vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\ WY_TMP, WY_TMP .elseif ((i & 7) == 2) vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY .elseif ((i & 7) == 4) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP .elseif ((i & 7) == 7) vmovdqu WY_TMP, PRECALC_WK(i&~7) @@ -255,7 +254,7 @@ vpxor WY, WY_TMP, WY_TMP .elseif ((i & 7) == 7) vpxor WY_TMP2, WY_TMP, WY - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -291,7 +290,7 @@ vpsrld $30, WY, WY vpor WY, WY_TMP, WY .elseif ((i & 7) == 7) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -446,6 +445,16 @@ .endm +/* Add constant only if (%2 > %3) condition met (uses RTA as temp) + * %1 + %2 >= %3 ? %4 : 0 + */ +.macro ADD_IF_GE a, b, c, d + mov \a, RTA + add $\d, RTA + cmp $\c, \b + cmovge RTA, \a +.endm + /* * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining */ @@ -463,13 +472,16 @@ lea (2*4*80+32)(%rsp), WK_BUF # Precalc WK for first 2 blocks - PRECALC_OFFSET = 0 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64 .set i, 0 .rept 160 PRECALC i .set i, i + 1 .endr - PRECALC_OFFSET = 128 + + /* Go to next block if needed */ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 xchg WK_BUF, PRECALC_BUF .align 32 @@ -479,8 +491,8 @@ _loop: * we use K_BASE value as a signal of a last block, * it is set below by: cmovae BUFFER_PTR, K_BASE */ - cmp K_BASE, BUFFER_PTR - jne _begin + test BLOCKS_CTR, BLOCKS_CTR + jnz _begin .align 32 jmp _end .align 32 @@ -512,10 +524,10 @@ _loop0: .set j, j+2 .endr - add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ - cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ - + /* Update Counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128 /* * rounds * 60,62,64,66,68 @@ -532,8 +544,8 @@ _loop0: UPDATE_HASH 12(HASH_PTR), D UPDATE_HASH 16(HASH_PTR), E - cmp K_BASE, BUFFER_PTR /* is current block the last one? */ - je _loop + test BLOCKS_CTR, BLOCKS_CTR + jz _loop mov TB, B @@ -575,10 +587,10 @@ _loop2: .set j, j+2 .endr - add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ - - cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + /* update counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 jmp _loop3 _loop3: @@ -641,19 +653,12 @@ _loop3: avx2_zeroupper - lea K_XMM_AR(%rip), K_BASE - + /* Setup initial values */ mov CTX, HASH_PTR mov BUF, BUFFER_PTR - lea 64(BUF), BUFFER_PTR2 - - shl $6, CNT /* mul by 64 */ - add BUF, CNT - add $64, CNT - mov CNT, BUFFER_END - cmp BUFFER_END, BUFFER_PTR2 - cmovae K_BASE, BUFFER_PTR2 + mov BUF, BUFFER_PTR2 + mov CNT, BLOCKS_CTR xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f960a043cdeb..fc61739150e7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; -- cgit From cb87481ee89dbd6609e227afbf64900fb4e5c930 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Jul 2017 22:46:27 +1000 Subject: kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured The .data and .bss sections were modified in the generic linker script to pull in sections named .data., which are generated by gcc with -ffunction-sections and -fdata-sections options. The problem with this pattern is it can also match section names that Linux defines explicitly, e.g., .data.unlikely. This can cause Linux sections to get moved into the wrong place. The way to avoid this is to use ".." separators for explicit section names (the dot character is valid in a section name but not a C identifier). However currently there are sections which don't follow this rule, so for now just disable the wild card by default. Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2 Cc: # 4.9 Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination") Signed-off-by: Nicholas Piggin Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..9623d78f8494 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -59,6 +59,22 @@ /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* + * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which + * generates .data.identifier sections, which need to be pulled in with + * .data. We don't want to pull in .data..other sections, which Linux + * has defined. Same for text and bss. + */ +#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#define DATA_MAIN .data +#define BSS_MAIN .bss +#endif + /* * Align to a 32 byte boundary equal to the * alignment gcc 4.5 uses for a struct @@ -198,12 +214,9 @@ /* * .data section - * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates - * .data.identifier which needs to be pulled in with .data, but don't want to - * pull in .data..stuff which has its own requirements. Same for bss. */ #define DATA_DATA \ - *(.data .data.[0-9a-zA-Z_]*) \ + *(DATA_MAIN) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data) \ @@ -434,16 +447,17 @@ VMLINUX_SYMBOL(__security_initcall_end) = .; \ } -/* .text section. Map to function alignment to avoid address changes +/* + * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map - * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates - * .text.identifier which needs to be pulled in with .text , but some - * architectures define .text.foo which is not intended to be pulled in here. - * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have - * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */ + * + * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead + * code elimination is enabled, so these sections should be converted + * to use ".." first. + */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text .text.fixup .text.unlikely) \ + *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ @@ -613,7 +627,7 @@ BSS_FIRST_SECTIONS \ *(.bss..page_aligned) \ *(.dynbss) \ - *(.bss .bss.[0-9a-zA-Z_]*) \ + *(BSS_MAIN) \ *(COMMON) \ } -- cgit From 312a3d0918bb7d65862fbbd3e2f2f4630e4d6f56 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 2 Aug 2017 10:31:06 +0800 Subject: kbuild: trivial cleanups on the comments This is a bunch of trivial fixes and cleanups. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- Makefile | 10 +++++----- scripts/Kbuild.include | 7 +++---- scripts/Makefile.build | 8 ++++---- scripts/basic/Makefile | 2 +- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 6eba23bcb5ad..473f1aa76f6d 100644 --- a/Makefile +++ b/Makefile @@ -442,7 +442,7 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ # =========================================================================== # Rules shared between *config targets and build targets -# Basic helpers built in scripts/ +# Basic helpers built in scripts/basic/ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic @@ -505,7 +505,7 @@ ifeq ($(KBUILD_EXTMOD),) endif endif endif -# install and module_install need also be processed one by one +# install and modules_install need also be processed one by one ifneq ($(filter install,$(MAKECMDGOALS)),) ifneq ($(filter modules_install,$(MAKECMDGOALS)),) mixed-targets := 1 @@ -964,7 +964,7 @@ export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y2) $(drivers-y) $(net-y) $(virt- export KBUILD_VMLINUX_LIBS := $(libs-y1) export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds export LDFLAGS_vmlinux -# used by scripts/pacmage/Makefile +# used by scripts/package/Makefile export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools) vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) $(KBUILD_VMLINUX_LIBS) @@ -992,8 +992,8 @@ include/generated/autoksyms.h: FORCE ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link - cmd_link-vmlinux = \ - $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) ; \ +cmd_link-vmlinux = \ + $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) ; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) vmlinux: scripts/link-vmlinux.sh vmlinux_prereq $(vmlinux-deps) FORCE diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index dd8e2dde0b34..9ffd3dda3889 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -85,8 +85,8 @@ TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) # try-run # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) -# Exit code chooses option. "$$TMP" is can be used as temporary file and -# is automatically cleaned up. +# Exit code chooses option. "$$TMP" serves as a temporary file and is +# automatically cleaned up. try-run = $(shell set -e; \ TMP="$(TMPOUT).$$$$.tmp"; \ TMPO="$(TMPOUT).$$$$.o"; \ @@ -261,7 +261,6 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) # Execute command if command has changed or prerequisite(s) are updated. -# if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ @set -e; \ $(echo-cmd) $(cmd_$(1)); \ @@ -315,7 +314,7 @@ if_changed_rule = $(if $(strip $(any-prereq) $(arg-check) ), \ $(rule_$(1)), @:) ### -# why - tell why a a target got build +# why - tell why a target got built # enabled by make V=2 # Output (listed in the order they are checked): # (1) - due to target is PHONY diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 4a9a2cec0a1b..f6152c70f7f4 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -229,8 +229,8 @@ ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") endif # Due to recursion, we must skip empty.o. # The empty.o file is created in the make process in order to determine -# the target endianness and word size. It is made before all other C -# files, including recordmcount. +# the target endianness and word size. It is made before all other C +# files, including recordmcount. sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ @@ -245,13 +245,13 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl -endif +endif # BUILD_C_RECORDMCOUNT cmd_record_mcount = \ if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; -endif +endif # CONFIG_FTRACE_MCOUNT_RECORD ifdef CONFIG_STACK_VALIDATION ifneq ($(SKIP_STACK_VALIDATION),1) diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index ec10d9345bc2..0372b33febe5 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile @@ -1,5 +1,5 @@ ### -# Makefile.basic lists the most basic programs used during the build process. +# This Makefile lists the most basic programs used during the build process. # The programs listed herein are what are needed to do the basic stuff, # such as fix file dependencies. # This initial step is needed to avoid files to be recompiled -- cgit From 4e433fc4d1a93310d31e7671faca8660a2890908 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Tue, 8 Aug 2017 21:20:50 +0800 Subject: fixdep: trivial: typo fix and correction Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index fff818b92acb..bbf62cb1f819 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -25,7 +25,7 @@ * * So we play the same trick that "mkdep" played before. We replace * the dependency on autoconf.h by a dependency on every config - * option which is mentioned in any of the listed prequisites. + * option which is mentioned in any of the listed prerequisites. * * kconfig populates a tree in include/config/ with an empty file * for each config symbol and when the configuration is updated @@ -34,7 +34,7 @@ * the config symbols are rebuilt. * * So if the user changes his CONFIG_HIS_DRIVER option, only the objects - * which depend on "include/linux/config/his/driver.h" will be rebuilt, + * which depend on "include/config/his/driver.h" will be rebuilt, * so most likely only his driver ;-) * * The idea above dates, by the way, back to Michael E Chastain, AFAIK. @@ -75,7 +75,7 @@ * and then basically copies the ..d file to stdout, in the * process filtering out the dependency on autoconf.h and adding * dependencies on include/config/my/option.h for every - * CONFIG_MY_OPTION encountered in any of the prequisites. + * CONFIG_MY_OPTION encountered in any of the prerequisites. * * It will also filter out all the dependencies on *.ver. We need * to make sure that the generated version checksum are globally up -- cgit From 7ba190be873a86b2b90a25daca93ca1ced6c4423 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 2 Aug 2017 15:47:33 -0600 Subject: selftests: futex: fix run_tests target make -C tools/testing/selftests/futex/ run_tests doesn't run the futex tests. Running the tests when `dirname $(OUTPUT)` == $(PWD) doesn't work when the $(OUTPUT) is $(PWD) which is the case when the test is run using make -C tools/testing/selftests/futex/ run_tests. Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") Signed-off-by: Shuah Khan Reviewed-by: Darren Hart (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index e2fbb890aef9..7c647f619d63 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -14,7 +14,7 @@ all: done override define RUN_TESTS - @if [ `dirname $(OUTPUT)` = $(PWD) ]; then ./run.sh; fi + $(OUTPUT)/run.sh endef override define INSTALL_RULE -- cgit From 0fa375e6bc9023211eead30a6a79963c45a563da Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Wed, 9 Aug 2017 18:41:03 +0800 Subject: drm/rockchip: Fix suspend crash when drm is not bound Currently we are allocating drm_device in rockchip_drm_bind, so if the suspend/resume code access it when drm is not bound, we would hit this crash: [ 253.402836] Unable to handle kernel NULL pointer dereference at virtual address 00000028 [ 253.402837] pgd = ffffffc06c9b0000 [ 253.402841] [00000028] *pgd=0000000000000000, *pud=0000000000000000 [ 253.402844] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 253.402859] Modules linked in: btusb btrtl btbcm btintel bluetooth ath10k_pci ath10k_core ar10k_ath ar10k_mac80211 cfg80211 ip6table_filter asix usbnet mii [ 253.402864] CPU: 4 PID: 1331 Comm: cat Not tainted 4.4.70 #15 [ 253.402865] Hardware name: Google Scarlet (DT) [ 253.402867] task: ffffffc076c0ce00 ti: ffffffc06c2c8000 task.ti: ffffffc06c2c8000 [ 253.402871] PC is at rockchip_drm_sys_suspend+0x20/0x5c Add sanity checks to prevent that. Reported-by: Brian Norris Signed-off-by: Jeffy Chen Signed-off-by: Sean Paul Link: https://patchwork.kernel.org/patch/9890297/ --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index c6b1b7f3a2a3..c16bc0a7115b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -275,11 +275,15 @@ static void rockchip_drm_fb_resume(struct drm_device *drm) static int rockchip_drm_sys_suspend(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); - struct rockchip_drm_private *priv = drm->dev_private; + struct rockchip_drm_private *priv; + + if (!drm) + return 0; drm_kms_helper_poll_disable(drm); rockchip_drm_fb_suspend(drm); + priv = drm->dev_private; priv->state = drm_atomic_helper_suspend(drm); if (IS_ERR(priv->state)) { rockchip_drm_fb_resume(drm); @@ -293,8 +297,12 @@ static int rockchip_drm_sys_suspend(struct device *dev) static int rockchip_drm_sys_resume(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); - struct rockchip_drm_private *priv = drm->dev_private; + struct rockchip_drm_private *priv; + + if (!drm) + return 0; + priv = drm->dev_private; drm_atomic_helper_resume(drm, priv->state); rockchip_drm_fb_resume(drm); drm_kms_helper_poll_enable(drm); -- cgit From 7dc88d2afb6e5c6bd6bbedc394eb43c1e3114bdd Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:50 +0800 Subject: arm64: allwinner: a64: bananapi-m64: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 0d1f026d831a..ba2fde2909f9 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -51,6 +51,7 @@ compatible = "sinovoip,bananapi-m64", "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; }; -- cgit From dff751c68904cf587d918cfb6b2f5b0112f73bc9 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:51 +0800 Subject: arm64: allwinner: a64: pine64: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: 970239437493 ("arm64: allwinner: pine64: Enable dwmac-sun8i") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 08cda24ea194..827168bc22ed 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -51,6 +51,7 @@ compatible = "pine64,pine64", "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; -- cgit From 56a9155074b4d23aa07a98c35c6f107dd50a9367 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:52 +0800 Subject: arm64: allwinner: a64: sopine: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: 96219b004865 ("arm64: allwinner: a64: add device tree for SoPine with baseboard") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 17eb1cc5bf6b..216e3a5dafae 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -53,6 +53,7 @@ "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; }; -- cgit From bfe334924ccd9f4a53f30240c03cf2f43f5b2df1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2017 19:39:30 +0200 Subject: perf/x86: Fix RDPMC vs. mm_struct tracking Vince reported the following rdpmc() testcase failure: > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled The problem is of course that exec() plays tricks with what is current->mm, only destroying the old mappings after having installed the new mm. Fix this confusion by passing along vma->vm_mm instead of relying on current->mm. Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Link: http://lkml.kernel.org/r/20170802173930.cstykcqefmqt7jau@hirez.programming.kicks-ass.net [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 16 +++++++--------- include/linux/perf_event.h | 4 ++-- kernel/events/core.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e3db8f642a7..af12e294caed 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored) load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } -static void x86_pmu_event_mapped(struct perf_event *event) +static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; @@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event *event) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + lockdep_assert_held_exclusive(&mm->mmap_sem); - if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } -static void x86_pmu_event_unmapped(struct perf_event *event) +static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!current->mm) - return; if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; - if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873fc59e4..b14095bcf4bb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ + void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are diff --git a/kernel/events/core.c b/kernel/events/core.c index 426c2ffba16d..a654b8a3586f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5411,7 +5411,7 @@ aux_unlock: vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); return ret; } -- cgit From 9b231d9f47c6114d317ce28cff92a74ad80547f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Aug 2017 15:42:09 +0200 Subject: perf/core: Fix time on IOC_ENABLE Vince reported that when we do IOC_ENABLE/IOC_DISABLE while the task is SIGSTOP'ed state the timestamps go wobbly. It turns out we indeed fail to correctly account time while in 'OFF' state and doing IOC_ENABLE without getting scheduled in exposes the problem. Further thinking about this problem, it occurred to me that we can suffer a similar fate when we migrate an uncore event between CPUs. The perf_event_install() on the 'new' CPU will do add_event_to_ctx() which will reset all the time stamp, resulting in a subsequent update_event_times() to overwrite the total_time_* fields with smaller values. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/events/core.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a654b8a3586f..ee20d4c546b5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event, return can_add_hw; } +/* + * Complement to update_event_times(). This computes the tstamp_* values to + * continue 'enabled' state from @now, and effectively discards the time + * between the prior tstamp_stopped and now (as we were in the OFF state, or + * just switched (context) time base). + * + * This further assumes '@event->state == INACTIVE' (we just came from OFF) and + * cannot have been scheduled in yet. And going into INACTIVE state means + * '@event->tstamp_stopped = @now'. + * + * Thus given the rules of update_event_times(): + * + * total_time_enabled = tstamp_stopped - tstamp_enabled + * total_time_running = tstamp_stopped - tstamp_running + * + * We can insert 'tstamp_stopped == now' and reverse them to compute new + * tstamp_* values. + */ +static void __perf_event_enable_time(struct perf_event *event, u64 now) +{ + WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE); + + event->tstamp_stopped = now; + event->tstamp_enabled = now - event->total_time_enabled; + event->tstamp_running = now - event->total_time_running; +} + static void add_event_to_ctx(struct perf_event *event, struct perf_event_context *ctx) { @@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event, list_add_event(event, ctx); perf_group_attach(event); - event->tstamp_enabled = tstamp; - event->tstamp_running = tstamp; - event->tstamp_stopped = tstamp; + /* + * We can be called with event->state == STATE_OFF when we create with + * .disabled = 1. In that case the IOC_ENABLE will call this function. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) + __perf_event_enable_time(event, tstamp); } static void ctx_sched_out(struct perf_event_context *ctx, @@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event) u64 tstamp = perf_event_time(event); event->state = PERF_EVENT_STATE_INACTIVE; - event->tstamp_enabled = tstamp - event->total_time_enabled; + __perf_event_enable_time(event, tstamp); list_for_each_entry(sub, &event->sibling_list, group_entry) { + /* XXX should not be > INACTIVE if event isn't */ if (sub->state >= PERF_EVENT_STATE_INACTIVE) - sub->tstamp_enabled = tstamp - sub->total_time_enabled; + __perf_event_enable_time(sub, tstamp); } } -- cgit From e93c17301ac55321fc18e0f8316e924e58a83c8c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 7 Aug 2017 19:43:13 -0700 Subject: x86/asm/64: Clear AC on NMI entries This closes a hole in our SMAP implementation. This patch comes from grsecurity. Good catch! Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/314cc9f294e8f14ed85485727556ad4f15bb1659.1502159503.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d271fb79248f..6d078b89a5e8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1211,6 +1211,8 @@ ENTRY(nmi) * other IST entries. */ + ASM_CLAC + /* Use %rdx as our temp variable throughout */ pushq %rdx -- cgit From d197f7988721221fac64f899efd7657c15281810 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 31 Jul 2017 11:37:28 -0700 Subject: clocksource/drivers/arm_arch_timer: Fix mem frame loop initialization The loop to find the best memory frame in arch_timer_mem_acpi_init() initializes the loop counter with itself ('i = i'), which is suspicious in the first place and pointed out by clang. The loop condition is 'i < timer_count' and a prior for loop exits when 'i' reaches 'timer_count', therefore the second loop is never executed. Initialize the loop counter with 0 to iterate over all timers, which supposedly was the intention before the typo monster attacked. Fixes: c2743a36765d3 ("clocksource: arm_arch_timer: add GTDT support for memory-mapped timer") Signed-off-by: Matthias Kaehlcke Reported-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aae87c4c546e..72bbfccef113 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1440,7 +1440,7 @@ static int __init arch_timer_mem_acpi_init(int platform_timer_count) * While unlikely, it's theoretically possible that none of the frames * in a timer expose the combination of feature we want. */ - for (i = i; i < timer_count; i++) { + for (i = 0; i < timer_count; i++) { timer = &timers[i]; frame = arch_timer_mem_find_best_frame(timer); -- cgit From 5c23a558a65406cac472df07fd26a2688a42cad2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 30 Jun 2017 01:14:45 -0500 Subject: clocksource/drivers/em_sti: Fix error return codes in em_sti_probe() Propagate the return values of platform_get_irq and devm_request_irq on failure. Cc: Frans Klaver Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Lezcano --- drivers/clocksource/em_sti.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index bc48cbf6a795..269db74a0658 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -305,7 +305,7 @@ static int em_sti_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "failed to get irq\n"); - return -EINVAL; + return irq; } /* map memory, let base point to the STI instance */ @@ -314,11 +314,12 @@ static int em_sti_probe(struct platform_device *pdev) if (IS_ERR(p->base)) return PTR_ERR(p->base); - if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt, - IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, - dev_name(&pdev->dev), p)) { + ret = devm_request_irq(&pdev->dev, irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p); + if (ret) { dev_err(&pdev->dev, "failed to request low IRQ\n"); - return -ENOENT; + return ret; } /* get hold of clock */ -- cgit From 10e66760fa8ee11f254a69433fc132d04758a5fc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 3 Aug 2017 12:58:18 +0200 Subject: x86/smpboot: Unbreak CPU0 hotplug A hang on CPU0 onlining after a preceding offlining is observed. Trace shows that CPU0 is stuck in check_tsc_sync_target() waiting for source CPU to run check_tsc_sync_source() but this never happens. Source CPU, in its turn, is stuck on synchronize_sched() which is called from native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler(). So it's a classic ABBA deadlock, due to the use of synchronize_sched() in unregister_nmi_handler(). Fix the bug by moving unregister_nmi_handler() from do_boot_cpu() to native_cpu_up() after cpu onlining is done. Signed-off-by: Vitaly Kuznetsov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170803105818.9934-1-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b474c8de7fba..54b9e89d4d6b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) * Returns zero if CPU booted OK, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) +static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, + int *cpu0_nmi_registered) { volatile u32 *trampoline_status = (volatile u32 *) __va(real_mode_header->trampoline_status); @@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long start_ip = real_mode_header->trampoline_start; unsigned long boot_error = 0; - int cpu0_nmi_registered = 0; unsigned long timeout; idle->thread.sp = (unsigned long)task_pt_regs(idle); @@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); else boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, - &cpu0_nmi_registered); + cpu0_nmi_registered); if (!boot_error) { /* @@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) */ smpboot_restore_warm_reset_vector(); } - /* - * Clean up the nmi handler. Do this after the callin and callout sync - * to avoid impact of possible long unregister time. - */ - if (cpu0_nmi_registered) - unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); return boot_error; } @@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) int native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); + int cpu0_nmi_registered = 0; unsigned long flags; - int err; + int err, ret = 0; WARN_ON(irqs_disabled()); @@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) common_cpu_up(cpu, tidle); - err = do_boot_cpu(apicid, cpu, tidle); + err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); if (err) { pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - return -EIO; + ret = -EIO; + goto unreg_nmi; } /* @@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) touch_nmi_watchdog(); } - return 0; +unreg_nmi: + /* + * Clean up the nmi handler. Do this after the callin and callout sync + * to avoid impact of possible long unregister time. + */ + if (cpu0_nmi_registered) + unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); + + return ret; } /** -- cgit From fdf6e7a8c96ebe115b6460768c82dd136ecbd8db Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 26 Jul 2017 18:15:49 +0800 Subject: irqchip/gic-v3-its: Allow GIC ITS number more than MAX_NUMNODES When enabling ITS NUMA support on D05, I got the boot log: [ 0.000000] SRAT: PXM 0 -> ITS 0 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 1 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 2 -> Node 0 [ 0.000000] SRAT: PXM 1 -> ITS 3 -> Node 1 [ 0.000000] SRAT: ITS affinity exceeding max count[4] This is wrong on D05 as we have 8 ITSs with 4 NUMA nodes. So dynamically alloc the memory needed instead of using its_srat_maps[MAX_NUMNODES], which count the number of ITS entry(ies) in SRAT and alloc its_srat_maps as needed, then build the mapping of numa node to ITS ID. Of course, its_srat_maps will be freed after ITS probing because we don't need that after boot. After doing this, I got what I wanted: [ 0.000000] SRAT: PXM 0 -> ITS 0 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 1 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 2 -> Node 0 [ 0.000000] SRAT: PXM 1 -> ITS 3 -> Node 1 [ 0.000000] SRAT: PXM 2 -> ITS 4 -> Node 2 [ 0.000000] SRAT: PXM 2 -> ITS 5 -> Node 2 [ 0.000000] SRAT: PXM 2 -> ITS 6 -> Node 2 [ 0.000000] SRAT: PXM 3 -> ITS 7 -> Node 3 Fixes: dbd2b8267233 ("irqchip/gic-v3-its: Add ACPI NUMA node mapping") Signed-off-by: Hanjun Guo Reviewed-by: Lorenzo Pieralisi Cc: Ganapatrao Kulkarni Cc: John Garry Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 3bfbf8d96a0e..5fd5f62c925d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1843,7 +1843,7 @@ struct its_srat_map { u32 its_id; }; -static struct its_srat_map its_srat_maps[MAX_NUMNODES] __initdata; +static struct its_srat_map *its_srat_maps __initdata; static int its_in_srat __initdata; static int __init acpi_get_its_numa_node(u32 its_id) @@ -1857,6 +1857,12 @@ static int __init acpi_get_its_numa_node(u32 its_id) return NUMA_NO_NODE; } +static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header, + const unsigned long end) +{ + return 0; +} + static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, const unsigned long end) { @@ -1873,12 +1879,6 @@ static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, return -EINVAL; } - if (its_in_srat >= MAX_NUMNODES) { - pr_err("SRAT: ITS affinity exceeding max count[%d]\n", - MAX_NUMNODES); - return -EINVAL; - } - node = acpi_map_pxm_to_node(its_affinity->proximity_domain); if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { @@ -1897,14 +1897,37 @@ static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, static void __init acpi_table_parse_srat_its(void) { + int count; + + count = acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, + gic_acpi_match_srat_its, 0); + if (count <= 0) + return; + + its_srat_maps = kmalloc(count * sizeof(struct its_srat_map), + GFP_KERNEL); + if (!its_srat_maps) { + pr_warn("SRAT: Failed to allocate memory for its_srat_maps!\n"); + return; + } + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, gic_acpi_parse_srat_its, 0); } + +/* free the its_srat_maps after ITS probing */ +static void __init acpi_its_srat_maps_free(void) +{ + kfree(its_srat_maps); +} #else static void __init acpi_table_parse_srat_its(void) { } static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; } +static void __init acpi_its_srat_maps_free(void) { } #endif static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header, @@ -1951,6 +1974,7 @@ static void __init its_acpi_probe(void) acpi_table_parse_srat_its(); acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, gic_acpi_parse_madt_its, 0); + acpi_its_srat_maps_free(); } #else static void __init its_acpi_probe(void) { } -- cgit From a008873740a3d44946c7e72e456f15146cfd7287 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 10 Aug 2017 15:41:17 +0100 Subject: irqchip/gic-v3-its-platform-msi: Fix msi-parent parsing loop While parsing the msi-parent property to chase up the IRQ domain a given device belongs to, the index into the msi-parent tuple should be incremented to ensure all properties entries are taken into account. Current code missed the index update so the parsing loop does not work in case multiple msi-parent phandles are present and may turn into an infinite loop in of_pmsi_get_dev_id() if phandle at index 0 does not correspond to the domain we are actually looking-up. Fix the code by updating the phandle index at each iteration in of_pmsi_get_dev_id(). Fixes: deac7fc1c87f ("irqchip/gic-v3-its: Parse new version of msi-parent property") Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-platform-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index 249240d9a425..833a90fe33ae 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -43,6 +43,7 @@ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev, *dev_id = args.args[0]; break; } + index++; } while (!ret); return ret; -- cgit From 9157259d16a8ee8116a98d32f29b797689327e8d Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Thu, 3 Aug 2017 09:17:21 -0400 Subject: mm: add pmd_t initializer __pmd() to work around a GCC bug. THP migration is added but only supports x86_64 at the moment. For all other architectures, swp_entry_to_pmd() only returns a zero pmd_t. Due to a GCC zero initializer bug #53119, the standard (pmd_t){0} initializer is not accepted by all GCC versions. __pmd() is a feasible workaround. In addition, sparc32's pmd_t is an array instead of a single value, so we need (pmd_t){ {0}, } instead of (pmd_t){0}. Thus, a different __pmd() definition is needed in sparc32. Signed-off-by: Zi Yan Signed-off-by: David S. Miller --- arch/sparc/include/asm/page_32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index 0efd0583a8c9..6249214148c2 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -68,6 +68,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t; #define iopgprot_val(x) ((x).iopgprot) #define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { { (x) }, }) #define __iopte(x) ((iopte_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __ctxd(x) ((ctxd_t) { (x) } ) @@ -95,6 +96,7 @@ typedef unsigned long iopgprot_t; #define iopgprot_val(x) (x) #define __pte(x) (x) +#define __pmd(x) ((pmd_t) { { (x) }, }) #define __iopte(x) (x) #define __pgd(x) (x) #define __ctxd(x) (x) -- cgit From c587c79f90632df59c61383c6abebb2e07a81911 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Tue, 8 Aug 2017 14:05:12 -0700 Subject: cpufreq: intel_pstate: report correct CPU frequencies during trace The intel_pstate CPU frequency scaling driver has always calculated CPU frequency incorrectly. Recent changes have eliminted most of the issues, however the frequency reported in the trace buffer, if used, is incorrect. It remains desireable that cpu->pstate.scaling still be a nice round number for things such as when setting max and min frequencies. So the proposal is to just fix the reported frequency in the trace data. Fixes what remains of [1]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=96521 # [1] Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0566455f233e..65ee4fcace1f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1613,8 +1613,7 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) static inline int32_t get_avg_frequency(struct cpudata *cpu) { - return mul_ext_fp(cpu->sample.core_avg_perf, - cpu->pstate.max_pstate_physical * cpu->pstate.scaling); + return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz); } static inline int32_t get_avg_pstate(struct cpudata *cpu) -- cgit From 8e2f3bce05e056575c2c84a344a8291fdabb5f21 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Tue, 8 Aug 2017 14:12:49 -0700 Subject: cpufreq: x86: Disable interrupts during MSRs reading According to Intel 64 and IA-32 Architectures SDM, Volume 3, Chapter 14.2, "Software needs to exercise care to avoid delays between the two RDMSRs (for example interrupts)". So, disable interrupts during reading MSRs IA32_APERF and IA32_MPERF. See also: commit 4ab60c3f32c7 (cpufreq: intel_pstate: Disable interrupts during MSRs reading). Signed-off-by: Doug Smythies Reviewed-by: Len Brown Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/aperfmperf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 7cf7c70b6ef2..0ee83321a313 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -40,13 +40,16 @@ static void aperfmperf_snapshot_khz(void *dummy) struct aperfmperf_sample *s = this_cpu_ptr(&samples); ktime_t now = ktime_get(); s64 time_delta = ktime_ms_delta(now, s->time); + unsigned long flags; /* Don't bother re-computing within the cache threshold time. */ if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return; + local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + local_irq_restore(flags); aperf_delta = aperf - s->aperf; mperf_delta = mperf - s->mperf; -- cgit From a8ec3ee861b6e4e6b82a98777c65510ae63766c1 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 10 Aug 2017 18:07:36 +0300 Subject: arc: Mask individual IRQ lines during core INTC init ARC cores on reset have all interrupt lines of built-in INTC enabled. Which means once we globally enable interrupts (very early on boot) faulty hardware blocks may trigger an interrupt that Linux kernel cannot handle yet as corresponding handler is not yet installed. In that case system falls in "interrupt storm" and basically never does anything useful except entering and exiting generic IRQ handling code. One real example of that kind of problematic hardware is DW GMAC which also has interrupts enabled on reset and if Ethernet PHY informs GMAC about link state, GMAC immediately reports that upstream to ARC core and here we are. Now with that change we mask all individual IRQ lines making entire system more fool-proof. [This patch was motivated by Adaptrum platform support] Signed-off-by: Alexey Brodkin Cc: Eugeniy Paltsev Tested-by: Alexandru Gagniuc Signed-off-by: Vineet Gupta --- arch/arc/kernel/intc-arcv2.c | 3 +++ arch/arc/kernel/intc-compact.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index f928795fd07a..cf90714a676d 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -75,10 +75,13 @@ void arc_init_IRQ(void) * Set a default priority for all available interrupts to prevent * switching of register banks if Fast IRQ and multiple register banks * are supported by CPU. + * Also disable all IRQ lines so faulty external hardware won't + * trigger interrupt that kernel is not ready to handle. */ for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) { write_aux_reg(AUX_IRQ_SELECT, i); write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + write_aux_reg(AUX_IRQ_ENABLE, 0); } /* setup status32, don't enable intr yet as kernel doesn't want */ diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 7e608c6b0a01..cef388025adf 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -27,7 +27,7 @@ */ void arc_init_IRQ(void) { - int level_mask = 0; + int level_mask = 0, i; /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; @@ -40,6 +40,18 @@ void arc_init_IRQ(void) if (level_mask) pr_info("Level-2 interrupts bitset %x\n", level_mask); + + /* + * Disable all IRQ lines so faulty external hardware won't + * trigger interrupt that kernel is not ready to handle. + */ + for (i = TIMER0_IRQ; i < NR_CPU_IRQS; i++) { + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << i); + write_aux_reg(AUX_IENABLE, ienb); + } } /* -- cgit From 4d3a869333b74352c372077f316756d38cae09b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 11 Aug 2017 09:51:41 +0200 Subject: ALSA: seq: Fix CONFIG_SND_SEQ_MIDI dependency The commit 0181307abc1d ("ALSA: seq: Reorganize kconfig and build") rewrote the dependency of each sequencer module in a standard way, but there was one change applied mistakenly: CONFIG_SND_SEQ_MIDI isn't enabled properly by CONFIG_SND_RAWMIDI. I seem to have changed the wrong one instead, CONFIG_SND_SEQ_MIDI_EMUL, which is eventually reverse-selected by CONFIG_SND_SEQ_MIDI itself. This ended up the lack of snd-seq-midi module as reported below. The fix is to put def_tristate properly to CONFIG_SND_SEQ_MIDI instead of *_MIDI_EMUL entry. Fixes: 0181307abc1d ("ALSA: seq: Reorganize kconfig and build") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196633 Signed-off-by: Takashi Iwai --- sound/core/seq/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/Kconfig b/sound/core/seq/Kconfig index a536760a94c2..45c1336c6597 100644 --- a/sound/core/seq/Kconfig +++ b/sound/core/seq/Kconfig @@ -47,10 +47,10 @@ config SND_SEQ_HRTIMER_DEFAULT timer. config SND_SEQ_MIDI_EVENT - def_tristate SND_RAWMIDI + tristate config SND_SEQ_MIDI - tristate + def_tristate SND_RAWMIDI select SND_SEQ_MIDI_EVENT config SND_SEQ_MIDI_EMUL -- cgit From 2406b296a3a80ba1c78707f205556f2388d474ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jul 2017 22:56:19 +0200 Subject: gpu: ipu-v3: add DRM dependency The new PRE/PRG driver code causes a link failure when IPUv3 is built-in, but DRM is built as a module: drivers/gpu/ipu-v3/ipu-pre.o: In function `ipu_pre_configure': ipu-pre.c:(.text.ipu_pre_configure+0x18): undefined reference to `drm_format_info' drivers/gpu/ipu-v3/ipu-prg.o: In function `ipu_prg_format_supported': ipu-prg.c:(.text.ipu_prg_format_supported+0x8): undefined reference to `drm_format_info' Adding a Kconfig dependency on DRM means we don't run into this problem any more. If DRM is disabled altogether, the IPUv3 driver is built without PRE/PRG support. Fixes: ea9c260514c1 ("gpu: ipu-v3: add driver for Prefetch Resolve Gasket") Link: https://patchwork.kernel.org/patch/9636665/ Signed-off-by: Arnd Bergmann [p.zabel@pengutronix.de: changed the dependency from DRM to DRM || !DRM, since the link failure only happens when DRM=m and IPUV3_CORE=y. Modified the commit message to reflect this.] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig index 08766c6e7856..87a20b3dcf7a 100644 --- a/drivers/gpu/ipu-v3/Kconfig +++ b/drivers/gpu/ipu-v3/Kconfig @@ -1,6 +1,7 @@ config IMX_IPUV3_CORE tristate "IPUv3 core support" depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM + depends on DRM || !DRM # if DRM=m, this can't be 'y' select GENERIC_IRQ_CHIP help Choose this if you have a i.MX5/6 system and want to use the Image -- cgit From 5be5dd38d4628fdbff7359f235f7cdf0cf9655f1 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Aug 2016 11:55:18 +0200 Subject: drm/imx: ipuv3-plane: fix YUV framebuffer scanout on the base plane Historically, only RGB framebuffers could be assigned to the primary plane. This changed with universal plane support. Since no colorspace conversion was set up for the IPUv3 full plane, assigning YUV frame buffers to the primary plane caused incorrect output. Fix this by enabling color space conversion also for the primary plane. Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 6276bb834b4f..d3845989a29d 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -545,15 +545,13 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, return; } + ics = ipu_drm_fourcc_to_colorspace(fb->format->format); switch (ipu_plane->dp_flow) { case IPU_DP_FLOW_SYNC_BG: - ipu_dp_setup_channel(ipu_plane->dp, - IPUV3_COLORSPACE_RGB, - IPUV3_COLORSPACE_RGB); + ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_RGB); ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true); break; case IPU_DP_FLOW_SYNC_FG: - ics = ipu_drm_fourcc_to_colorspace(state->fb->format->format); ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_UNKNOWN); /* Enable local alpha on partial plane */ -- cgit From 9e80dbd87286d3252ac2f78c6465c16e2ec8d476 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jul 2017 10:22:25 +0300 Subject: clocksource/drivers/timer-of: Checking for IS_ERR() instead of NULL The current code checks the return value of the of_io_request_and_map() function as it was returning a NULL pointer in case of error. However, it returns an error code encoded in the pointer return value, not a NULL value. Fix this by checking the returned pointer against IS_ERR() and return the error with PTR_ERR(). Signed-off-by: Dan Carpenter Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index d509b500a7b5..4d7aef9d9c15 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -128,9 +128,9 @@ static __init int timer_base_init(struct device_node *np, const char *name = of_base->name ? of_base->name : np->full_name; of_base->base = of_io_request_and_map(np, of_base->index, name); - if (!of_base->base) { + if (IS_ERR(of_base->base)) { pr_err("Failed to iomap (%s)\n", name); - return -ENXIO; + return PTR_ERR(of_base->base); } return 0; -- cgit From 599dc457c79bde8bd4fe8bbb2ba1f30ef3d7a5c8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 18 Jul 2017 09:25:39 +0100 Subject: clocksource/drivers/Kconfig: Fix CLKSRC_PISTACHIO dependencies In v4.13, CLKSRC_PISTACHIO can select TIMER_OF on architectures without GENERIC_CLOCKEVENTS, resulting in a struct clock_event_device missing some required features and build breakage compiling timer_of.c. One of the symbols selecting TIMER_OF is CLKSRC_PISTACHIO, so add the dependency on GENERIC_CLOCKEVENTS. Thanks to kbuild test robot for finding this error (https://lkml.org/lkml/2017/7/16/249) Signed-off-by: Matt Redfearn Suggested-by: Ian Abbott Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index fcae5ca6ac92..54a67f8a28eb 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -262,7 +262,7 @@ config CLKSRC_LPC32XX config CLKSRC_PISTACHIO bool "Clocksource for Pistachio SoC" if COMPILE_TEST - depends on HAS_IOMEM + depends on GENERIC_CLOCKEVENTS && HAS_IOMEM select TIMER_OF help Enables the clocksource for the Pistachio SoC. -- cgit From 5442c26995527245c94c4a49e535eae8a60a5299 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Aug 2017 20:55:52 +0200 Subject: x86/cpufeature, kvm/svm: Rename (shorten) the new "virtualized VMSAVE/VMLOAD" CPUID flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "virtual_vmload_vmsave" is what is going to land in /proc/cpuinfo now as per v4.13-rc4, for a single feature bit which is clearly too long. So rename it to what it is called in the processor manual. "v_vmsave_vmload" is a bit shorter, after all. We could go more aggressively here but having it the same as in the processor manual is advantageous. Signed-off-by: Borislav Petkov Acked-by: Radim Krčmář Cc: Janakarajan Natarajan Cc: Jörg Rödel Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm-ML Link: http://lkml.kernel.org/r/20170801185552.GA3743@nazgul.tnic Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kvm/svm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ca3c48c0872f..5a28e8e55e36 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -286,7 +286,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1107626938cc..56ba05312759 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1100,7 +1100,7 @@ static __init int svm_hardware_setup(void) if (vls) { if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) || + !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || !IS_ENABLED(CONFIG_X86_64)) { vls = false; } else { -- cgit From b45e4c45b13275a6b4a3f83ae8301a1963fbe5d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Aug 2017 16:57:09 +0100 Subject: x86: Mark various structures and functions as 'static' Mark a couple of structures and functions as 'static', pointed out by Sparse: warning: symbol 'bts_pmu' was not declared. Should it be static? warning: symbol 'p4_event_aliases' was not declared. Should it be static? warning: symbol 'rapl_attr_groups' was not declared. Should it be static? symbol 'process_uv2_message' was not declared. Should it be static? Signed-off-by: Colin Ian King Acked-by: Andrew Banman # for the UV change Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Will Deacon Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170810155709.7094-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 2 +- arch/x86/events/intel/p4.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/platform/uv/tlb_uv.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 8ae8c5ce3a1f..ddd8d3516bfc 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -69,7 +69,7 @@ struct bts_buffer { struct bts_phys buf[0]; }; -struct pmu bts_pmu; +static struct pmu bts_pmu; static size_t buf_size(struct page *page) { diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index eb0533558c2b..d32c0eed38ca 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -587,7 +587,7 @@ static __initconst const u64 p4_hw_cache_event_ids * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are * either up to date automatically or not applicable at all. */ -struct p4_event_alias { +static struct p4_event_alias { u64 original; u64 alternative; } p4_event_aliases[] = { diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index a45e2114a846..8e2457cb6b4a 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -559,7 +559,7 @@ static struct attribute_group rapl_pmu_format_group = { .attrs = rapl_formats_attr, }; -const struct attribute_group *rapl_attr_groups[] = { +static const struct attribute_group *rapl_attr_groups[] = { &rapl_pmu_attr_group, &rapl_pmu_format_group, &rapl_pmu_events_group, diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3e4bdb442fbc..f44c0bc95aa2 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -26,7 +26,7 @@ static struct bau_operations ops __ro_after_init; /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ -static int timeout_base_ns[] = { +static const int timeout_base_ns[] = { 20, 160, 1280, @@ -1216,7 +1216,7 @@ static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register. * Such a message must be ignored. */ -void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) +static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) { unsigned long mmr_image; unsigned char swack_vec; -- cgit From adb4f11e0a8f4e29900adb2b7af28b6bbd5c1fa4 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Thu, 10 Aug 2017 10:52:45 +0800 Subject: clocksource/drivers/arm_arch_timer: Avoid infinite recursion when ftrace is enabled On platforms with an arch timer erratum workaround, it's possible for arch_timer_reg_read_stable() to recurse into itself when certain tracing options are enabled, leading to stack overflows and related problems. For example, when PREEMPT_TRACER and FUNCTION_GRAPH_TRACER are selected, it's possible to trigger this with: $ mount -t debugfs nodev /sys/kernel/debug/ $ echo function_graph > /sys/kernel/debug/tracing/current_tracer The problem is that in such cases, preempt_disable() instrumentation attempts to acquire a timestamp via trace_clock(), resulting in a call back to arch_timer_reg_read_stable(), and hence recursion. This patch changes arch_timer_reg_read_stable() to use preempt_{disable,enable}_notrace(), which avoids this. This problem is similar to the fixed by upstream commit 96b3d28bf4 ("sched/clock: Prevent tracing recursion in sched_clock_cpu()"). Fixes: 6acc71ccac71 ("arm64: arch_timer: Allows a CPU-specific erratum to only affect a subset of CPUs") Signed-off-by: Ding Tianhong Acked-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Daniel Lezcano --- arch/arm64/include/asm/arch_timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 74d08e44a651..a652ce0a5cb2 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -65,13 +65,13 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, u64 _val; \ if (needs_unstable_timer_counter_workaround()) { \ const struct arch_timer_erratum_workaround *wa; \ - preempt_disable(); \ + preempt_disable_notrace(); \ wa = __this_cpu_read(timer_unstable_counter_workaround); \ if (wa && wa->read_##reg) \ _val = wa->read_##reg(); \ else \ _val = read_sysreg(reg); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } else { \ _val = read_sysreg(reg); \ } \ -- cgit From 622b2fbe625bc255faa4ee69a0fbcab80d3e40e6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 9 Aug 2017 15:59:10 -0600 Subject: selftests: timers: freq-step: fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compile error due to ksft_exit_skip() update to take var_args. freq-step.c: In function ‘init_test’: freq-step.c:234:3: error: too few arguments to function ‘ksft_exit_skip’ ksft_exit_skip(); ^~~~~~~~~~~~~~ In file included from freq-step.c:26:0: ../kselftest.h:167:19: note: declared here static inline int ksft_exit_skip(const char *msg, ...) ^~~~~~~~~~~~~~ : recipe for target 'freq-step' failed Signed-off-by: Shuah Khan --- tools/testing/selftests/timers/freq-step.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c index e8c61830825a..22312eb4c941 100644 --- a/tools/testing/selftests/timers/freq-step.c +++ b/tools/testing/selftests/timers/freq-step.c @@ -229,10 +229,9 @@ static void init_test(void) printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t", 1e9 * precision); - if (precision > MAX_PRECISION) { - printf("[SKIP]\n"); - ksft_exit_skip(); - } + if (precision > MAX_PRECISION) + ksft_exit_skip("precision: %.0f ns > MAX_PRECISION: %.0f ns\n", + 1e9 * precision, 1e9 * MAX_PRECISION); printf("[OK]\n"); srand(ts.tv_sec ^ ts.tv_nsec); -- cgit From e71cb9e00922902ba0519f37d09145f117dc02b3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 9 Aug 2017 16:46:09 -0400 Subject: net: dsa: ksz: fix skb freeing The DSA layer frees the original skb when an xmit function returns NULL, meaning an error occurred. But if the tagging code copied the original skb, it is responsible of freeing the copy if an error occurs. The ksz tagging code currently has two issues: if skb_put_padto fails, the skb copy is not freed, and the original skb will be freed twice. To fix that, move skb_put_padto inside both branches of the skb_tailroom condition, before freeing the original skb, and free the copy on error. Signed-off-by: Vivien Didelot Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- net/dsa/tag_ksz.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index fab41de8e983..de66ca8e6201 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -42,6 +42,9 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len; if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { + if (skb_put_padto(skb, skb->len + padlen)) + return NULL; + nskb = skb; } else { nskb = alloc_skb(NET_IP_ALIGN + skb->len + @@ -56,13 +59,15 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + + if (skb_put_padto(nskb, nskb->len + padlen)) { + kfree_skb(nskb); + return NULL; + } + kfree_skb(skb); } - /* skb is freed when it fails */ - if (skb_put_padto(nskb, nskb->len + padlen)) - return NULL; - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); tag[0] = 0; tag[1] = 1 << p->dp->index; /* destination port */ -- cgit From ad729bc9acfb7c47112964b4877ef5404578ed13 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Thu, 10 Aug 2017 06:41:44 +0200 Subject: bonding: require speed/duplex only for 802.3ad, alb and tlb The patch c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") puts the link state to down if bond_update_speed_duplex() cannot retrieve speed and duplex settings. Assumably the patch was written with 802.3ad mode in mind which relies on link speed/duplex settings. For other modes like active-backup these settings are not required. Thus, only for these other modes, this patch reintroduces support for slaves that do not support reporting speed or duplex such as wireless devices. This fixes the regression reported in bug 196547 (https://bugzilla.kernel.org/show_bug.cgi?id=196547). Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") Signed-off-by: Andreas Born Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 ++++-- include/net/bonding.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9bee6c1c70cc..85bb272d2a34 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1569,7 +1569,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->delay = 0; new_slave->link_failure_count = 0; - if (bond_update_speed_duplex(new_slave)) + if (bond_update_speed_duplex(new_slave) && + bond_needs_speed_duplex(bond)) new_slave->link = BOND_LINK_DOWN; new_slave->last_rx = jiffies - @@ -2140,7 +2141,8 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - if (bond_update_speed_duplex(slave)) { + if (bond_update_speed_duplex(slave) && + bond_needs_speed_duplex(bond)) { slave->link = BOND_LINK_DOWN; netdev_warn(bond->dev, "failed to get link speed/duplex for %s\n", diff --git a/include/net/bonding.h b/include/net/bonding.h index b00508d22e0a..b2e68657a216 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -277,6 +277,11 @@ static inline bool bond_is_lb(const struct bonding *bond) BOND_MODE(bond) == BOND_MODE_ALB; } +static inline bool bond_needs_speed_duplex(const struct bonding *bond) +{ + return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); +} + static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_TLB) && -- cgit From 8d55373875052b891ae72c9bcaf9c2d7178676c0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 10 Aug 2017 12:31:40 +0300 Subject: net/sched/hfsc: allocate tcf block for hfsc root class Without this filters cannot be attached. Signed-off-by: Konstantin Khlebnikov Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b52f74610dc7..3ad02bbe6903 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1428,6 +1428,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; + err = tcf_block_get(&q->root.block, &q->root.filter_list); + if (err) + goto err_tcf; + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; @@ -1447,6 +1451,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); return 0; + +err_tcf: + qdisc_class_hash_destroy(&q->clhash); + return err; } static int -- cgit From fbca164776e438b639af592c522b8b0506b54dcc Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Thu, 10 Aug 2017 16:56:05 +0200 Subject: net: stmmac: Use the right logging function in stmmac_mdio_register Currently, the function stmmac_mdio_register() is only used by stmmac_dvr_probe() from stmmac_main.c, in order to register the MDIO bus and probe information about the PHY. As this function is called before calling register_netdev(), all messages logged from stmmac_mdio_register are prefixed by "(unnamed net_device)". The goal of netdev_info or netdev_err is to dump useful infos about a net_device, when this data structure is partially initialized, there is no point for using these functions. This commit fixes the issue by replacing all netdev_*() by the corresponding dev_*() function for logging. The last netdev_info is replaced by phy_attached_info(), as a valid phydev can be used at this point. Signed-off-by: Romain Perier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index db157a47000c..72ec711fcba2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -204,6 +204,7 @@ int stmmac_mdio_register(struct net_device *ndev) struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; struct device_node *mdio_node = priv->plat->mdio_node; + struct device *dev = ndev->dev.parent; int addr, found; if (!mdio_bus_data) @@ -237,7 +238,7 @@ int stmmac_mdio_register(struct net_device *ndev) else err = mdiobus_register(new_bus); if (err != 0) { - netdev_err(ndev, "Cannot register the MDIO bus\n"); + dev_err(dev, "Cannot register the MDIO bus\n"); goto bus_register_fail; } @@ -285,14 +286,12 @@ int stmmac_mdio_register(struct net_device *ndev) irq_str = irq_num; break; } - netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", - phydev->phy_id, addr, irq_str, phydev_name(phydev), - act ? " active" : ""); + phy_attached_info(phydev); found = 1; } if (!found && !mdio_node) { - netdev_warn(ndev, "No PHY found\n"); + dev_warn(dev, "No PHY found\n"); mdiobus_unregister(new_bus); mdiobus_free(new_bus); return -ENODEV; -- cgit From bb3afda4fc4ea690ff92a36eef4c0afe4d19da04 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 11 Aug 2017 10:18:20 +0200 Subject: nfp: do not update MTU from BH in flower app The Flower app may receive a request to update the MTU of a representor netdev upon receipt of a control message from the firmware. This requires the RTNL lock which needs to be taken outside of the packet processing path. As a handling of this correctly seems a little to invasive for a fix simply skip setting the MTU for now. Relevant backtrace: [ 1496.288489] BUG: scheduling while atomic: kworker/0:3/373/0x00000100 [ 1496.294911] dca syscopyarea sysfillrect sysimgblt fb_sys_fops ptp drm mxm_wmi ahci pps_core libahci i2c_algo_bit wmi [last unloaded: nfp] [ 1496.294918] CPU: 0 PID: 373 Comm: kworker/0:3 Tainted: G OE 4.13.0-rc3+ #3 [ 1496.294919] Hardware name: Supermicro X10DRi/X10DRi, BIOS 2.0 12/28/2015 [ 1496.294923] Workqueue: events work_for_cpu_fn [ 1496.294924] Call Trace: [ 1496.294927] [ 1496.294931] dump_stack+0x63/0x82 [ 1496.294935] __schedule_bug+0x54/0x70 [ 1496.294937] __schedule+0x62f/0x890 [ 1496.294941] ? intel_unmap_sg+0x90/0x90 [ 1496.294942] schedule+0x36/0x80 [ 1496.294943] schedule_preempt_disabled+0xe/0x10 [ 1496.294945] __mutex_lock.isra.2+0x445/0x4a0 [ 1496.294947] ? device_is_rmrr_locked+0x12/0x50 [ 1496.294950] ? kfree+0x162/0x170 [ 1496.294952] ? device_is_rmrr_locked+0x12/0x50 [ 1496.294953] ? iommu_should_identity_map+0x50/0xe0 [ 1496.294954] __mutex_lock_slowpath+0x13/0x20 [ 1496.294955] ? iommu_no_mapping+0x48/0xd0 [ 1496.294956] ? __mutex_lock_slowpath+0x13/0x20 [ 1496.294957] mutex_lock+0x2f/0x40 [ 1496.294960] rtnl_lock+0x15/0x20 [ 1496.294979] nfp_flower_cmsg_rx+0xc8/0x150 [nfp] [ 1496.294986] nfp_ctrl_poll+0x286/0x350 [nfp] [ 1496.294989] tasklet_action+0xf6/0x110 [ 1496.294992] __do_softirq+0xed/0x278 [ 1496.294993] irq_exit+0xb6/0xc0 [ 1496.294994] do_IRQ+0x4f/0xd0 [ 1496.294996] common_interrupt+0x89/0x89 Fixes: 948faa46c05b ("nfp: add support for control messages for flower app") Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index dd7fa9cf225f..b0837b58c3a1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -115,14 +115,10 @@ nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb) return; } - if (link) { + if (link) netif_carrier_on(netdev); - rtnl_lock(); - dev_set_mtu(netdev, be16_to_cpu(msg->mtu)); - rtnl_unlock(); - } else { + else netif_carrier_off(netdev); - } rcu_read_unlock(); } -- cgit From 54a6a043fb8580d5a741774669ef6049f402f228 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 11 Aug 2017 15:57:22 +0300 Subject: mISDN: Fix null pointer dereference at mISDN_FsmNew If mISDN_FsmNew() fails to allocate memory for jumpmatrix then null pointer dereference will occur on any write to jumpmatrix. The patch adds check on successful allocation and corresponding error handling. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: David S. Miller --- drivers/isdn/mISDN/fsm.c | 5 ++++- drivers/isdn/mISDN/fsm.h | 2 +- drivers/isdn/mISDN/layer1.c | 3 +-- drivers/isdn/mISDN/layer2.c | 15 +++++++++++++-- drivers/isdn/mISDN/tei.c | 20 +++++++++++++++++--- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c index 78fc5d5e9051..92e6570b1143 100644 --- a/drivers/isdn/mISDN/fsm.c +++ b/drivers/isdn/mISDN/fsm.c @@ -26,7 +26,7 @@ #define FSM_TIMER_DEBUG 0 -void +int mISDN_FsmNew(struct Fsm *fsm, struct FsmNode *fnlist, int fncount) { @@ -34,6 +34,8 @@ mISDN_FsmNew(struct Fsm *fsm, fsm->jumpmatrix = kzalloc(sizeof(FSMFNPTR) * fsm->state_count * fsm->event_count, GFP_KERNEL); + if (fsm->jumpmatrix == NULL) + return -ENOMEM; for (i = 0; i < fncount; i++) if ((fnlist[i].state >= fsm->state_count) || @@ -45,6 +47,7 @@ mISDN_FsmNew(struct Fsm *fsm, } else fsm->jumpmatrix[fsm->state_count * fnlist[i].event + fnlist[i].state] = (FSMFNPTR) fnlist[i].routine; + return 0; } EXPORT_SYMBOL(mISDN_FsmNew); diff --git a/drivers/isdn/mISDN/fsm.h b/drivers/isdn/mISDN/fsm.h index 928f5be192c1..e1def8490221 100644 --- a/drivers/isdn/mISDN/fsm.h +++ b/drivers/isdn/mISDN/fsm.h @@ -55,7 +55,7 @@ struct FsmTimer { void *arg; }; -extern void mISDN_FsmNew(struct Fsm *, struct FsmNode *, int); +extern int mISDN_FsmNew(struct Fsm *, struct FsmNode *, int); extern void mISDN_FsmFree(struct Fsm *); extern int mISDN_FsmEvent(struct FsmInst *, int , void *); extern void mISDN_FsmChangeState(struct FsmInst *, int); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index bebc57b72138..3192b0eb3944 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -414,8 +414,7 @@ l1_init(u_int *deb) l1fsm_s.event_count = L1_EVENT_COUNT; l1fsm_s.strEvent = strL1Event; l1fsm_s.strState = strL1SState; - mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList)); - return 0; + return mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList)); } void diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c index 7243a6746f8b..9ff0903a0e89 100644 --- a/drivers/isdn/mISDN/layer2.c +++ b/drivers/isdn/mISDN/layer2.c @@ -2247,15 +2247,26 @@ static struct Bprotocol X75SLP = { int Isdnl2_Init(u_int *deb) { + int res; debug = deb; mISDN_register_Bprotocol(&X75SLP); l2fsm.state_count = L2_STATE_COUNT; l2fsm.event_count = L2_EVENT_COUNT; l2fsm.strEvent = strL2Event; l2fsm.strState = strL2State; - mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList)); - TEIInit(deb); + res = mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList)); + if (res) + goto error; + res = TEIInit(deb); + if (res) + goto error_fsm; return 0; + +error_fsm: + mISDN_FsmFree(&l2fsm); +error: + mISDN_unregister_Bprotocol(&X75SLP); + return res; } void diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 908127efccf8..12d9e5f4beb1 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -1387,23 +1387,37 @@ create_teimanager(struct mISDNdevice *dev) int TEIInit(u_int *deb) { + int res; debug = deb; teifsmu.state_count = TEI_STATE_COUNT; teifsmu.event_count = TEI_EVENT_COUNT; teifsmu.strEvent = strTeiEvent; teifsmu.strState = strTeiState; - mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser)); + res = mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser)); + if (res) + goto error; teifsmn.state_count = TEI_STATE_COUNT; teifsmn.event_count = TEI_EVENT_COUNT; teifsmn.strEvent = strTeiEvent; teifsmn.strState = strTeiState; - mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet)); + res = mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet)); + if (res) + goto error_smn; deactfsm.state_count = DEACT_STATE_COUNT; deactfsm.event_count = DEACT_EVENT_COUNT; deactfsm.strEvent = strDeactEvent; deactfsm.strState = strDeactState; - mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList)); + res = mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList)); + if (res) + goto error_deact; return 0; + +error_deact: + mISDN_FsmFree(&teifsmn); +error_smn: + mISDN_FsmFree(&teifsmu); +error: + return res; } void TEIFree(void) -- cgit From e4dde4127396f0c8f1c2e11b3ecc5baf4f8628bf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:24 +0200 Subject: net: fix compilation when busy poll is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIN_NAPI_ID is used in various places outside of CONFIG_NET_RX_BUSY_POLL wrapping, so when it's not set we run into build errors such as: net/core/dev.c: In function 'dev_get_by_napi_id': net/core/dev.c:886:16: error: ‘MIN_NAPI_ID’ undeclared (first use in this function) if (napi_id < MIN_NAPI_ID) ^~~~~~~~~~~ Thus, have MIN_NAPI_ID always defined to fix these errors. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/busy_poll.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8ffd434676b7..71c72a939bf8 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -29,18 +29,18 @@ #include #include -#ifdef CONFIG_NET_RX_BUSY_POLL - -struct napi_struct; -extern unsigned int sysctl_net_busy_read __read_mostly; -extern unsigned int sysctl_net_busy_poll __read_mostly; - /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +#ifdef CONFIG_NET_RX_BUSY_POLL + +struct napi_struct; +extern unsigned int sysctl_net_busy_read __read_mostly; +extern unsigned int sysctl_net_busy_poll __read_mostly; + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; -- cgit From 2ed46ce45ec02f6b2188419acdf372a144e06fb5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:25 +0200 Subject: bpf: fix two missing target_size settings in bpf_convert_ctx_access When CONFIG_NET_SCHED or CONFIG_NET_RX_BUSY_POLL is /not/ set and we try a narrow __sk_buff load of tc_index or napi_id, respectively, then verifier rightfully complains that it's misconfigured, because we need to set target_size in each of the two cases. The rewrite for the ctx access is just a dummy op, but needs to pass, so fix this up. Fixes: f96da09473b5 ("bpf: simplify narrower ctx access") Reported-by: Shubham Bansal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..6280a602604c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3505,6 +3505,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct sk_buff, tc_index, 2, target_size)); #else + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else @@ -3520,6 +3521,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else + *target_size = 4; *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; -- cgit From fd851ba9caa9a63fdbb72a2e6ed5560c0989e999 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Aug 2017 10:48:53 -0700 Subject: udp: harden copy_linear_skb() syzkaller got crashes with CONFIG_HARDENED_USERCOPY=y configs. Issue here is that recvfrom() can be used with user buffer of Z bytes, and SO_PEEK_OFF of X bytes, from a skb with Y bytes, and following condition : Z < X < Y kernel BUG at mm/usercopy.c:72! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 2917 Comm: syzkaller842281 Not tainted 4.13.0-rc3+ #16 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d2fa40c0 task.stack: ffff8801d1fe8000 RIP: 0010:report_usercopy mm/usercopy.c:64 [inline] RIP: 0010:__check_object_size+0x3ad/0x500 mm/usercopy.c:264 RSP: 0018:ffff8801d1fef8a8 EFLAGS: 00010286 RAX: 0000000000000078 RBX: ffffffff847102c0 RCX: 0000000000000000 RDX: 0000000000000078 RSI: 1ffff1003a3fded5 RDI: ffffed003a3fdf09 RBP: ffff8801d1fef998 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d1ea480e R13: fffffffffffffffa R14: ffffffff84710280 R15: dffffc0000000000 FS: 0000000001360880(0000) GS:ffff8801dc000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000202ecfe4 CR3: 00000001d1ff8000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_to_iter include/linux/uio.h:105 [inline] copy_linear_skb include/net/udp.h:371 [inline] udpv6_recvmsg+0x1040/0x1af0 net/ipv6/udp.c:395 inet_recvmsg+0x14c/0x5f0 net/ipv4/af_inet.c:793 sock_recvmsg_nosec net/socket.c:792 [inline] sock_recvmsg+0xc9/0x110 net/socket.c:799 SYSC_recvfrom+0x2d6/0x570 net/socket.c:1788 SyS_recvfrom+0x40/0x50 net/socket.c:1760 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..e9b1d1eacb59 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -368,6 +368,8 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, { int n, copy = len - off; + if (copy < 0) + return -EINVAL; n = copy_to_iter(skb->data + off, copy, to); if (n == copy) return 0; -- cgit From c44245b3d5435f533ca8346ece65918f84c057f9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Aug 2017 09:00:06 -0700 Subject: xfs: fix inobt inode allocation search optimization When we try to allocate a free inode by searching the inobt, we try to find the inode nearest the parent inode by searching chunks both left and right of the chunk containing the parent. As an optimization, we cache the leftmost and rightmost records that we previously searched; if we do another allocation with the same parent inode, we'll pick up the search where it last left off. There's a bug in the case where we found a free inode to the left of the parent's chunk: we need to update the cached left and right records, but because we already reassigned the right record to point to the left, we end up assigning the left record to both the cached left and right records. This isn't a correctness problem strictly, but it can result in the next allocation rechecking chunks unnecessarily or allocating inodes further away from the parent than it needs to. Fix it by swapping the record pointer after we update the cached left and right records. Fixes: bd169565993b ("xfs: speed up free inode search") Signed-off-by: Omar Sandoval Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index ffd5a15d1bb6..abf5beaae907 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1246,13 +1246,13 @@ xfs_dialloc_ag_inobt( /* free inodes to the left? */ if (useleft && trec.ir_freecount) { - rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; + rec = trec; goto alloc_inode; } -- cgit From e28ae8e428fefe2facd72cea9f29906ecb9c861d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Aug 2017 12:45:35 -0700 Subject: iomap: fix integer truncation issues in the zeroing and dirtying helpers Fix the min_t calls in the zeroing and dirtying helpers to perform the comparisms on 64-bit types, which prevents them from incorrectly being truncated, and larger zeroing operations being stuck in a never ending loop. Special thanks to Markus Stockhausen for spotting the bug. Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 039266128b7f..59cc98ad7577 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -278,7 +278,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ offset = (pos & (PAGE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_SIZE - offset, length); + bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); if (IS_ERR(rpage)) @@ -373,7 +373,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, unsigned offset, bytes; offset = pos & (PAGE_SIZE - 1); /* Within page */ - bytes = min_t(unsigned, PAGE_SIZE - offset, count); + bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) status = iomap_dax_zero(pos, offset, bytes, iomap); -- cgit From afc1f55ca44e257f69da8f43e0714a76686ae8d1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 11 Aug 2017 20:34:45 -0700 Subject: MD: not clear ->safemode for external metadata array ->safemode should be triggered by mdadm for external metadaa array, otherwise array's state confuses mdadm. Fixes: 33182d15c6bf(md: always clear ->safemode when md_check_recovery gets the mddev lock.) Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e4ba95f6cd59..b01e458d31e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8656,7 +8656,7 @@ void md_check_recovery(struct mddev *mddev) if (mddev_trylock(mddev)) { int spares = 0; - if (mddev->safemode == 1) + if (!mddev->external && mddev->safemode == 1) mddev->safemode = 0; if (mddev->ro) { -- cgit From 11e9d7829dd08dbafb24517fe922f11c3a8a9dc2 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Sat, 12 Aug 2017 00:36:55 +0200 Subject: bonding: ratelimit failed speed/duplex update warning bond_miimon_commit() handles the UP transition for each slave of a bond in the case of MII. It is triggered 10 times per second for the default MII Polling interval of 100ms. For device drivers that do not implement __ethtool_get_link_ksettings() the call to bond_update_speed_duplex() fails persistently while the MII status could remain UP. That is, in this and other cases where the speed/duplex update keeps failing over a longer period of time while the MII state is UP, a warning is printed every MII polling interval. To address these excessive warnings net_ratelimit() should be used. Printing a warning once would not be sufficient since the call to bond_update_speed_duplex() could recover to succeed and fail again later. In that case there would be no new indication what went wrong. Fixes: b5bf0f5b16b9c (bonding: correctly update link status during mii-commit phase) Signed-off-by: Andreas Born Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 85bb272d2a34..fc63992ab0e0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2144,9 +2144,10 @@ static void bond_miimon_commit(struct bonding *bond) if (bond_update_speed_duplex(slave) && bond_needs_speed_duplex(bond)) { slave->link = BOND_LINK_DOWN; - netdev_warn(bond->dev, - "failed to get link speed/duplex for %s\n", - slave->dev->name); + if (net_ratelimit()) + netdev_warn(bond->dev, + "failed to get link speed/duplex for %s\n", + slave->dev->name); continue; } bond_set_slave_link_state(slave, BOND_LINK_UP, -- cgit From d86e63e1f0b7868c55c8d4a54854b85e2bac690b Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 11 Aug 2017 22:27:35 +0800 Subject: arm64: allwinner: h5: fix pinctrl IRQs The pin controller of H5 has three IRQs at the chip's GIC, which represents three banks of pinctrl IRQs. However, the device tree used to miss the third IRQ of the pin controller, which makes the PG bank IRQ not usable. Add the missing IRQ to the pinctrl node. Fixes: 4e36de179f27 ("arm64: allwinner: h5: add Allwinner H5 .dtsi") Signed-off-by: Icenowy Zheng Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index 732e2e06f503..d9a720bff05d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -120,5 +120,8 @@ }; &pio { + interrupts = , + , + ; compatible = "allwinner,sun50i-h5-pinctrl"; }; -- cgit From e9bf53ab1ee34bb05c104bbfd2b77c844773f8e6 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 11 Aug 2017 11:07:36 +0100 Subject: brcmfmac: feature check for multi-scheduled scan fails on bcm4343x devices The firmware feature check introduced for multi-scheduled scan turned out to be failing for bcm4343{0,1,8} devices resulting in a firmware crash. The reason for this crash has not yet been root cause so this patch avoids the feature check for those device as a short-term fix. Reported-by: Stefan Wahren Reported-by: Ian Molton Fixes: 9fe929aaace6 ("brcmfmac: add firmware feature detection for gscan feature") Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index d21258d277ce..f1b60740e020 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -159,8 +159,10 @@ void brcmf_feat_attach(struct brcmf_pub *drvr) brcmf_feat_firmware_capabilities(ifp); memset(&gscan_cfg, 0, sizeof(gscan_cfg)); - brcmf_feat_iovar_data_set(ifp, BRCMF_FEAT_GSCAN, "pfn_gscan_cfg", - &gscan_cfg, sizeof(gscan_cfg)); + if (drvr->bus_if->chip != BRCM_CC_43430_CHIP_ID) + brcmf_feat_iovar_data_set(ifp, BRCMF_FEAT_GSCAN, + "pfn_gscan_cfg", + &gscan_cfg, sizeof(gscan_cfg)); brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_PNO, "pfn"); if (drvr->bus_if->wowl_supported) brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_WOWL, "wowl"); -- cgit From 8df4b0031067758d8b0a3bfde7d35e980d0376d5 Mon Sep 17 00:00:00 2001 From: "Shih-Yuan Lee (FourDollars)" Date: Mon, 14 Aug 2017 18:00:47 +0800 Subject: ALSA: hda/realtek - Fix pincfg for Dell XPS 13 9370 The initial pin configs for Dell headset mode of ALC3271 has changed. /sys/class/sound/hwC0D0/init_pin_configs: (BIOS 0.1.4) 0x12 0xb7a60130 0x13 0xb8a61140 0x14 0x40000000 0x16 0x411111f0 0x17 0x90170110 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x4087992d 0x1e 0x411111f0 0x21 0x04211020 has changed to ... /sys/class/sound/hwC0D0/init_pin_configs: (BIOS 0.2.0) 0x12 0xb7a60130 0x13 0x40000000 0x14 0x411111f0 0x16 0x411111f0 0x17 0x90170110 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x4067992d 0x1e 0x411111f0 0x21 0x04211020 Fixes: b4576de87243 ("ALSA: hda/realtek - Fix typo of pincfg for Dell quirk") Signed-off-by: Shih-Yuan Lee (FourDollars) Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a91a9ef00c40..217bb582aff1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6647,7 +6647,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, - {0x13, 0xb8a61140}, {0x17, 0x90170110}), {} }; -- cgit From b80b32b6d5e79798b85cd4644206aaa069059390 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Aug 2017 08:29:18 -0400 Subject: ext4: fix clang build regression Arnd Bergmann As Stefan pointed out, I misremembered what clang can do specifically, and it turns out that the variable-length array at the end of the structure did not work (a flexible array would have worked here but not solved the problem): fs/ext4/mballoc.c:2303:17: error: fields must have a constant size: 'variable length array in structure' extension will never be supported ext4_grpblk_t counters[blocksize_bits + 2]; This reverts part of my previous patch, using a fixed-size array again, but keeping the check for the array overflow. Fixes: 2df2c3402fc8 ("ext4: fix warning about stack corruption") Reported-by: Stefan Agner Tested-by: Chandan Rajendra Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5a1052627a81..701085620cd8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2300,7 +2300,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) EXT4_MAX_BLOCK_LOG_SIZE); struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[blocksize_bits + 2]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; @@ -2309,6 +2309,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); + i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + sizeof(struct ext4_group_info); + grinfo = ext4_get_group_info(sb, group); /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { @@ -2320,7 +2323,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) buddy_loaded = 1; } - memcpy(&sg, ext4_get_group_info(sb, group), sizeof(sg)); + memcpy(&sg, ext4_get_group_info(sb, group), i); if (buddy_loaded) ext4_mb_unload_buddy(&e4b); -- cgit From 32aaf194201e98db4235b7b71ac62a22e2ac355f Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 14 Aug 2017 08:30:06 -0400 Subject: ext4: add missing xattr hash update When updating an extended attribute, if the padded value sizes are the same, a shortcut is taken to avoid the bulk of the work. This was fine until the xattr hash update was moved inside ext4_xattr_set_entry(). With that change, the hash update got missed in the shortcut case. Thanks to ZhangYi (yizhang089@gmail.com) for root causing the problem. Fixes: daf8328172df ("ext4: eliminate xattr entry e_hash recalculation for removes") Reported-by: Miklos Szeredi Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 82a5af9f6668..3dd970168448 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1543,7 +1543,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Clear padding bytes. */ memset(val + i->value_len, 0, new_size - i->value_len); } - return 0; + goto update_hash; } /* Compute min_offs and last. */ @@ -1707,6 +1707,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_value_size = cpu_to_le32(i->value_len); } +update_hash: if (i->value) { __le32 hash = 0; @@ -1725,7 +1726,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_name_len, &crc32c_hash, 1); } else if (is_block) { - __le32 *value = s->base + min_offs - new_size; + __le32 *value = s->base + le16_to_cpu( + here->e_value_offs); hash = ext4_xattr_hash_entry(here->e_name, here->e_name_len, value, -- cgit From 90b05b0598c698252d5f7dcdce335c5a546e9047 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 2 Aug 2017 13:47:42 +0900 Subject: gpio: reject invalid gpio before getting gpio_desc Check user-given gpio number and reject it before calling gpio_to_desc() because gpio_to_desc() is for kernel driver and it expects given gpio number is valid (means 0 to 511). If given number is invalid, gpio_to_desc() calls WARN() and dump registers and stack for debug. This means user can easily kick WARN() just by writing invalid gpio number (e.g. 512) to /sys/class/gpio/export. Fixes: 0e9a5edf5d01 ("gpio: fix deferred probe detection for legacy API") Signed-off-by: Masami Hiramatsu Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-sysfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 16fe9742597b..fc80add5fedb 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -432,6 +433,11 @@ static struct attribute *gpiochip_attrs[] = { }; ATTRIBUTE_GROUPS(gpiochip); +static struct gpio_desc *gpio_to_valid_desc(int gpio) +{ + return gpio_is_valid(gpio) ? gpio_to_desc(gpio) : NULL; +} + /* * /sys/class/gpio/export ... write-only * integer N ... number of GPIO to export (full access) @@ -450,7 +456,7 @@ static ssize_t export_store(struct class *class, if (status < 0) goto done; - desc = gpio_to_desc(gpio); + desc = gpio_to_valid_desc(gpio); /* reject invalid GPIOs */ if (!desc) { pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); @@ -493,7 +499,7 @@ static ssize_t unexport_store(struct class *class, if (status < 0) goto done; - desc = gpio_to_desc(gpio); + desc = gpio_to_valid_desc(gpio); /* reject bogus commands (gpio_unexport ignores them) */ if (!desc) { pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); -- cgit From 3f13b6a24f192da3096389b82202f16eff2c11ee Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 12 Jul 2017 13:22:29 +0200 Subject: gpio: mvebu: Fix cause computation in irq handler When switching to regmap, the way to compute the irq cause was reorganized. However while doing it, a typo was introduced: a 'xor' replaced a 'and'. This lead to wrong behavior in the interrupt handler ans one of the symptom was wrong irq handler called on the Armada 388 GP: "->handle_irq(): c016303c, handle_bad_irq+0x0/0x278 ->irq_data.chip(): c0b0ec0c, 0xc0b0ec0c ->action(): (null) IRQ_NOPROBE set IRQ_NOREQUEST set unexpected IRQ trap at vector 00 irq 0, desc: ee804800, depth: 1, count: 0, unhandled: 0" Fixes: 2233bf7a92e7 ("gpio: mvebu: switch to regmap for register access") Signed-off-by: Gregory CLEMENT Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index e338c3743562..45c65f805fd6 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -557,7 +557,7 @@ static void mvebu_gpio_irq_handler(struct irq_desc *desc) edge_cause = mvebu_gpio_read_edge_cause(mvchip); edge_mask = mvebu_gpio_read_edge_mask(mvchip); - cause = (data_in ^ level_mask) | (edge_cause & edge_mask); + cause = (data_in & level_mask) | (edge_cause & edge_mask); for (i = 0; i < mvchip->chip.ngpio; i++) { int irq; -- cgit From 491ab4700d1b64f5cf2f9055e01613a923df5fab Mon Sep 17 00:00:00 2001 From: Nikhil Mahale Date: Wed, 9 Aug 2017 09:23:01 +0530 Subject: drm: Fix framebuffer leak Do not leak framebuffer if client provided crtc id found invalid. Signed-off-by: Nikhil Mahale Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1502250781-5779-1-git-send-email-nmahale@nvidia.com --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 5dc8c4350602..e40c12fabbde 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -601,6 +601,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, crtc = drm_crtc_find(dev, plane_req->crtc_id); if (!crtc) { + drm_framebuffer_put(fb); DRM_DEBUG_KMS("Unknown crtc ID %d\n", plane_req->crtc_id); return -ENOENT; -- cgit From 7f5d6dac548b983702dd7aac1d463bd88dff50a8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 14 Aug 2017 12:07:21 +0200 Subject: drm/atomic: Handle -EDEADLK with out-fences correctly complete_crtc_signaling is freeing fence_state, but when retrying num_fences and fence_state are not zero'd. This caused duplicate fd's in the fence_state array, followed by a BUG_ON in fs/file.c because we reallocate freed memory, and installing over an existing fd, or potential other fun. Zero fence_state and num_fences correctly in the retry loop, which allows kms_atomic_transition to pass. Fixes: beaf5af48034 ("drm/fence: add out-fences support") Cc: Gustavo Padovan Cc: Brian Starkey (v10) Cc: Sean Paul Cc: Daniel Vetter Cc: Jani Nikula Cc: David Airlie Signed-off-by: Maarten Lankhorst Cc: # v4.10+ Testcase: kms_atomic_transitions.plane-all-modeset-transition-fencing (with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y) Link: https://patchwork.freedesktop.org/patch/msgid/20170814100721.13340-1-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter #intel-gfx on irc --- drivers/gpu/drm/drm_atomic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c0f336d23f9c..b43939f24812 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -2167,10 +2167,10 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; struct drm_plane *plane; - struct drm_out_fence_state *fence_state = NULL; + struct drm_out_fence_state *fence_state; unsigned plane_mask; int ret = 0; - unsigned int i, j, num_fences = 0; + unsigned int i, j, num_fences; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) @@ -2211,6 +2211,8 @@ retry: plane_mask = 0; copied_objs = 0; copied_props = 0; + fence_state = NULL; + num_fences = 0; for (i = 0; i < arg->count_objs; i++) { uint32_t obj_id, count_props; -- cgit From 26a72e8a8d0707f1d49133a19c027a3af9fcfdcb Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 4 Aug 2017 15:03:48 +0100 Subject: drm/i915: remove unused function declaration This function is not part of the driver anymore. Signed-off-by: Lionel Landwerlin Fixes: 90f4fcd56bda ("drm/i915: Remove forced stop ring on suspend/unload") Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170804140348.24971-1-lionel.g.landwerlin@intel.com (cherry picked from commit fe29133df37ac31de9e657ad91bcf74cdfe8c4cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 52b3a1fd4059..57ef5833c427 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -63,7 +63,6 @@ enum { }; /* Logical Rings */ -void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -- cgit From a0125a932e917cb507b682cb66645efdca1f8cab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Aug 2017 14:19:04 +0100 Subject: drm/i915: Perform an invalidate prior to executing golden renderstate As we may have just bound the renderstate into the GGTT for execution, we need to ensure that the GTT TLB are also flushed. On snb-gt2, this would cause a random GPU hang at the start of a new context (e.g. boot) and on snb-gt1, it was causing the renderstate batch to take ~10s. It was the GPU hang that revealed the truth, as the CS gleefully executed beyond the end of the golden renderstate batch, a good indicator for a GTT TLB miss. Fixes: 20fe17aa52dc ("drm/i915: Remove redundant TLB invalidate on switching contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20170808131904.1385-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Cc: # v4.12-rc1+ (cherry picked from commit 802673d66f8a6ded5d2689d597853c7bb3a70163) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_render_state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 7032c542a9b1..4dd4c2159a92 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -242,6 +242,10 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) goto err_unpin; } + ret = req->engine->emit_flush(req, EMIT_INVALIDATE); + if (ret) + goto err_unpin; + ret = req->engine->emit_bb_start(req, so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); -- cgit From 1dd7a3e7af70ebdd0cdd937b180726d15a4f0948 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 9 Aug 2017 13:07:02 -0700 Subject: drm/i915/cnl: Add slice and subslice information to debugfs. A missing part to EU slice power gating is the debugfs interface. This patch actually should have been squashed to the initial EU slice power gating one. v2: Initial patch was merged without this part. Fixes: c7ae7e9ab207 ("drm/i915/cnl: Configure EU slice power gating.") Cc: Joonas Lahtinen Signed-off-by: Rodrigo Vivi Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170809200702.11236-1-rodrigo.vivi@intel.com (cherry picked from commit 7ea1adf30f82a4c0910524ac06f8f1f26281bb23) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 00d8967c8512..d1bd53b73738 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4580,7 +4580,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_GEN9_BC(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; -- cgit From 7eceb9d04966435ed2d03f5554413715ab3cb34a Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 17 Jul 2017 12:58:54 -0700 Subject: drm/i915: Return correct EDP voltage swing table for 0.85V For 0.85V cnl_get_buf_trans_edp() returns the DP table, instead of EDP. Use the correct table. The error was pointed out by this clang warning: drivers/gpu/drm/i915/intel_ddi.c:392:39: warning: variable 'cnl_ddi_translations_edp_0_85V' is not needed and will not be emitted [-Wunneeded-internal-declaration] static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_85V[] = { Fixes: cf54ca8bc567 ("drm/i915/cnl: Implement voltage swing sequence.") Signed-off-by: Matthias Kaehlcke Reviewed-by: Manasi Navare Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20170717195854.192139-1-mka@chromium.org (cherry picked from commit 50946c89850db13bd672c664aec6cf4551f71fe9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9edeaaef77ad..d3b3252a8742 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1762,7 +1762,7 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, if (dev_priv->vbt.edp.low_vswing) { if (voltage == VOLTAGE_INFO_0_85V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); - return cnl_ddi_translations_dp_0_85V; + return cnl_ddi_translations_edp_0_85V; } else if (voltage == VOLTAGE_INFO_0_95V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); return cnl_ddi_translations_edp_0_95V; -- cgit From 430ffaf46c05bda56535893f38e684f5418c4c93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 12 Aug 2017 16:27:24 +0100 Subject: drm/i915: Suppress switch_mm emission between the same aliasing_ppgtt When switching between contexts using the aliasing_ppgtt, the VM is shared. We don't need to reload the PD registers unless they are dirty. Martin Peres reported an issue that looks like corruption between Haswell context switches, bisecting to commit f9326be5f1d3 ("drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use"). Switching between the same mm (the aliasing_ppgtt is used for all contexts in this case) should be a nop, but appears to trigger some side-effects in the context switch. However, as we know the switch is redundant in this case, we can skip it and continue to ignore the issue until somebody feels strong enough to investigate full-ppgtt on gen7 again! Except.. Martin was using full-ppgtt which is not supported as it doesn't work correctly yet. So whilst the bisect did yield valuable information about the failures, the fix should not have any user impact under default settings, with the exception of a slightly lower throughput on xcs as the VM would always be reloaded. v2: Also remember to set the legacy_active_context following the switch on xcs (commit e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc")) Fixes: f9326be5f1d3 ("drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use") Fixes: e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") Reported-by: Martin Peres Signed-off-by: Chris Wilson Cc: Martin Peres Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170812152724.6883-1-chris@chris-wilson.co.uk (cherry picked from commit 12124bea5b82dc1e917304aed703c27292270051) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 39ed58a21fc1..e1e971ee2ed5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -688,19 +688,19 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, } static bool -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *engine, - struct i915_gem_context *to) +needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine) { + struct i915_gem_context *from = engine->legacy_active_context; + if (!ppgtt) return false; /* Always load the ppgtt on first use */ - if (!engine->legacy_active_context) + if (!from) return true; /* Same context without new entries, skip */ - if (engine->legacy_active_context == to && + if ((!from->ppgtt || from->ppgtt == ppgtt) && !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -744,7 +744,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (skip_rcs_switch(ppgtt, engine, to)) return 0; - if (needs_pd_load_pre(ppgtt, engine, to)) { + if (needs_pd_load_pre(ppgtt, engine)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load * Register Immediate commands in Ring Buffer before submitting @@ -841,7 +841,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - if (needs_pd_load_pre(ppgtt, engine, to)) { + if (needs_pd_load_pre(ppgtt, engine)) { int ret; trace_switch_mm(engine, to); @@ -852,6 +852,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); } + engine->legacy_active_context = to; return 0; } -- cgit From 781cc76e0c2469cb7ac12ba238a4ea006978e321 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 8 Aug 2017 10:08:26 +0200 Subject: drm/i915: Avoid the gpu reset vs. modeset deadlock ... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson Date: Thu Jun 22 11:56:25 2017 +0100 drm/i915: Break modeset deadlocks on reset Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern. And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this. But for now just start with something that is guaranteed to recover faster, for much better CI througput. This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender. v2: Add a debug message to explain what's going on. We can't DRM_ERROR because that spams CI. And the timeout based fallback still prints a DRM_ERROR, in case something goes wrong. v3: Fix comment layout (Michel) Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin (v2) Cc: Michel Thierry Reviewed-by: Tvrtko Ursulin (v2) Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170808080828.23650-1-daniel.vetter@ffwll.ch (cherry picked from commit 97154ec242c14f646a3ab3b4da8f838d197f300d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9471c88d449e..cc484b56eeaa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3485,6 +3485,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return; + /* We have a modeset vs reset deadlock, defensively unbreak it. + * + * FIXME: We can do a _lot_ better, this is just a first iteration. + */ + i915_gem_set_wedged(dev_priv); + DRM_DEBUG_DRIVER("Wedging GPU to avoid deadlocks with pending modeset updates\n"); + /* * Need mode_config.mutex so that we don't * trample ongoing ->detect() and whatnot. -- cgit From 2c87d63ac853550e734edfd45e1be5e5aa44fbcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Aug 2017 00:52:58 +0200 Subject: ipv4: route: fix inet_rtm_getroute induced crash "ip route get $daddr iif eth0 from $saddr" causes: BUG: KASAN: use-after-free in ip_route_input_rcu+0x1535/0x1b50 Call Trace: ip_route_input_rcu+0x1535/0x1b50 ip_route_input_noref+0xf9/0x190 tcp_v4_early_demux+0x1a4/0x2b0 ip_rcv+0xbcb/0xc05 __netif_receive_skb+0x9c/0xd0 netif_receive_skb_internal+0x5a8/0x890 Problem is that inet_rtm_getroute calls either ip_route_input_rcu (if an iif was provided) or ip_route_output_key_hash_rcu. But ip_route_input_rcu, unlike ip_route_output_key_hash_rcu, already associates the dst_entry with the skb. This clears the SKB_DST_NOREF bit (i.e. skb_dst_drop will release/free the entry while it should not). Thus only set the dst if we called ip_route_output_key_hash_rcu(). I tested this patch by running: while true;do ip r get 10.0.1.2;done > /dev/null & while true;do ip r get 10.0.1.2 iif eth0 from 10.0.1.1;done > /dev/null & ... and saw no crash or memory leak. Cc: Roopa Prabhu Cc: David Ahern Fixes: ba52d61e0ff ("ipv4: route: restore skb_dst_set in inet_rtm_getroute") Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0383e66f59bc..7effa62beed3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2750,12 +2750,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); + else + skb_dst_set(skb, &rt->dst); } if (err) goto errout_free; - skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; -- cgit From fed5f5718c4989a03b1b4cdc0c7f273c3c74ee9e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 14 Aug 2017 17:55:56 +0200 Subject: tipc: accept PACKET_MULTICAST packets On L2 bearers, the TIPC broadcast function is sending out packets using the corresponding L2 broadcast address. At reception, we filter such packets under the assumption that they will also be delivered as broadcast packets. This assumption doesn't always hold true. Under high load, we have seen that a switch may convert the destination address and deliver the packet as a PACKET_MULTICAST, something leading to inadvertently dropped packets and a stale and reset broadcast link. We fix this by extending the reception filtering to accept packets of type PACKET_MULTICAST. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d174ee3254ee..767e0537dde5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -596,7 +596,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && - (skb->pkt_type <= PACKET_BROADCAST))) { + (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; tipc_rcv(dev_net(dev), skb, b); rcu_read_unlock(); -- cgit From 59a361bc6f6e91d57f25ff0aebb0e646beb3b41d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 14 Aug 2017 18:28:49 +0200 Subject: tipc: avoid inheriting msg_non_seq flag when message is returned In the function msg_reverse(), we reverse the header while trying to reuse the original buffer whenever possible. Those rejected/returned messages are always transmitted as unicast, but the msg_non_seq field is not explicitly set to zero as it should be. We have seen cases where multicast senders set the message type to "NOT dest_droppable", meaning that a multicast message shorter than one MTU will be returned, e.g., during receive buffer overflow, by reusing the original buffer. This has the effect that even the 'msg_non_seq' field is inadvertently inherited by the rejected message, although it is now sent as a unicast message. This again leads the receiving unicast link endpoint to steer the packet toward the broadcast link receive function, where it is dropped. The affected unicast link is thereafter (after 100 failed retransmissions) declared 'stale' and reset. We fix this by unconditionally setting the 'msg_non_seq' flag to zero for all rejected/returned messages. Reported-by: Canh Duc Luu Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ab3087687a32..dcd90e6fa7c3 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -513,6 +513,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); + msg_set_non_seq(hdr, 0); msg_set_origport(hdr, msg_destport(&ohdr)); msg_set_destport(hdr, msg_origport(&ohdr)); msg_set_destnode(hdr, msg_prevnode(&ohdr)); -- cgit From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 9 Aug 2017 15:28:22 +0200 Subject: i2c: designware: Fix system suspend The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"), may suggest to the PM core to try out the so called direct_complete path for system sleep. In this path, the PM core treats a runtime suspended device as it's already in a proper low power state for system sleep, which makes it skip calling the system sleep callbacks for the device, except for the ->prepare() and the ->complete() callbacks. However, the PM core may unset the direct_complete flag for a parent device, in case its child device are being system suspended before. In this scenario, the PM core invokes the system sleep callbacks, no matter if the device is runtime suspended or not. Particularly in cases of an existing i2c slave device, the above path is triggered, which breaks the assumption that the i2c device is always runtime resumed whenever the dw_i2c_plat_suspend() is being called. More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and clk_core_unprepare(), for an already disabled/unprepared clock, leading to a splat in the log about clocks calls being wrongly balanced and breaking system sleep. To still allow the direct_complete path in cases when it's possible, but also to keep the fix simple, let's runtime resume the i2c device in the ->suspend() callback, before continuing to put the device into low power state. Note, in cases when the i2c device is attached to the ACPI PM domain, this problem doesn't occur, because ACPI's ->suspend() callback, assigned to acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. It should also be noted that this change does not fix commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"). Because for the non-ACPI case, the system sleep support was already broken prior that point. Cc: # v4.4+ Signed-off-by: Ulf Hansson Acked-by: Rafael J. Wysocki Tested-by: John Stultz Tested-by: Jarkko Nikula Acked-by: Jarkko Nikula Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 143a8fd582b4..441afc715a90 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -430,7 +430,7 @@ static void dw_i2c_plat_complete(struct device *dev) #endif #ifdef CONFIG_PM -static int dw_i2c_plat_suspend(struct device *dev) +static int dw_i2c_plat_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); @@ -452,11 +452,21 @@ static int dw_i2c_plat_resume(struct device *dev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int dw_i2c_plat_suspend(struct device *dev) +{ + pm_runtime_resume(dev); + return dw_i2c_plat_runtime_suspend(dev); +} +#endif + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { .prepare = dw_i2c_plat_prepare, .complete = dw_i2c_plat_complete, SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) - SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) + SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, + dw_i2c_plat_resume, + NULL) }; #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) -- cgit From 984277a041d5ee4a65aaadf0307d67a7c401e11c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 11 Aug 2017 14:44:55 +0300 Subject: i2c: designware: Fix oops from i2c_dw_irq_handler_slave When i2c-designware is initialized in slave mode the i2c-designware-slave.c: i2c_dw_irq_handler_slave() can hit a NULL pointer dereference when I2C slave backend is not registered but code is accessing the struct dw_i2c_dev.slave without testing is it NULL. We might get spurious interrupts from other devices or from IRQ core during unloading the driver when CONFIG_DEBUG_SHIRQ is set. Existing check for enable and IRQ status is not enough since device can be power gated and those bits may read 1. Fix this by handling the interrupt only when also struct dw_i2c_dev.slave is set. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 0548c7ea578c..4b62a3872763 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -272,7 +272,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) slave_activity = ((dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_SLAVE_ACTIVITY) >> 6); - if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY)) + if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) return 0; dev_dbg(dev->dev, -- cgit From 4e2d93de070ceaca5097f7ee5c311731b83208a0 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 9 Aug 2017 15:24:44 +0300 Subject: i2c: designware: Fix standard mode speed when configuring the slave mode Code sets bit DW_IC_CON_SPEED_FAST (0x4) always when configuring the slave mode. This results incorrect register value DW_IC_CON_SPEED_HIGH (0x6) when OR'ed together with DW_IC_CON_SPEED_STD (0x2). Remove this and let the code set the speed mode bits according to clock frequency or default to fast mode. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 441afc715a90..57248bccadbc 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -198,8 +198,7 @@ static void i2c_dw_configure_slave(struct dw_i2c_dev *dev) dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY; dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL | - DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED | - DW_IC_CON_SPEED_FAST; + DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED; dev->mode = DW_IC_SLAVE; -- cgit From f4b17a14faeec4160f97ad75ea7534f571f12404 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 9 Aug 2017 11:21:28 +0200 Subject: i2c: core: Make comment about I2C table requirement to reflect the code I2C drivers were required to have an I2C device ID table even if were for devices that would only be registered using a specific firmware interface (e.g: OF or ACPI). But commit da10c06a044b ("i2c: Make I2C ID tables non-mandatory for DT'ed devices") changed the I2C core to relax the requirement and allow drivers to avoid defining this table. Unfortunately it only took into account drivers for OF-only devices and forgot about ACPI-only ones, and this was fixed by commit c64ffff7a9d1 ("i2c: core: Allow empty id_table in ACPI case as well"). But the latter didn't update the original comment, so it doesn't reflect what the code does now. Signed-off-by: Javier Martinez Canillas Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 12822a4b8f8f..56e46581b84b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -353,8 +353,8 @@ static int i2c_device_probe(struct device *dev) } /* - * An I2C ID table is not mandatory, if and only if, a suitable Device - * Tree match table entry is supplied for the probing device. + * An I2C ID table is not mandatory, if and only if, a suitable OF + * or ACPI ID table is supplied for the probing device. */ if (!driver->id_table && !i2c_acpi_match_device(dev->driver->acpi_match_table, client) && -- cgit From 42543aeb48e3701b49e0a83654248afc38feb88f Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Thu, 10 Aug 2017 18:15:45 +0300 Subject: i2c: simtec: use release_mem_region instead of release_resource Use api pair of request_mem_region and release_mem_region instead of release_resource. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-simtec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-simtec.c b/drivers/i2c/busses/i2c-simtec.c index b4685bb9b5d7..adca51a99487 100644 --- a/drivers/i2c/busses/i2c-simtec.c +++ b/drivers/i2c/busses/i2c-simtec.c @@ -127,8 +127,7 @@ static int simtec_i2c_probe(struct platform_device *dev) iounmap(pd->reg); err_res: - release_resource(pd->ioarea); - kfree(pd->ioarea); + release_mem_region(pd->ioarea->start, size); err: kfree(pd); @@ -142,8 +141,7 @@ static int simtec_i2c_remove(struct platform_device *dev) i2c_del_adapter(&pd->adap); iounmap(pd->reg); - release_resource(pd->ioarea); - kfree(pd->ioarea); + release_mem_region(pd->ioarea->start, resource_size(pd->ioarea)); kfree(pd); return 0; -- cgit From f1c0b7e448b9e66dd9a7343bab58a3d3a477e104 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 28 Jul 2017 18:00:12 -0700 Subject: i2c: aspeed: fixed potential null pointer dereference Before I skipped null checks when the master is in the STOP state; this fixes that. Signed-off-by: Brendan Higgins Acked-by: Joel Stanley Signed-off-by: Wolfram Sang Fixes: f327c686d3ba ("i2c: aspeed: added driver for Aspeed I2C") --- drivers/i2c/busses/i2c-aspeed.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index f19348328a71..6fdf9231c23c 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -410,10 +410,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus) } /* We are in an invalid state; reset bus to a known state. */ - if (!bus->msgs && bus->master_state != ASPEED_I2C_MASTER_STOP) { + if (!bus->msgs) { dev_err(bus->dev, "bus in unknown state"); bus->cmd_err = -EIO; - aspeed_i2c_do_stop(bus); + if (bus->master_state != ASPEED_I2C_MASTER_STOP) + aspeed_i2c_do_stop(bus); goto out_no_complete; } msg = &bus->msgs[bus->msgs_index]; -- cgit From 1874064eed0502bd9bef7be8023757b0c4f26883 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Aug 2017 20:11:26 -0700 Subject: Input: elan_i2c - add ELAN0608 to the ACPI table Similar to commit 722c5ac708b4f ("Input: elan_i2c - add ELAN0605 to the ACPI table"), ELAN0608 should be handled by elan_i2c. This touchpad can be found in Lenovo ideapad 320-14IKB. BugLink: https://bugs.launchpad.net/bugs/1708852 Signed-off-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 3b616cb7c67f..9fe3908d12d5 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1248,6 +1248,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0100", 0 }, { "ELAN0600", 0 }, { "ELAN0605", 0 }, + { "ELAN0608", 0 }, { "ELAN1000", 0 }, { } }; -- cgit From 76988690402dde2880bfe06ecccf381d48ba8e1c Mon Sep 17 00:00:00 2001 From: KT Liao Date: Mon, 14 Aug 2017 20:11:59 -0700 Subject: Input: elan_i2c - Add antoher Lenovo ACPI ID for upcoming Lenovo NB Add 2 new IDs (ELAN0609 and ELAN060B) to the list of ACPI IDs that should be handled by the driver. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 9fe3908d12d5..714cf7f9b138 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1249,6 +1249,9 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, + { "ELAN0605", 0 }, + { "ELAN0609", 0 }, + { "ELAN060B", 0 }, { "ELAN1000", 0 }, { } }; -- cgit From a99b646afa8a02571ea298bedca6592d818229cd Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:23 +0800 Subject: PCI: Disable PCIe Relaxed Ordering if unsupported When bit4 is set in the PCIe Device Control register, it indicates whether the device is permitted to use relaxed ordering. On some platforms using relaxed ordering can have performance issues or due to erratum can cause data-corruption. In such cases devices must avoid using relaxed ordering. The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that Relaxed Ordering (RO) attribute should not be used for Transaction Layer Packets (TLP) targeted towards these affected root complexes. This patch checks if there is any node in the hierarchy that indicates that using relaxed ordering is not safe. In such cases the patch turns off the relaxed ordering by clearing the capability for this device. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Ashok Raj Acked-by: Alexander Duyck Acked-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/pci/probe.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/quirks.c | 11 +++++++++++ include/linux/pci.h | 3 +++ 3 files changed, 57 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c31310db0404..e6a917b4acd3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1762,6 +1762,48 @@ static void pci_configure_extended_tags(struct pci_dev *dev) PCI_EXP_DEVCTL_EXT_TAG); } +/** + * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable + * @dev: PCI device to query + * + * Returns true if the device has enabled relaxed ordering attribute. + */ +bool pcie_relaxed_ordering_enabled(struct pci_dev *dev) +{ + u16 v; + + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v); + + return !!(v & PCI_EXP_DEVCTL_RELAX_EN); +} +EXPORT_SYMBOL(pcie_relaxed_ordering_enabled); + +static void pci_configure_relaxed_ordering(struct pci_dev *dev) +{ + struct pci_dev *root; + + /* PCI_EXP_DEVICE_RELAX_EN is RsvdP in VFs */ + if (dev->is_virtfn) + return; + + if (!pcie_relaxed_ordering_enabled(dev)) + return; + + /* + * For now, we only deal with Relaxed Ordering issues with Root + * Ports. Peer-to-Peer DMA is another can of worms. + */ + root = pci_find_pcie_root_port(dev); + if (!root) + return; + + if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) { + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN); + dev_info(&dev->dev, "Disable Relaxed Ordering because the Root Port didn't support it\n"); + } +} + static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; @@ -1769,6 +1811,7 @@ static void pci_configure_device(struct pci_dev *dev) pci_configure_mps(dev); pci_configure_extended_tags(dev); + pci_configure_relaxed_ordering(dev); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6967c6b4cf6b..61b59bfa7bfc 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4015,6 +4015,17 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6868, PCI_CLASS_NOT_DEFINED, 8, DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6869, PCI_CLASS_NOT_DEFINED, 8, quirk_tw686x_class); +/* + * Some devices have problems with Transaction Layer Packets with the Relaxed + * Ordering Attribute set. Such devices should mark themselves and other + * Device Drivers should check before sending TLPs with RO set. + */ +static void quirk_relaxedordering_disable(struct pci_dev *dev) +{ + dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING; + dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe Completion erratum\n"); +} + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..29606fb89464 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -188,6 +188,8 @@ enum pci_dev_flags { * the direct_complete optimization. */ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), + /* Don't use Relaxed Ordering for TLPs directed at this device */ + PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { @@ -1125,6 +1127,7 @@ bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); +bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); -- cgit From 87e09cdec4dae08acdb4aa49beb793c19d73e73e Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:24 +0800 Subject: PCI: Disable Relaxed Ordering for some Intel processors According to the Intel spec section 3.9.1 said: 3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory and Toward MMIO Regions (P2P) In order to maximize performance for PCIe devices in the processors listed in Table 3-6 below, the soft- ware should determine whether the accesses are toward coherent memory (system memory) or toward MMIO regions (P2P access to other devices). If the access is toward MMIO region, then software can command HW to set the RO bit in the TLP header, as this would allow hardware to achieve maximum throughput for these types of accesses. For accesses toward coherent memory, software can command HW to clear the RO bit in the TLP header (no RO), as this would allow hardware to achieve maximum throughput for these types of accesses. Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing PCIe Performance Processor CPU RP Device IDs Intel Xeon processors based on 6F01H-6F0EH Broadwell microarchitecture Intel Xeon processors based on 2F01H-2F0EH Haswell microarchitecture It means some Intel processors has performance issue when use the Relaxed Ordering Attribute, so disable Relaxed Ordering for these root port. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Alexander Duyck Acked-by: Ashok Raj Signed-off-by: David S. Miller --- drivers/pci/quirks.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 61b59bfa7bfc..1272f7e65699 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4026,6 +4026,68 @@ static void quirk_relaxedordering_disable(struct pci_dev *dev) dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe Completion erratum\n"); } +/* + * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root + * Complex has a Flow Control Credit issue which can cause performance + * problems with Upstream Transaction Layer Packets with Relaxed Ordering set. + */ +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f06, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f07, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f08, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f09, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0a, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0b, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0c, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0d, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0e, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the -- cgit From 077fa19c5dfa06a6ae04fb1661680940ff837612 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:25 +0800 Subject: PCI: Disable Relaxed Ordering Attributes for AMD A1100 Casey reported that the AMD ARM A1100 SoC has a bug in its PCIe Root Port where Upstream Transaction Layer Packets with the Relaxed Ordering Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering set, it would cause Data Corruption, so we need to disable Relaxed Ordering Attribute when Upstream TLPs to the Root Port. Reported-and-suggested-by: Casey Leedom Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/pci/quirks.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1272f7e65699..140760403f36 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4088,6 +4088,22 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0d, PCI_CLASS_NOT_DEFINED DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0e, PCI_CLASS_NOT_DEFINED, 8, quirk_relaxedordering_disable); +/* + * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex + * where Upstream Transaction Layer Packets with the Relaxed Ordering + * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering + * set. This is a violation of the PCIe 3.0 Transaction Ordering Rules + * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0 + * November 10, 2010). As a result, on this platform we can't use Relaxed + * Ordering for Upstream TLPs. + */ +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the -- cgit From b0ba9d5fded9590cac67a482c5aab8b1bf86ee40 Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Tue, 15 Aug 2017 11:23:26 +0800 Subject: net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag cxgb4 Ethernet driver now queries PCIe configuration space to determine if it can send TLPs to it with the Relaxed Ordering Attribute set. Remove the enable_pcie_relaxed_ordering() to avoid enable PCIe Capability Device Control[Relaxed Ordering Enable] at probe routine, to make sure the driver will not send the Relaxed Ordering TLPs to the Root Complex which could not deal the Relaxed Ordering TLPs. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Reviewed-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +++++++++++++++++------ drivers/net/ethernet/chelsio/cxgb4/sge.c | 5 +++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ef4be781fd05..09ea62ee96d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -529,6 +529,7 @@ enum { /* adapter flags */ USING_SOFT_PARAMS = (1 << 6), MASTER_PF = (1 << 7), FW_OFLD_CONN = (1 << 9), + ROOT_NO_RELAXED_ORDERING = (1 << 10), }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e403fa18f1b1..33bb8678833a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev) dev->name, adap->params.vpd.id, adap->name, buf); } -static void enable_pcie_relaxed_ordering(struct pci_dev *dev) -{ - pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); -} - /* * Free the following resources: * - memory used for tables @@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_enable_pcie_error_reporting(pdev); - enable_pcie_relaxed_ordering(pdev); pci_set_master(pdev); pci_save_state(pdev); @@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->msg_enable = DFLT_MSG_ENABLE; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map)); + /* If possible, we use PCIe Relaxed Ordering Attribute to deliver + * Ingress Packet Data to Free List Buffers in order to allow for + * chipset performance optimizations between the Root Complex and + * Memory Controllers. (Messages to the associated Ingress Queue + * notifying new Packet Placement in the Free Lists Buffers will be + * send without the Relaxed Ordering Attribute thus guaranteeing that + * all preceding PCIe Transaction Layer Packets will be processed + * first.) But some Root Complexes have various issues with Upstream + * Transaction Layer Packets with the Relaxed Ordering Attribute set. + * The PCIe devices which under the Root Complexes will be cleared the + * Relaxed Ordering bit in the configuration space, So we check our + * PCIe configuration space to see if it's flagged with advice against + * using Relaxed Ordering. + */ + if (!pcie_relaxed_ordering_enabled(pdev)) + adapter->flags |= ROOT_NO_RELAXED_ORDERING; + spin_lock_init(&adapter->stats_lock); spin_lock_init(&adapter->tid_release_lock); spin_lock_init(&adapter->win0_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ede12209f20b..4ef68f69b58c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct fw_iq_cmd c; struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); + int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING); /* Size needs to be multiple of 16, including status entry. */ iq->size = roundup(iq->size, 16); @@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0FETCHRO_V(relaxed) | + FW_IQ_CMD_FL0DATARO_V(relaxed) | FW_IQ_CMD_FL0PADEN_F); if (cong >= 0) c.iqns_to_fl0congen |= -- cgit From b629276df7f669b1daaad2131ca418ab55186565 Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Tue, 15 Aug 2017 11:23:27 +0800 Subject: net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag cxgb4vf Ethernet driver now queries PCIe configuration space to determine if it can send TLPs to it with the Relaxed Ordering Attribute set, just like the pf did. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Reviewed-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/adapter.h | 1 + drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 109bc630408b..08c6ddb84a04 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -408,6 +408,7 @@ enum { /* adapter flags */ USING_MSI = (1UL << 1), USING_MSIX = (1UL << 2), QUEUES_BOUND = (1UL << 3), + ROOT_NO_RELAXED_ORDERING = (1UL << 4), }; /* diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index ac7a150c54e9..2b85b874fd0d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, */ adapter->name = pci_name(pdev); adapter->msg_enable = DFLT_MSG_ENABLE; + + /* If possible, we use PCIe Relaxed Ordering Attribute to deliver + * Ingress Packet Data to Free List Buffers in order to allow for + * chipset performance optimizations between the Root Complex and + * Memory Controllers. (Messages to the associated Ingress Queue + * notifying new Packet Placement in the Free Lists Buffers will be + * send without the Relaxed Ordering Attribute thus guaranteeing that + * all preceding PCIe Transaction Layer Packets will be processed + * first.) But some Root Complexes have various issues with Upstream + * Transaction Layer Packets with the Relaxed Ordering Attribute set. + * The PCIe devices which under the Root Complexes will be cleared the + * Relaxed Ordering bit in the configuration space, So we check our + * PCIe configuration space to see if it's flagged with advice against + * using Relaxed Ordering. + */ + if (!pcie_relaxed_ordering_enabled(pdev)) + adapter->flags |= ROOT_NO_RELAXED_ORDERING; + err = adap_init0(adapter); if (err) goto err_unmap_bar; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index e37dde2ba97f..05498e7f2840 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, struct port_info *pi = netdev_priv(dev); struct fw_iq_cmd cmd, rpl; int ret, iqandst, flsz = 0; + int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING); /* * If we're using MSI interrupts and we're not initializing the @@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, cpu_to_be32( FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) | FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_V(relaxed) | + FW_IQ_CMD_FL0DATARO_V(relaxed) | FW_IQ_CMD_FL0PADEN_F); /* In T6, for egress queue type FL there is internal overhead -- cgit From 539a06baedd06127389b254f6b9f016ca072da13 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 14 Aug 2017 18:04:24 +0200 Subject: tcp: ulp: avoid module refcnt leak in tcp_set_ulp __tcp_ulp_find_autoload returns tcp_ulp_ops after taking a reference on the module. Then, if ->init fails, tcp_set_ulp propagates the error but nothing releases that reference. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/tcp_ulp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 2417f55374c5..6bb9e14c710a 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name) ulp_ops = __tcp_ulp_find_autoload(name); if (!ulp_ops) - err = -ENOENT; - else - err = ulp_ops->init(sk); + return -ENOENT; - if (err) - goto out; + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } icsk->icsk_ulp_ops = ulp_ops; - out: - return err; + return 0; } -- cgit From 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 10:16:45 -0700 Subject: af_key: do not use GFP_KERNEL in atomic contexts pfkey_broadcast() might be called from non process contexts, we can not use GFP_KERNEL in these cases [1]. This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock() section. [1] : syzkaller reported : in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439 3 locks held by syzkaller183439/2932: #0: (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649 #1: (&pfk->dump_lock){+.+.+.}, at: [] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293 #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] spin_lock_bh include/linux/spinlock.h:304 [inline] #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028 CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994 __might_sleep+0x95/0x190 kernel/sched/core.c:5947 slab_pre_alloc_hook mm/slab.h:416 [inline] slab_alloc mm/slab.c:3383 [inline] kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559 skb_clone+0x1a0/0x400 net/core/skbuff.c:1037 pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207 pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281 dump_sp+0x3d6/0x500 net/key/af_key.c:2685 xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042 pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695 pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299 pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722 pfkey_process+0x606/0x710 net/key/af_key.c:2814 pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x755/0x890 net/socket.c:2035 __sys_sendmsg+0xe5/0x210 net/socket.c:2069 SYSC_sendmsg net/socket.c:2080 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2076 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x445d79 RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000445d79 RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008 RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700 R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000 R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000 Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: David Ahern Acked-by: David Ahern Signed-off-by: David S. Miller --- net/key/af_key.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..98f4d8211b9a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { @@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); + err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); kfree_skb(skb2); kfree_skb(skb); @@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1389,7 +1389,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_state_put(x); - pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } @@ -1476,7 +1476,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } @@ -1589,7 +1589,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1694,8 +1694,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad return -ENOBUFS; } - pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); - + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, + sock_net(sk)); return 0; } @@ -1712,7 +1712,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, + sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) @@ -1733,7 +1734,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -1790,7 +1791,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -1878,7 +1879,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb new_hdr->sadb_msg_errno = 0; } - pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -2206,7 +2207,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } @@ -2426,7 +2427,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2682,7 +2683,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -2739,7 +2740,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -2803,7 +2804,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); @@ -3024,7 +3025,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); return 0; } @@ -3212,7 +3214,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, @@ -3408,7 +3411,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3599,7 +3603,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; -- cgit From e5645f51ba99738b0e5d708edf9c6454f33b9310 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 14 Aug 2017 10:44:59 -0700 Subject: ipv6: release rt6->rt6i_idev properly during ifdown When a dst is created by addrconf_dst_alloc() for a host route or an anycast route, dst->dev points to loopback dev while rt6->rt6i_idev points to a real device. When the real device goes down, the current cleanup code only checks for dst->dev and assumes rt6->rt6i_idev->dev is the same. This causes the refcount leak on the real device in the above situation. This patch makes sure to always release the refcount taken on rt6->rt6i_idev during dst_dev_put(). Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()") Reported-by: John Stultz Tested-by: John Stultz Tested-by: Martin KaFai Lau Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a640fbcba15d..99d4727f2b18 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -417,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev) { - if (idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); - } + if (idev && idev->dev != loopback_dev) { + struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); } } } -- cgit From 42b7305905be52e467bbc346b0f2f95ad44eb1a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Aug 2017 21:31:38 +0200 Subject: udp: fix linear skb reception with PEEK_OFF copy_linear_skb() is broken; both of its callers actually expect 'len' to be the amount we are trying to copy, not the offset of the end. Fix it keeping the meanings of arguments in sync with what the callers (both of them) expect. Also restore a saner behavior on EFAULT (i.e. preserving the iov_iter position in case of failure): The commit fd851ba9caa9 ("udp: harden copy_linear_skb()") avoids the more destructive effect of the buggy copy_linear_skb(), e.g. no more invalid memory access, but said function still behaves incorrectly: when peeking with offset it can fail with EINVAL instead of copying the appropriate amount of memory. Reported-by: Sasha Levin Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Fixes: fd851ba9caa9 ("udp: harden copy_linear_skb()") Signed-off-by: Al Viro Acked-by: Paolo Abeni Tested-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index e9b1d1eacb59..586de4b811b5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -366,14 +366,13 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n, copy = len - off; + int n; - if (copy < 0) - return -EINVAL; - n = copy_to_iter(skb->data + off, copy, to); - if (n == copy) + n = copy_to_iter(skb->data + off, len, to); + if (n == len) return 0; + iov_iter_revert(to, n); return -EFAULT; } -- cgit From 7749d4ff88d31b0be17c8683143135adaaadc6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 14:10:25 -0700 Subject: dccp: purge write queue in dccp_destroy_sock() syzkaller reported that DCCP could have a non empty write queue at dismantle time. WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:180 __warn+0x1c4/0x1d9 kernel/panic.c:541 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 RSP: 0018:ffff8801d182f108 EFLAGS: 00010297 RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280 RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0 R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8 inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835 dccp_close+0x84d/0xc10 net/dccp/proto.c:1067 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 sock_release+0x8d/0x1e0 net/socket.c:597 sock_close+0x16/0x20 net/socket.c:1126 __fput+0x327/0x7e0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:246 task_work_run+0x18a/0x260 kernel/task_work.c:116 exit_task_work include/linux/task_work.h:21 [inline] do_exit+0xa32/0x1b10 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:969 get_signal+0x7e8/0x17e0 kernel/signal.c:2330 do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808 exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9fe25bf63296..86bc40ba6ba5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; -- cgit From d624d276d1ddacbcb12ad96832ce0c7b82cd25db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 17:44:43 -0700 Subject: tcp: fix possible deadlock in TCP stack vs BPF filter Filtering the ACK packet was not put at the right place. At this place, we already allocated a child and put it into accept queue. We absolutely need to call tcp_child_process() to release its spinlock, or we will deadlock at accept() or close() time. Found by syzkaller team (Thanks a lot !) Fixes: 8fac365f63c8 ("tcp: Add a tcp_filter hook before handle ack packet") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Chenbo Feng Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a20e7f03d5f7..e9252c7df809 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1722,6 +1722,8 @@ process: */ sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1729,8 +1731,6 @@ process: } if (nsk == sk) { reqsk_put(req); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2521690d62d6..206210125fd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1456,6 +1456,8 @@ process: } sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1464,8 +1466,6 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; -- cgit From 7e1d90f60a0d501c8503e636942ca704a454d910 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Mon, 14 Aug 2017 14:46:01 -0700 Subject: ALSA: seq: 2nd attempt at fixing race creating a queue commit 4842e98f26dd80be3623c4714a244ba52ea096a8 ("ALSA: seq: Fix race at creating a queue") attempted to fix a race reported by syzkaller. That fix has been described as follows: " When a sequencer queue is created in snd_seq_queue_alloc(),it adds the new queue element to the public list before referencing it. Thus the queue might be deleted before the call of snd_seq_queue_use(), and it results in the use-after-free error, as spotted by syzkaller. The fix is to reference the queue object at the right time. " Even with that fix in place, syzkaller reported a use-after-free error. It specifically pointed to the last instruction "return q->queue" in snd_seq_queue_alloc(). The pointer q is being used after kfree() has been called on it. It turned out that there is still a small window where a race can happen. The window opens at snd_seq_ioctl_create_queue()->snd_seq_queue_alloc()->queue_list_add() and closes at snd_seq_ioctl_create_queue()->queueptr()->snd_use_lock_use(). Between these two calls, a different thread could delete the queue and possibly re-create a different queue in the same location in queue_list. This change prevents this situation by calling snd_use_lock_use() from snd_seq_queue_alloc() prior to calling queue_list_add(). It is then the caller's responsibility to call snd_use_lock_free(&q->use_lock). Fixes: 4842e98f26dd ("ALSA: seq: Fix race at creating a queue") Reported-by: Dmitry Vyukov Cc: Signed-off-by: Daniel Mentz Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 13 ++++--------- sound/core/seq/seq_queue.c | 14 +++++++++----- sound/core/seq/seq_queue.h | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 272c55fe17c8..ea2d0ae85bd3 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1502,16 +1502,11 @@ static int snd_seq_ioctl_unsubscribe_port(struct snd_seq_client *client, static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; - int result; struct snd_seq_queue *q; - result = snd_seq_queue_alloc(client->number, info->locked, info->flags); - if (result < 0) - return result; - - q = queueptr(result); - if (q == NULL) - return -EINVAL; + q = snd_seq_queue_alloc(client->number, info->locked, info->flags); + if (IS_ERR(q)) + return PTR_ERR(q); info->queue = q->queue; info->locked = q->locked; @@ -1521,7 +1516,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); strlcpy(q->name, info->name, sizeof(q->name)); - queuefree(q); + snd_use_lock_free(&q->use_lock); return 0; } diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 450c5187eecb..79e0c5604ef8 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -184,22 +184,26 @@ void __exit snd_seq_queues_delete(void) static void queue_use(struct snd_seq_queue *queue, int client, int use); /* allocate a new queue - - * return queue index value or negative value for error + * return pointer to new queue or ERR_PTR(-errno) for error + * The new queue's use_lock is set to 1. It is the caller's responsibility to + * call snd_use_lock_free(&q->use_lock). */ -int snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) { struct snd_seq_queue *q; q = queue_new(client, locked); if (q == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); q->info_flags = info_flags; queue_use(q, client, 1); + snd_use_lock_use(&q->use_lock); if (queue_list_add(q) < 0) { + snd_use_lock_free(&q->use_lock); queue_delete(q); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - return q->queue; + return q; } /* delete a queue - queue must be owned by the client */ diff --git a/sound/core/seq/seq_queue.h b/sound/core/seq/seq_queue.h index 30c8111477f6..719093489a2c 100644 --- a/sound/core/seq/seq_queue.h +++ b/sound/core/seq/seq_queue.h @@ -71,7 +71,7 @@ void snd_seq_queues_delete(void); /* create new queue (constructor) */ -int snd_seq_queue_alloc(int client, int locked, unsigned int flags); +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int flags); /* delete queue (destructor) */ int snd_seq_queue_delete(int client, int queueid); -- cgit From a8e800fe0f68bc28ce309914f47e432742b865ed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Aug 2017 14:35:50 +0200 Subject: ALSA: usb-audio: Apply sample rate quirk to Sennheiser headset A Senheisser headset requires the typical sample-rate quirk for avoiding spurious errors from inquiring the current sample rate like: usb 1-1: 2:1: cannot get freq at ep 0x4 usb 1-1: 3:1: cannot get freq at ep 0x83 The USB ID 1395:740a has to be added to the entries in snd_usb_get_sample_rate_quirk(). Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1052580 Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d7b0b0a3a2db..f3e9e30172f3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1142,6 +1142,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ -- cgit From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 15 Aug 2017 11:57:06 +0200 Subject: drm/atomic: If the atomic check fails, return its value first The last part of drm_atomic_check_only is testing whether we need to fail with -EINVAL when modeset is not allowed, but forgets to return the value when atomic_check() fails first. This results in -EDEADLK being replaced by -EINVAL, and the sanity check in drm_modeset_drop_locks kicks in: [ 308.531734] ------------[ cut here ]------------ [ 308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.531828] Modules linked in: [ 308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G U W 4.13.0-rc5-patser+ #5225 [ 308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 [ 308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000 [ 308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282 [ 308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6 [ 308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48 [ 308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003 [ 308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001 [ 308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680 [ 308.532328] FS: 00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000 [ 308.532359] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0 [ 308.532402] Call Trace: [ 308.532440] drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm] [ 308.532488] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532565] ? avc_has_extended_perms+0xc39/0xff0 [ 308.532593] ? lock_downgrade+0x610/0x610 [ 308.532640] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532680] drm_ioctl_kernel+0x154/0x1a0 [drm] [ 308.532755] drm_ioctl+0x624/0x8f0 [drm] [ 308.532858] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532976] ? drm_getunique+0x210/0x210 [drm] [ 308.533061] do_vfs_ioctl+0xd92/0xe40 [ 308.533121] ? ioctl_preallocate+0x1b0/0x1b0 [ 308.533160] ? selinux_capable+0x20/0x20 [ 308.533191] ? do_fcntl+0x1b1/0xbf0 [ 308.533219] ? kasan_slab_free+0xa2/0xb0 [ 308.533249] ? f_getown+0x4b/0xa0 [ 308.533278] ? putname+0xcf/0xe0 [ 308.533309] ? security_file_ioctl+0x57/0x90 [ 308.533342] SyS_ioctl+0x4e/0x80 [ 308.533374] entry_SYSCALL_64_fastpath+0x18/0xad [ 308.533405] RIP: 0033:0x7ff00779e4d7 [ 308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7 [ 308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003 [ 308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60 [ 308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070 [ 308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930 [ 308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00 74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 [ 308.534086] ---[ end trace 77f11e53b1df44ad ]--- Solve this by adding the missing return. This is also a bugfix because we could end up rejecting updates with -EINVAL because of a early -EDEADLK, while if atomic_check ran to completion it might have downgraded the modeset to a fastset. Signed-off-by: Maarten Lankhorst Testcase: kms_atomic Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl") Cc: # v4.0+ Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_atomic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index b43939f24812..aed25c4183bb 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1655,6 +1655,9 @@ int drm_atomic_check_only(struct drm_atomic_state *state) if (config->funcs->atomic_check) ret = config->funcs->atomic_check(state->dev, state); + if (ret) + return ret; + if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { if (drm_atomic_crtc_needs_modeset(crtc_state)) { @@ -1665,7 +1668,7 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - return ret; + return 0; } EXPORT_SYMBOL(drm_atomic_check_only); -- cgit From 84393817db09bb436e934f8f8cc981cbca9ea4dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 13:03:47 +0200 Subject: x86/mtrr: Prevent CPU hotplug lock recursion Larry reported a CPU hotplug lock recursion in the MTRR code. ============================================ WARNING: possible recursive locking detected systemd-udevd/153 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] stop_machine+0x16/0x30 but task is already holding lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] mtrr_add_page+0x83/0x470 .... cpus_read_lock+0x48/0x90 stop_machine+0x16/0x30 mtrr_add_page+0x18b/0x470 mtrr_add+0x3e/0x70 mtrr_add_page() holds the hotplug rwsem already and calls stop_machine() which acquires it again. Call stop_machine_cpuslocked() instead. Reported-and-tested-by: Larry Finger Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708140920250.1865@nanos Cc: "Paul E. McKenney" Cc: Borislav Petkov --- arch/x86/kernel/cpu/mtrr/main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c5bb63be4ba1..40d5a8a75212 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); } +static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data = { .smp_reg = reg, + .smp_base = base, + .smp_size = size, + .smp_type = type + }; + + stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); +} + static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { @@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { - set_mtrr(i, base, size, type); + set_mtrr_cpuslocked(i, base, size, type); if (likely(replace < 0)) { mtrr_usage_table[i] = 1; } else { @@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (increment) mtrr_usage_table[i]++; if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); + set_mtrr_cpuslocked(replace, 0, 0, 0); mtrr_usage_table[replace] = 0; } } @@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) goto out; } if (--mtrr_usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); + set_mtrr_cpuslocked(reg, 0, 0, 0); error = reg; out: mutex_unlock(&mtrr_mutex); -- cgit From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Aug 2017 17:19:26 +0200 Subject: iommu: Fix wrong freeing of iommu_device->dev The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the relase function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver. Reported-by: Sebastian Ott Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_types.h | 4 +++- drivers/iommu/intel-iommu.c | 4 +++- drivers/iommu/iommu-sysfs.c | 32 ++++++++++++++++++++------------ include/linux/iommu.h | 12 +++++++++++- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 294a409e283b..d6b873b57054 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -574,7 +574,9 @@ struct amd_iommu { static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) { - return container_of(dev, struct amd_iommu, iommu.dev); + struct iommu_device *iommu = dev_to_iommu_device(dev); + + return container_of(iommu, struct amd_iommu, iommu); } #define ACPIHID_UID_LEN 256 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 687f18f65cea..3e8636f1220e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4736,7 +4736,9 @@ static void intel_disable_iommus(void) static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { - return container_of(dev, struct intel_iommu, iommu.dev); + struct iommu_device *iommu_dev = dev_to_iommu_device(dev); + + return container_of(iommu_dev, struct intel_iommu, iommu); } static ssize_t intel_iommu_show_version(struct device *dev, diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index c58351ed61c1..36d1a7ce7fc4 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, va_list vargs; int ret; - device_initialize(&iommu->dev); + iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL); + if (!iommu->dev) + return -ENOMEM; - iommu->dev.class = &iommu_class; - iommu->dev.parent = parent; - iommu->dev.groups = groups; + device_initialize(iommu->dev); + + iommu->dev->class = &iommu_class; + iommu->dev->parent = parent; + iommu->dev->groups = groups; va_start(vargs, fmt); - ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs); + ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs); va_end(vargs); if (ret) goto error; - ret = device_add(&iommu->dev); + ret = device_add(iommu->dev); if (ret) goto error; + dev_set_drvdata(iommu->dev, iommu); + return 0; error: - put_device(&iommu->dev); + put_device(iommu->dev); return ret; } void iommu_device_sysfs_remove(struct iommu_device *iommu) { - device_unregister(&iommu->dev); + dev_set_drvdata(iommu->dev, NULL); + device_unregister(iommu->dev); + iommu->dev = NULL; } /* * IOMMU drivers can indicate a device is managed by a given IOMMU using @@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) if (!iommu || IS_ERR(iommu)) return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices", + ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) return ret; - ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu"); + ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu"); if (ret) - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); return ret; @@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link) return; sysfs_remove_link(&link->kobj, "iommu"); - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link)); + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..176f7569d874 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -240,7 +240,7 @@ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; - struct device dev; + struct device *dev; }; int iommu_device_register(struct iommu_device *iommu); @@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, iommu->fwnode = fwnode; } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return (struct iommu_device *)dev_get_drvdata(dev); +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, { } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return NULL; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit From 3280d66a6363af0df0441709bc0bc302bd9a2510 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 14 Aug 2017 16:40:11 -0400 Subject: blk-mq: Fix queue usage on failed request allocation blk_mq_get_request() does not release the callers queue usage counter when allocation fails. The caller still needs to account for its own queue usage when it is unable to allocate a request. Fixes: 1ad43c0078b7 ("blk-mq: don't leak preempt counter/q_usage_counter when allocating rq failed") Reported-by: Max Gurtovoy Reviewed-by: Ming Lei Reviewed-by: Sagi Grimberg Tested-by: Max Gurtovoy Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 535cbdf32aab..4603b115e234 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -360,12 +360,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, return ERR_PTR(ret); rq = blk_mq_get_request(q, NULL, op, &alloc_data); + blk_queue_exit(q); if (!rq) return ERR_PTR(-EWOULDBLOCK); blk_mq_put_ctx(alloc_data.ctx); - blk_queue_exit(q); rq->__data_len = 0; rq->__sector = (sector_t) -1; @@ -411,12 +411,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, alloc_data.ctx = __blk_mq_get_ctx(q, cpu); rq = blk_mq_get_request(q, NULL, op, &alloc_data); + blk_queue_exit(q); if (!rq) return ERR_PTR(-EWOULDBLOCK); - blk_queue_exit(q); - return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -- cgit From 462cdace790ac2ed6aad1b19c9c0af0143b6aab0 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 18 Jul 2017 15:01:00 +0100 Subject: xen: fix bio vec merging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current test for bio vec merging is not fully accurate and can be tricked into merging bios when certain grant combinations are used. The result of these malicious bio merges is a bio that extends past the memory page used by any of the originating bios. Take into account the following scenario, where a guest creates two grant references that point to the same mfn, ie: grant 1 -> mfn A, grant 2 -> mfn A. These references are then used in a PV block request, and mapped by the backend domain, thus obtaining two different pfns that point to the same mfn, pfn B -> mfn A, pfn C -> mfn A. If those grants happen to be used in two consecutive sectors of a disk IO operation becoming two different bios in the backend domain, the checks in xen_biovec_phys_mergeable will succeed, because bfn1 == bfn2 (they both point to the same mfn). However due to the bio merging, the backend domain will end up with a bio that expands past mfn A into mfn A + 1. Fix this by making sure the check in xen_biovec_phys_mergeable takes into account the offset and the length of the bio, this basically replicates whats done in __BIOVEC_PHYS_MERGEABLE using mfns (bus addresses). While there also remove the usage of __BIOVEC_PHYS_MERGEABLE, since that's already checked by the callers of xen_biovec_phys_mergeable. CC: stable@vger.kernel.org Reported-by: "Jan H. Schönherr" Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/biomerge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 4da69dbf7dca..1bdd02a6d6ac 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -10,8 +10,7 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); - return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && - ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); + return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2; #else /* * XXX: Add support for merging bio_vec when using different page -- cgit From b15bd8cb37598afb2963f7eb9e2de468d2d60a2f Mon Sep 17 00:00:00 2001 From: Munehisa Kamata Date: Wed, 9 Aug 2017 15:31:40 -0700 Subject: xen-blkfront: use a right index when checking requests Since commit d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") and 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block"), blkfront_resume() has been using an index for iterating ring_info to check request when iterating blk_shadow in an inner loop. This seems to have been accidentally introduced during the massive rewrite of the block layer macros in the commits. This may cause crash like this: [11798.057074] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [11798.058832] IP: [] blkfront_resume+0x10a/0x610 .... [11798.061063] Call Trace: [11798.061063] [] xenbus_dev_resume+0x53/0x140 [11798.061063] [] ? xenbus_dev_probe+0x150/0x150 [11798.061063] [] dpm_run_callback+0x3e/0x110 [11798.061063] [] device_resume+0x88/0x190 [11798.061063] [] dpm_resume+0x100/0x2d0 [11798.061063] [] dpm_resume_end+0x11/0x20 [11798.061063] [] do_suspend+0xe8/0x1a0 [11798.061063] [] shutdown_handler+0xfd/0x130 [11798.061063] [] ? split+0x110/0x110 [11798.061063] [] xenwatch_thread+0x86/0x120 [11798.061063] [] ? prepare_to_wait_event+0x110/0x110 [11798.061063] [] kthread+0xd7/0xf0 [11798.061063] [] ? kfree+0x121/0x170 [11798.061063] [] ? kthread_park+0x60/0x60 [11798.061063] [] ? call_usermodehelper_exec_work+0xb0/0xb0 [11798.061063] [] ? call_usermodehelper_exec_async+0x13a/0x140 [11798.061063] [] ret_from_fork+0x25/0x30 Use the right index in the inner loop. Fixes: d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") Fixes: 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block") Signed-off-by: Munehisa Kamata Reviewed-by: Thomas Friebel Reviewed-by: Eduardo Valentin Reviewed-by: Boris Ostrovsky Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Reviewed-by: Roger Pau Monne Cc: xen-devel@lists.xenproject.org Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 98e34e4c62b8..2468c28d4771 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev) /* * Get the bios in the request so we can re-queue them. */ - if (req_op(shadow[i].request) == REQ_OP_FLUSH || - req_op(shadow[i].request) == REQ_OP_DISCARD || - req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || + if (req_op(shadow[j].request) == REQ_OP_FLUSH || + req_op(shadow[j].request) == REQ_OP_DISCARD || + req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || shadow[j].request->cmd_flags & REQ_FUA) { /* * Flush operations don't contain bios, so -- cgit From 7a7c286d07f9c704e8fd11dd960bf421cc67b66b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 11 Aug 2017 09:34:33 +0800 Subject: drm/amdgpu: save list length when fence is signaled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update the list first to avoid redundant checks. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a6899180b265..c586f44312f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct dma_fence *f = e->fence; struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + if (dma_fence_is_signaled(f)) { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } if (ring && s_fence) { /* For fences from the same ring it is sufficient * when they are scheduled. @@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, } } - if (dma_fence_is_signaled(f)) { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); - continue; - } - return f; } -- cgit From d76036ab47eafa6ce52b69482e91ca3ba337d6d6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2017 13:00:36 +0200 Subject: audit: Fix use after free in audit_remove_watch_rule() audit_remove_watch_rule() drops watch's reference to parent but then continues to work with it. That is not safe as parent can get freed once we drop our reference. The following is a trivial reproducer: mount -o loop image /mnt touch /mnt/file auditctl -w /mnt/file -p wax umount /mnt auditctl -D Grab our own reference in audit_remove_watch_rule() earlier to make sure mark does not get freed under us. CC: stable@vger.kernel.org Reported-by: Tony Jones Signed-off-by: Jan Kara Tested-by: Tony Jones Signed-off-by: Paul Moore --- kernel/audit_watch.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e0656bd63036..1c7ded42f82f 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule) list_del(&krule->rlist); if (list_empty(&watch->rules)) { + /* + * audit_remove_watch() drops our reference to 'parent' which + * can get freed. Grab our own reference to be safe. + */ + audit_get_parent(parent); audit_remove_watch(watch); - - if (list_empty(&parent->watches)) { - audit_get_parent(parent); + if (list_empty(&parent->watches)) fsnotify_destroy_mark(&parent->mark, audit_watch_group); - audit_put_parent(parent); - } + audit_put_parent(parent); } } -- cgit From b5fed474b98332559f2590c6bc90388a0899e793 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2017 13:00:37 +0200 Subject: audit: Receive unmount event Although audit_watch_handle_event() can handle FS_UNMOUNT event, it is not part of AUDIT_FS_WATCH mask and thus such event never gets to audit_watch_handle_event(). Thus fsnotify marks are deleted by fsnotify subsystem on unmount without audit being notified about that which leads to a strange state of existing audit rules with dead fsnotify marks. Add FS_UNMOUNT to the mask of events to be received so that audit can clean up its state accordingly. Signed-off-by: Jan Kara Signed-off-by: Paul Moore --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1c7ded42f82f..d1b5857b7e33 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group; /* fsnotify events we care about. */ #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ - FS_MOVE_SELF | FS_EVENT_ON_CHILD) + FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT) static void audit_free_parent(struct audit_parent *parent) { -- cgit From 12d94a804946af291e24b80fc53ec86264765781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 04:09:51 -0700 Subject: ipv6: fix NULL dereference in ip6_route_dev_notify() Based on a syzkaller report [1], I found that a per cpu allocation failure in snmp6_alloc_dev() would then lead to NULL dereference in ip6_route_dev_notify(). It seems this is a very old bug, thus no Fixes tag in this submission. Let's add in6_dev_put_clear() helper, as we will probably use it elsewhere (once available/present in net-next) [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17294 Comm: syz-executor6 Not tainted 4.13.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff88019f456680 task.stack: ffff8801c6e58000 RIP: 0010:__read_once_size include/linux/compiler.h:250 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: 0018:ffff8801c6e5f1b0 EFLAGS: 00010202 RAX: 0000000000000037 RBX: dffffc0000000000 RCX: ffffc90005d25000 RDX: ffff8801c6e5f218 RSI: ffffffff82342bbf RDI: 0000000000000001 RBP: ffff8801c6e5f240 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10038dcbe37 R13: 0000000000000006 R14: 0000000000000001 R15: 00000000000001b8 FS: 00007f21e0429700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001ddbc22000 CR3: 00000001d632b000 CR4: 00000000001426e0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: refcount_dec_and_test+0x1a/0x20 lib/refcount.c:211 in6_dev_put include/net/addrconf.h:335 [inline] ip6_route_dev_notify+0x1c9/0x4a0 net/ipv6/route.c:3732 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1678 call_netdevice_notifiers net/core/dev.c:1694 [inline] rollback_registered_many+0x91c/0xe80 net/core/dev.c:7107 rollback_registered+0x1be/0x3c0 net/core/dev.c:7149 register_netdevice+0xbcd/0xee0 net/core/dev.c:7587 register_netdev+0x1a/0x30 net/core/dev.c:7669 loopback_net_init+0x76/0x160 drivers/net/loopback.c:214 ops_init+0x10a/0x570 net/core/net_namespace.c:118 setup_net+0x313/0x710 net/core/net_namespace.c:294 copy_net_ns+0x27c/0x580 net/core/net_namespace.c:418 create_new_namespaces+0x425/0x880 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:206 SYSC_unshare kernel/fork.c:2347 [inline] SyS_unshare+0x653/0xfa0 kernel/fork.c:2297 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512c9 RSP: 002b:00007f21e0428c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 0000000000718150 RCX: 00000000004512c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000062020200 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b973d R13: 00000000ffffffff R14: 000000002001d000 R15: 00000000000002dd Code: 50 2b 34 82 c7 00 f1 f1 f1 f1 c7 40 04 04 f2 f2 f2 c7 40 08 f3 f3 f3 f3 e8 a1 43 39 ff 4c 89 f8 48 8b 95 70 ff ff ff 48 c1 e8 03 <0f> b6 0c 18 4c 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: __read_once_size include/linux/compiler.h:250 [inline] RSP: ffff8801c6e5f1b0 RIP: atomic_read arch/x86/include/asm/atomic.h:26 [inline] RSP: ffff8801c6e5f1b0 RIP: refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: ffff8801c6e5f1b0 ---[ end trace e441d046c6410d31 ]--- Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++++++++ net/ipv6/route.c | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6df79e96a780..f44ff2476758 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -336,6 +336,16 @@ static inline void in6_dev_put(struct inet6_dev *idev) in6_dev_finish_destroy(idev); } +static inline void in6_dev_put_clear(struct inet6_dev **pidev) +{ + struct inet6_dev *idev = *pidev; + + if (idev) { + in6_dev_put(idev); + *pidev = NULL; + } +} + static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 99d4727f2b18..94d6a13d47f0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3721,10 +3721,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, /* NETDEV_UNREGISTER could be fired for multiple times by * netdev_wait_allrefs(). Make sure we only call this once. */ - in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); - in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } -- cgit From b3dc8f772fab5b2d284b780830fd56494491e493 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 15 Aug 2017 04:28:54 -0700 Subject: net: Fix a typo in comment about sock flags. Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index dda2cc939a53..ebeb48c92005 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -37,7 +37,7 @@ struct net; /* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. - * Eventually all flags will be in sk->sk_wq_flags. + * Eventually all flags will be in sk->sk_wq->flags. */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 -- cgit From 187e5b3ac84d3421d2de3aca949b2791fbcad554 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 05:26:17 -0700 Subject: ipv4: fix NULL dereference in free_fib_info_rcu() If fi->fib_metrics could not be allocated in fib_create_info() we attempt to dereference a NULL pointer in free_fib_info_rcu() : m = fi->fib_metrics; if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) kfree(m); Before my recent patch, we used to call kfree(NULL) and nothing wrong happened. Instead of using RCU to defer freeing while we are under memory stress, it seems better to take immediate action. This was reported by syzkaller team. Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b8d18171cca3..ec3a9ce281a6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (unlikely(!fi->fib_metrics)) { + kfree(fi); + return ERR_PTR(err); + } atomic_set(&fi->fib_metrics->refcnt, 1); - } else + } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; - + } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; -- cgit From 898904226b5a6dee657f23cf51e385f50da22596 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:35:21 +0300 Subject: net_sched: reset pointers to tcf blocks in classful qdiscs' destructors Traffic filters could keep direct pointers to classes in classful qdisc, thus qdisc destruction first removes all filters before freeing classes. Class destruction methods also tries to free attached filters but now this isn't safe because tcf_block_put() unlike to tcf_destroy_chain() cannot be called second time. This patch set class->block to NULL after first tcf_block_put() and turn second call into no-op. Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Signed-off-by: Konstantin Khlebnikov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 4 +++- net/sched/sch_cbq.c | 4 +++- net/sched/sch_hfsc.c | 4 +++- net/sched/sch_htb.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 572fe2584e48..c403c87aff7a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -572,8 +572,10 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) + list_for_each_entry(flow, &p->flows, list) { tcf_block_put(flow->block); + flow->block = NULL; + } list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 481036f6b54e..780db43300b1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1431,8 +1431,10 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ad02bbe6903..fd15200f8627 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1530,8 +1530,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 203286ab4427..5d65ec5207e9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1258,8 +1258,10 @@ static void htb_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], -- cgit From 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:37:04 +0300 Subject: net_sched/sfq: update hierarchical backlog when drop packet When sfq_enqueue() drops head packet or packet from another queue it have to update backlog at upper qdiscs too. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Konstantin Khlebnikov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f80ea2cc5f1f..82469ef9655e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -437,6 +437,7 @@ congestion_drop: qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); + qdisc_tree_reduce_backlog(sch, 0, delta); return NET_XMIT_CN; } @@ -468,8 +469,10 @@ enqueue: /* Return Congestion Notification only if we dropped a packet * from this flow. */ - if (qlen != slot->qlen) + if (qlen != slot->qlen) { + qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN; + } /* As we dropped a packet, better let upper stack know this */ qdisc_tree_reduce_backlog(sch, 1, dropped); -- cgit From c90e95147c27b1780e76c6e8fea1b5c78d7d387f Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:39:05 +0300 Subject: net_sched: remove warning from qdisc_hash_add It was added in commit e57a784d8cae ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") to hide duplicates from "tc qdisc show" for incative deivices. After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") it triggered when classful qdisc is added to inactive device because default qdiscs are added before switching root qdisc. Anyway after commit ea3274695353 ("net: sched: avoid duplicates in qdisc dump") duplicates are filtered right in dumper. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bd24a550e0f9..a3fa144b8648 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -286,9 +286,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) -- cgit From 61deee962896f7eb547adc66ef09c8f1e7ddf7d7 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 15 Aug 2017 14:55:32 +0100 Subject: sfc: don't try and read ef10 data on non-ef10 NIC The MAC stats command takes a port ID, which doesn't exist on pre-ef10 NICs (5000- and 6000- series). This is extracted from the NIC specific data; we misinterpret this as the ef10 data structure, causing us to read potentially unallocated data. With a KASAN kernel this can cause errors with: BUG: KASAN: slab-out-of-bounds in efx_mcdi_mac_stats Fixes: 0a2ab4d988d7 ("sfc: set the port-id when calling MC_CMD_MAC_STATS") Reported-by: Stefano Brivio Tested-by: Stefano Brivio Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c905971c5f3a..990a63d7fcb7 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -938,7 +938,6 @@ enum efx_stats_action { static int efx_mcdi_mac_stats(struct efx_nic *efx, enum efx_stats_action action, int clear) { - struct efx_ef10_nic_data *nic_data = efx->nic_data; MCDI_DECLARE_BUF(inbuf, MC_CMD_MAC_STATS_IN_LEN); int rc; int change = action == EFX_STATS_PULL ? 0 : 1; @@ -960,7 +959,12 @@ static int efx_mcdi_mac_stats(struct efx_nic *efx, MAC_STATS_IN_PERIODIC_NOEVENT, 1, MAC_STATS_IN_PERIOD_MS, period); MCDI_SET_DWORD(inbuf, MAC_STATS_IN_DMA_LEN, dma_len); - MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, nic_data->vport_id); + + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, nic_data->vport_id); + } rc = efx_mcdi_rpc_quiet(efx, MC_CMD_MAC_STATS, inbuf, sizeof(inbuf), NULL, 0, NULL); -- cgit From 0e405232871d67bf1b238d56b6b3d500e69ebbf3 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 23:24:48 +0800 Subject: PCI: fix oops when try to find Root Port for a PCI device Eric report a oops when booting the system after applying the commit a99b646afa8a ("PCI: Disable PCIe Relaxed..."): [ 4.241029] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 4.247001] IP: pci_find_pcie_root_port+0x62/0x80 [ 4.253011] PGD 0 [ 4.253011] P4D 0 [ 4.253011] [ 4.258013] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 4.262015] Modules linked in: [ 4.265005] CPU: 31 PID: 1 Comm: swapper/0 Not tainted 4.13.0-dbx-DEV #316 [ 4.271002] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.40.0 06/22/2016 [ 4.279002] task: ffffa2ee38cfa040 task.stack: ffffa51ec0004000 [ 4.285001] RIP: 0010:pci_find_pcie_root_port+0x62/0x80 [ 4.290012] RSP: 0000:ffffa51ec0007ab8 EFLAGS: 00010246 [ 4.295003] RAX: 0000000000000000 RBX: ffffa2ee36bae000 RCX: 0000000000000006 [ 4.303002] RDX: 000000000000081c RSI: ffffa2ee38cfa8c8 RDI: ffffa2ee36bae000 [ 4.310013] RBP: ffffa51ec0007b58 R08: 0000000000000001 R09: 0000000000000000 [ 4.317001] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa51ec0007ad0 [ 4.324005] R13: ffffa2ee36bae098 R14: 0000000000000002 R15: ffffa2ee37204818 [ 4.331002] FS: 0000000000000000(0000) GS:ffffa2ee3fcc0000(0000) knlGS:0000000000000000 [ 4.339002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.345001] CR2: 0000000000000050 CR3: 000000401000f000 CR4: 00000000001406e0 [ 4.351002] Call Trace: [ 4.354012] ? pci_configure_device+0x19f/0x570 [ 4.359002] ? pci_conf1_read+0xb8/0xf0 [ 4.363002] ? raw_pci_read+0x23/0x40 [ 4.366011] ? pci_read+0x2c/0x30 [ 4.370014] ? pci_read_config_word+0x67/0x70 [ 4.374012] pci_device_add+0x28/0x230 [ 4.378012] ? pci_vpd_f0_read+0x50/0x80 [ 4.382014] pci_scan_single_device+0x96/0xc0 [ 4.386012] pci_scan_slot+0x79/0xf0 [ 4.389001] pci_scan_child_bus+0x31/0x180 [ 4.394014] acpi_pci_root_create+0x1c6/0x240 [ 4.398013] pci_acpi_scan_root+0x15f/0x1b0 [ 4.402012] acpi_pci_root_add+0x2e6/0x400 [ 4.406012] ? acpi_evaluate_integer+0x37/0x60 [ 4.411002] acpi_bus_attach+0xdf/0x200 [ 4.415002] acpi_bus_attach+0x6a/0x200 [ 4.418014] acpi_bus_attach+0x6a/0x200 [ 4.422013] acpi_bus_scan+0x38/0x70 [ 4.426011] acpi_scan_init+0x10c/0x271 [ 4.429001] acpi_init+0x2fa/0x348 [ 4.433004] ? acpi_sleep_proc_init+0x2d/0x2d [ 4.437001] do_one_initcall+0x43/0x169 [ 4.441001] kernel_init_freeable+0x1d0/0x258 [ 4.445003] ? rest_init+0xe0/0xe0 [ 4.449001] kernel_init+0xe/0x150 ====================== cut here ============================= It looks like the pci_find_pcie_root_port() was trying to find the Root Port for the PCI device which is the Root Port already, it will return NULL and trigger the problem, so check the highest_pcie_bridge to fix thie problem. Fixes: a99b646afa8a ("PCI: Disable PCIe Relaxed Ordering if unsupported") Fixes: c56d4450eb68 ("PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum") Reported-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/pci/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..587cd7623ed8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -522,10 +522,11 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) bridge = pci_upstream_bridge(bridge); } - if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT) - return NULL; + if (highest_pcie_bridge && + pci_pcie_type(highest_pcie_bridge) == PCI_EXP_TYPE_ROOT_PORT) + return highest_pcie_bridge; - return highest_pcie_bridge; + return NULL; } EXPORT_SYMBOL(pci_find_pcie_root_port); -- cgit From 88a5c690b66110ad255380d8f629c629cf6ca559 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 16 Aug 2017 01:45:33 +0200 Subject: bpf: fix bpf_trace_printk on 32 bit archs James reported that on MIPS32 bpf_trace_printk() is currently broken while MIPS64 works fine: bpf_trace_printk() uses conditional operators to attempt to pass different types to __trace_printk() depending on the format operators. This doesn't work as intended on 32-bit architectures where u32 and long are passed differently to u64, since the result of C conditional operators follows the "usual arithmetic conversions" rules, such that the values passed to __trace_printk() will always be u64 [causing issues later in the va_list handling for vscnprintf()]. For example the samples/bpf/tracex5 test printed lines like below on MIPS32, where the fd and buf have come from the u64 fd argument, and the size from the buf argument: [...] 1180.941542: 0x00000001: write(fd=1, buf= (null), size=6258688) Instead of this: [...] 1625.616026: 0x00000001: write(fd=1, buf=009e4000, size=512) One way to get it working is to expand various combinations of argument types into 8 different combinations for 32 bit and 64 bit kernels. Fix tested by James on MIPS32 and MIPS64 as well that it resolves the issue. Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") Reported-by: James Hogan Tested-by: James Hogan Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 37385193a608..dc498b605d5d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -204,10 +204,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, fmt_cnt++; } - return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, - mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, - mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); +/* Horrid workaround for getting va_list handling working with different + * argument type combinations generically for 32 and 64 bit archs. + */ +#define __BPF_TP_EMIT() __BPF_ARG3_TP() +#define __BPF_TP(...) \ + __trace_printk(1 /* Fake ip will not be printed. */, \ + fmt, ##__VA_ARGS__) + +#define __BPF_ARG1_TP(...) \ + ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_TP(arg1, ##__VA_ARGS__) \ + : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ + : __BPF_TP((u32)arg1, ##__VA_ARGS__))) + +#define __BPF_ARG2_TP(...) \ + ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ + : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ + : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) + +#define __BPF_ARG3_TP(...) \ + ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ + : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ + : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) + + return __BPF_TP_EMIT(); } static const struct bpf_func_proto bpf_trace_printk_proto = { -- cgit From 4098116039911e8870d84c975e2ec22dab65a909 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 12 Aug 2017 23:36:47 +0200 Subject: parisc: pci memory bar assignment fails with 64bit kernels on dino/cujo For 64bit kernels the lmmio_space_offset of the host bridge window isn't set correctly on systems with dino/cujo PCI host bridges. This leads to not assigned memory bars and failing drivers, which need to use these bars. Signed-off-by: Thomas Bogendoerfer Cc: Acked-by: Helge Deller Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 5c63b920b471..ed92c1254cff 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -956,7 +956,7 @@ static int __init dino_probe(struct parisc_device *dev) dino_dev->hba.dev = dev; dino_dev->hba.base_addr = ioremap_nocache(hpa, 4096); - dino_dev->hba.lmmio_space_offset = 0; /* CPU addrs == bus addrs */ + dino_dev->hba.lmmio_space_offset = PCI_F_EXTEND; spin_lock_init(&dino_dev->dinosaur_pen); dino_dev->hba.iommu = ccio_get_iommu(dev); -- cgit From 42819eb7a0957cc340ad4ed8bba736bab5ebc464 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 14 Aug 2017 22:12:37 +0200 Subject: nvmet: don't overwrite identify sn/fr with 0-bytes The merged version of my patch "nvmet: don't report 0-bytes in serial number" fails to remove two lines which should have been replaced, so that the space-padded strings are overwritten again with 0-bytes. Fix it. Fixes: 42de82a8b544 nvmet: don't report 0-bytes in serial number Signed-off-by: Martin Wilck Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2d7a98ab53fb..a53bb6635b83 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -199,12 +199,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1); copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE)); - memset(id->mn, ' ', sizeof(id->mn)); - strncpy((char *)id->mn, "Linux", sizeof(id->mn)); - - memset(id->fr, ' ', sizeof(id->fr)); - strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr)); - id->rab = 6; /* -- cgit From 16a5a480f067f945fd27bf91ffdce3f959b0d4b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 14 Aug 2017 11:20:32 -0700 Subject: nvmet-fc: correct use after free on list teardown Use list_for_each_entry_safe to prevent list handling from referencing next pointers directly after list_del's Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1b7f2520a20d..b200f9aadd52 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -704,7 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; - struct nvmet_fc_defer_fcp_req *deferfcp; + struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; unsigned long flags; int i, writedataactive; bool disconnect; @@ -735,7 +735,8 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) } /* Cleanup defer'ed IOs in queue */ - list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) { + list_for_each_entry_safe(deferfcp, tempptr, &queue->avail_defer_list, + req_list) { list_del(&deferfcp->req_list); kfree(deferfcp); } -- cgit From 5a69aec945d27e78abac9fd032533d3aaebf7c1e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:14 +1000 Subject: powerpc: Fix VSX enabling/flushing to also test MSR_FP and MSR_VEC VSX uses a combination of the old vector registers, the old FP registers and new "second halves" of the FP registers. Thus when we need to see the VSX state in the thread struct (flush_vsx_to_thread()) or when we'll use the VSX in the kernel (enable_kernel_vsx()) we need to ensure they are all flushed into the thread struct if either of them is individually enabled. Unfortunately we only tested if the whole VSX was enabled, not if they were individually enabled. Fixes: 72cd7b44bc99 ("powerpc: Uncomment and make enable_kernel_vsx() routine available") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ec480966f9bf..1f0fd361e09b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -362,7 +362,8 @@ void enable_kernel_vsx(void) cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + if (current->thread.regs && + (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) { check_if_tm_restore_required(current); /* * If a thread has already been reclaimed then the @@ -386,7 +387,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); - if (tsk->thread.regs->msr & MSR_VSX) { + if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) { BUG_ON(tsk != current); giveup_vsx(tsk); } -- cgit From a7d2e03928c1936004750c56faf7534c8534f875 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Thu, 10 Aug 2017 12:05:02 -0700 Subject: RDMA/vmw_pvrdma: Report CQ missed events There is a chance of a race between arming the CQ and receiving completions. By reporting CQ missed events any ULPs should poll again to get the completions. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Acked-by: Aditya Sarwade Signed-off-by: Bryan Tan Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 69bda611d313..90aa326fd7c0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -65,13 +65,28 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, struct pvrdma_dev *dev = to_vdev(ibcq->device); struct pvrdma_cq *cq = to_vcq(ibcq); u32 val = cq->cq_handle; + unsigned long flags; + int has_data = 0; val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM; + spin_lock_irqsave(&cq->cq_lock, flags); + pvrdma_write_uar_cq(dev, val); - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned int head; + + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (unlikely(has_data == PVRDMA_INVALID_IDX)) + dev_err(&dev->pdev->dev, "CQ ring state invalid\n"); + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return has_data; } /** -- cgit From 211b7aac5443d347ff7b6dabf54702b40228cfaf Mon Sep 17 00:00:00 2001 From: "Balasubramaniam, Hari Chand" Date: Tue, 15 Aug 2017 10:05:46 +0800 Subject: drm/i915: Initialize 'data' in intel_dsi_dcs_backlight.c variable 'data' may be used uninitialized in this function. thus, 'function dcs_get_backlight' will return unwanted value/fail. Thus, adding NULL initialized to 'data' variable will solve the return failure happening. v2: Change commit message to reflect upstream with proper message Fixes: 90198355b83c ("drm/i915/dsi: Add DCS control for Panel PWM") Cc: Jani Nikula Cc: Daniel Vetter Cc: Yetunde Adebisi Cc: Deepak M Cc: Jani Nikula Signed-off-by: Balasubramaniam, Hari Chand Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1502762746-191826-1-git-send-email-hari.chand.balasubramaniam@intel.com (cherry picked from commit d59814a5b4852442e1d03c569a4542f8b08356a7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c index 6e09ceb71500..150a156f3b1e 100644 --- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c @@ -46,7 +46,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) struct intel_encoder *encoder = connector->encoder; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); struct mipi_dsi_device *dsi_device; - u8 data; + u8 data = 0; enum port port; /* FIXME: Need to take care of 16 bit brightness level */ -- cgit From 7c648bde211baeda7a029bd6be4957e8be48d8c9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 11 Aug 2017 14:39:07 +0300 Subject: drm/i915/vbt: ignore extraneous child devices for a port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since we've parsed VBT child devices, starting from 6acab15a7b0d ("drm/i915: use the HDMI DDI buffer translations from VBT"), we've ignored the child device information if more than one child device references the same port. The rationale for this seems lost in time. Since commit 311a20949f04 ("drm/i915: don't init DP or HDMI when not supported by DDI port") we started using this information more to skip HDMI/DP init if the port wasn't there per VBT child devices. However, at the same time it added port defaults without further explanation. Thus, if the child device info was skipped due to multiple child devices referencing the same port, the device info would be retrieved from the somewhat arbitrary defaults. Finally, when commit bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT") stopped initializing the defaults whenever VBT is present, thus trusting the VBT more, we stopped initializing ports which were referenced by more than one child device. Apparently at least Asus UX305UA, UX305U, and UX306U laptops have VBT child device blocks which cause this behaviour. Arguably they were shipped with a broken VBT. Relax the rules for multiple references to the same port, and use the first child device info to reference a port. Retain the logic to debug log about this, though. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101745 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196233 Fixes: bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT") Tested-by: Oliver Weißbarth Reported-by: Oliver Weißbarth Reported-by: Didier G Reported-by: Giles Anderson Cc: Manasi Navare Cc: Ville Syrjälä Cc: Paulo Zanoni Cc: # v4.12+ Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170811113907.6716-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b5273d72750555a673040070bfb23c454a7cd3ef) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 639d45c1dd2e..7ea7fd1e8856 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1120,8 +1120,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; uint8_t aux_channel, ddc_pin; /* Each DDI port can have more than one value on the "DVO Port" field, - * so look for all the possible values for each port and abort if more - * than one is found. */ + * so look for all the possible values for each port. + */ int dvo_ports[][3] = { {DVO_PORT_HDMIA, DVO_PORT_DPA, -1}, {DVO_PORT_HDMIB, DVO_PORT_DPB, -1}, @@ -1130,7 +1130,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, {DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, }; - /* Find the child device to use, abort if more than one found. */ + /* + * Find the first child device to reference the port, report if more + * than one found. + */ for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { it = dev_priv->vbt.child_dev + i; @@ -1140,11 +1143,11 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (it->common.dvo_port == dvo_ports[port][j]) { if (child) { - DRM_DEBUG_KMS("More than one child device for port %c in VBT.\n", + DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n", port_name(port)); - return; + } else { + child = it; } - child = it; } } } -- cgit From 31c1a7b8f966470ce136710a95afcf5822fecef8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 15 Aug 2017 20:04:03 -0700 Subject: drm/i915/cnl: Fix LSPCON support. When LSPCON support was extended to CNL one part was missed on lspcon_init. So, instead of adding check per platform on lspcon_init let's use HAS_LSPCON that is already there for that purpose. Fixes: ff15947e0f02 ("drm/i915/cnl: LSPCON support is gen9+") Cc: Shashank Sharma Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Link: https://patchwork.freedesktop.org/patch/msgid/20170816030403.11368-1-rodrigo.vivi@intel.com (cherry picked from commit acf58d4e965d40fc014252292b0911b4c9fe6697) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lspcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 5abef482eacf..beb9baaf2f2e 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -210,8 +210,8 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!IS_GEN9(dev_priv)) { - DRM_ERROR("LSPCON is supported on GEN9 only\n"); + if (!HAS_LSPCON(dev_priv)) { + DRM_ERROR("LSPCON is not supported on this platform\n"); return false; } -- cgit From f67ace2d8868d06710ceea1b10b124eead5040da Mon Sep 17 00:00:00 2001 From: Chien Tin Tung Date: Tue, 8 Aug 2017 20:38:43 -0500 Subject: i40iw: Fix parsing of query/commit FPM buffers Parsing of commit/query Host Memory Cache Function Private Memory is not skipping over reserved fields and incorrectly assigning those values into object's base/cnt/max_cnt fields. Skip over reserved fields and set correct values. Also correct memory alignment requirement for commit/query FPM buffers. Signed-off-by: Chien Tin Tung Signed-off-by: Shiraz Saleem Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 121 +++++++++++++++++++++---------- drivers/infiniband/hw/i40iw/i40iw_d.h | 4 +- 2 files changed, 83 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 9ec1ae9a82c9..ef4a73cd1710 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -130,20 +130,32 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( u64 base = 0; u32 i, j; u32 k = 0; - u32 low; /* copy base values in obj_info */ - for (i = I40IW_HMC_IW_QP, j = 0; - i <= I40IW_HMC_IW_PBLE; i++, j += 8) { + for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE; i++, j += 8) { + if ((i == I40IW_HMC_IW_SRQ) || + (i == I40IW_HMC_IW_FSIMC) || + (i == I40IW_HMC_IW_FSIAV)) { + info[i].base = 0; + info[i].cnt = 0; + continue; + } get_64bit_val(buf, j, &temp); info[i].base = RS_64_1(temp, 32) * 512; if (info[i].base > base) { base = info[i].base; k = i; } - low = (u32)(temp); - if (low) - info[i].cnt = low; + if (i == I40IW_HMC_IW_APBVT_ENTRY) { + info[i].cnt = 1; + continue; + } + if (i == I40IW_HMC_IW_QP) + info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); + else if (i == I40IW_HMC_IW_CQ) + info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); + else + info[i].cnt = (u32)(temp); } size = info[k].cnt * info[k].size + info[k].base; if (size & 0x1FFFFF) @@ -154,6 +166,31 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( return 0; } +/** + * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size + * @buf: ptr to fpm query buffer + * @buf_idx: index into buf + * @info: ptr to i40iw_hmc_obj_info struct + * @rsrc_idx: resource index into info + * + * Decode a 64 bit value from fpm query buffer into max count and size + */ +static u64 i40iw_sc_decode_fpm_query(u64 *buf, + u32 buf_idx, + struct i40iw_hmc_obj_info *obj_info, + u32 rsrc_idx) +{ + u64 temp; + u32 size; + + get_64bit_val(buf, buf_idx, &temp); + obj_info[rsrc_idx].max_cnt = (u32)temp; + size = (u32)RS_64_1(temp, 32); + obj_info[rsrc_idx].size = LS_64_1(1, size); + + return temp; +} + /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer @@ -168,9 +205,9 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( struct i40iw_hmc_info *hmc_info, struct i40iw_hmc_fpm_misc *hmc_fpm_misc) { - u64 temp; struct i40iw_hmc_obj_info *obj_info; - u32 i, j, size; + u64 temp; + u32 size; u16 max_pe_sds; obj_info = hmc_info->hmc_obj; @@ -185,41 +222,52 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( hmc_fpm_misc->max_sds = max_pe_sds; hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index; - for (i = I40IW_HMC_IW_QP, j = 8; - i <= I40IW_HMC_IW_ARP; i++, j += 8) { - get_64bit_val(buf, j, &temp); - if (i == I40IW_HMC_IW_QP) - obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); - else if (i == I40IW_HMC_IW_CQ) - obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); - else - obj_info[i].max_cnt = (u32)temp; + get_64bit_val(buf, 8, &temp); + obj_info[I40IW_HMC_IW_QP].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); + size = (u32)RS_64_1(temp, 32); + obj_info[I40IW_HMC_IW_QP].size = LS_64_1(1, size); - size = (u32)RS_64_1(temp, 32); - obj_info[i].size = ((u64)1 << size); - } - for (i = I40IW_HMC_IW_MR, j = 48; - i <= I40IW_HMC_IW_PBLE; i++, j += 8) { - get_64bit_val(buf, j, &temp); - obj_info[i].max_cnt = (u32)temp; - size = (u32)RS_64_1(temp, 32); - obj_info[i].size = LS_64_1(1, size); - } + get_64bit_val(buf, 16, &temp); + obj_info[I40IW_HMC_IW_CQ].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); + size = (u32)RS_64_1(temp, 32); + obj_info[I40IW_HMC_IW_CQ].size = LS_64_1(1, size); + + i40iw_sc_decode_fpm_query(buf, 32, obj_info, I40IW_HMC_IW_HTE); + i40iw_sc_decode_fpm_query(buf, 40, obj_info, I40IW_HMC_IW_ARP); + + obj_info[I40IW_HMC_IW_APBVT_ENTRY].size = 8192; + obj_info[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1; + + i40iw_sc_decode_fpm_query(buf, 48, obj_info, I40IW_HMC_IW_MR); + i40iw_sc_decode_fpm_query(buf, 56, obj_info, I40IW_HMC_IW_XF); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET); get_64bit_val(buf, 64, &temp); + obj_info[I40IW_HMC_IW_XFFL].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE); if (!hmc_fpm_misc->xf_block_size) return I40IW_ERR_INVALID_SIZE; + + i40iw_sc_decode_fpm_query(buf, 72, obj_info, I40IW_HMC_IW_Q1); + get_64bit_val(buf, 80, &temp); + obj_info[I40IW_HMC_IW_Q1FL].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_Q1FL].size = 4; hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE); if (!hmc_fpm_misc->q1_block_size) return I40IW_ERR_INVALID_SIZE; + + i40iw_sc_decode_fpm_query(buf, 88, obj_info, I40IW_HMC_IW_TIMER); + + get_64bit_val(buf, 112, &temp); + obj_info[I40IW_HMC_IW_PBLE].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_PBLE].size = 8; + + get_64bit_val(buf, 120, &temp); + hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS); + hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER); + hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET); + return 0; } @@ -3392,13 +3440,6 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_ hmc_info->sd_table.sd_entry = virt_mem.va; } - /* fill size of objects which are fixed */ - hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4; - hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4; - hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8; - hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192; - hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1; - return ret_code; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index a39ac12b6a7e..2ebaadbed379 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1507,8 +1507,8 @@ enum { I40IW_CQ0_ALIGNMENT_MASK = (256 - 1), I40IW_HOST_CTX_ALIGNMENT_MASK = (4 - 1), I40IW_SHADOWAREA_MASK = (128 - 1), - I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = 0, - I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = 0 + I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = (4 - 1), + I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = (4 - 1) }; enum i40iw_alignment { -- cgit From 8129331f01a683ed8d9a9a65ed01b5c6ad26403a Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 8 Aug 2017 20:38:44 -0500 Subject: i40iw: Correct variable names Fix incorrect naming of status code and struct. Use inline instead of immediate. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_status.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_uk.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index 91c421762f06..f7013f11d808 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -62,7 +62,7 @@ enum i40iw_status_code { I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, - I40IW_ERR_INVALID_IMM_DATA_SIZE = -26, + I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, I40IW_ERR_CQP_COMPL_ERROR = -29, diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index b0d3a0e8a9b5..70a6b41980fa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -435,7 +435,7 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, op_info = &info->op.inline_rdma_write; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size); if (ret_code) @@ -511,7 +511,7 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size); if (ret_code) @@ -1187,7 +1187,7 @@ enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, u8 *wqe_size) { if (data_size > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; if (data_size <= 16) *wqe_size = I40IW_QP_WQE_MIN_SIZE; -- cgit From 29c2415a6669bab354f0aa3445777fe147c7a05d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 8 Aug 2017 20:38:46 -0500 Subject: i40iw: Fix typecast of tcp_seq_num The typecast of tcp_seq_num incorrectly uses u8. Fix by casting to u32. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 70a6b41980fa..1060725d18bc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -784,7 +784,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, get_64bit_val(cqe, 0, &qword0); get_64bit_val(cqe, 16, &qword2); - info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM); + info->tcp_seq_num = (u32)RS_64(qword0, I40IWCQ_TCPSEQNUM); info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID); -- cgit From a28f047e5f9b987d614eeee34388087ffdda3e53 Mon Sep 17 00:00:00 2001 From: Christopher N Bednarz Date: Tue, 8 Aug 2017 20:38:47 -0500 Subject: i40iw: Use correct alignment for CQ0 memory Utilize correct alignment variable when allocating DMA memory for CQ0. Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 71050c5d29a0..7f5583d83622 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -685,7 +685,7 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe)); tsize = cqsize + sizeof(struct i40iw_cq_shadow_area); ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize, - I40IW_CQ0_ALIGNMENT_MASK); + I40IW_CQ0_ALIGNMENT); if (ret) return ret; -- cgit From aa939c12ab8a0c094420ad1b909a957ac590e43e Mon Sep 17 00:00:00 2001 From: Christopher N Bednarz Date: Tue, 8 Aug 2017 20:38:48 -0500 Subject: i40iw: Fix potential fcn_id_array out of bounds Avoid out of bounds error by utilizing I40IW_MAX_STATS_COUNT instead of I40IW_INVALID_FCN_ID. Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index ef4a73cd1710..a49ff2eb6fb3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -4881,7 +4881,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) { u8 fcn_id = vsi->fcn_id; - if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID)) + if (vsi->stats_fcn_id_alloc && fcn_id < I40IW_MAX_STATS_COUNT) vsi->dev->fcn_id_array[fcn_id] = false; i40iw_hw_stats_stop_timer(vsi); } -- cgit From 5b59a3969e95cd9be3699ecf7149ae8ef103b6f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Aug 2017 18:41:02 +0100 Subject: IB/hns: fix memory leak on ah on error return path When dmac is NULL, ah is not being freed on the error return path. Fix this by kfree'ing it. Detected by CoverityScan, CID#1452636 ("Resource Leak") Fixes: d8966fcd4c25 ("IB/core: Use rdma_ah_attr accessor functions") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index f78a733a63ec..d545302b8ef8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -64,8 +64,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, } else { u8 *dmac = rdma_ah_retrieve_dmac(ah_attr); - if (!dmac) + if (!dmac) { + kfree(ah); return ERR_PTR(-EINVAL); + } memcpy(ah->av.mac, dmac, ETH_ALEN); } -- cgit From d4ba61d218822578dcf6c2453a38e000b0ea01e6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 25 Jul 2017 06:51:15 -0700 Subject: iw_cxgb4: fix misuse of integer variable Fixes: ee30f7d507c0 ("iw_cxgb4: Max fastreg depth depends on DSGL support") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 5332f06b99ba..c2fba76becd4 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -661,7 +661,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, rhp = php->rhp; if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl && + max_num_sg > t4_max_fr_depth(rhp->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl)) return ERR_PTR(-EINVAL); -- cgit From 06f8174a97822f6befd28fc2dd315b43b82c700f Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 17 Jul 2017 14:03:50 -0500 Subject: IB/core: Protect sysfs entry on ib_unregister_device ib_unregister_device is not protecting removal of sysfs entries. A call to ib_register_device in that window can result in duplicate sysfs entry warning. Move mutex_unlock to after ib_device_unregister_sysfs to protect against sysfs entry creation. This issue is exposed during driver load/unload stress test. WARNING: CPU: 5 PID: 4445 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x5f/0x70 sysfs: cannot create duplicate filename '/class/infiniband/i40iw0' Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H BIOS F7 01/17/2014 Workqueue: i40e i40e_service_task [i40e] Call Trace: dump_stack+0x67/0x98 __warn+0xcc/0xf0 warn_slowpath_fmt+0x4a/0x50 ? kernfs_path_from_node+0x4b/0x60 sysfs_warn_dup+0x5f/0x70 sysfs_do_create_link_sd.isra.2+0xb7/0xc0 sysfs_create_link+0x20/0x40 device_add+0x28c/0x600 ib_device_register_sysfs+0x58/0x170 [ib_core] ib_register_device+0x325/0x570 [ib_core] ? i40iw_register_rdma_device+0x1f4/0x400 [i40iw] ? kmem_cache_alloc_trace+0x143/0x330 ? __raw_spin_lock_init+0x2d/0x50 i40iw_register_rdma_device+0x2dc/0x400 [i40iw] i40iw_open+0x10a6/0x1950 [i40iw] ? i40iw_open+0xeab/0x1950 [i40iw] ? i40iw_make_cm_node+0x9c0/0x9c0 [i40iw] i40e_client_subtask+0xa4/0x110 [i40e] i40e_service_task+0xc2d/0x1320 [i40e] process_one_work+0x203/0x710 ? process_one_work+0x16f/0x710 worker_thread+0x126/0x4a0 ? trace_hardirqs_on+0xd/0x10 kthread+0x112/0x150 ? process_one_work+0x710/0x710 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x2e/0x40 ---[ end trace fd11b69e21ea7653 ]--- Couldn't register device i40iw0 with driver model Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a5dfab6adf49..221468f77128 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -537,10 +537,11 @@ void ib_unregister_device(struct ib_device *device) } up_read(&lists_rwsem); - mutex_unlock(&device_mutex); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); + + mutex_unlock(&device_mutex); + ib_cache_cleanup_one(device); ib_security_destroy_port_pkey_list(device); -- cgit From 870201f95fcbd19538aef630393fe9d583eff82e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 16 Aug 2017 18:57:04 +0300 Subject: IB/uverbs: Fix NULL pointer dereference during device removal As part of ib_uverbs_remove_one which might be triggered upon reset flow, we trigger IB_EVENT_DEVICE_FATAL event to userspace application. If device was removed after uverbs fd was opened but before ib_uverbs_get_context was called, the event file will be accessed before it was allocated, result in NULL pointer dereference: [ 72.325873] BUG: unable to handle kernel NULL pointer dereference at (null) ... [ 72.325984] IP: _raw_spin_lock_irqsave+0x22/0x40 [ 72.327123] Call Trace: [ 72.327168] ib_uverbs_async_handler.isra.8+0x2e/0x160 [ib_uverbs] [ 72.327216] ? synchronize_srcu_expedited+0x27/0x30 [ 72.327269] ib_uverbs_remove_one+0x120/0x2c0 [ib_uverbs] [ 72.327330] ib_unregister_device+0xd0/0x180 [ib_core] [ 72.327373] mlx5_ib_remove+0x74/0x140 [mlx5_ib] [ 72.327422] mlx5_remove_device+0xfb/0x110 [mlx5_core] [ 72.327466] mlx5_unregister_interface+0x3c/0xa0 [mlx5_core] [ 72.327509] mlx5_ib_cleanup+0x10/0x962 [mlx5_ib] [ 72.327546] SyS_delete_module+0x155/0x230 [ 72.328472] ? exit_to_usermode_loop+0x70/0xa6 [ 72.329370] do_syscall_64+0x54/0xc0 [ 72.330262] entry_SYSCALL64_slow_path+0x25/0x25 Fix it by checking that user context was allocated before trigger the event. Fixes: 036b10635739 ('IB/uverbs: Enable device removal when there are active user space applications') Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c023e2c81b8f..5e530d2bee44 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1153,7 +1153,6 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); - ib_uverbs_event_handler(&file->event_handler, &event); mutex_lock(&file->cleanup_mutex); ucontext = file->ucontext; @@ -1170,6 +1169,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, * for example due to freeing the resources * (e.g mmput). */ + ib_uverbs_event_handler(&file->event_handler, &event); ib_dev->disassociate_ucontext(ucontext); mutex_lock(&file->cleanup_mutex); ib_uverbs_cleanup_ucontext(file, ucontext, true); -- cgit From 79db795833bf5c3e798bcd7a5aeeee3fb0505927 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Aug 2017 21:32:19 -0700 Subject: sparc64: Don't clibber fixed registers in __multi4. %g4 and %g5 are fixed registers used by the kernel for the thread pointer and the per-cpu offset. Use %o4 and %g7 instead. Diagnosis by Anthony Yznaga. Fixes: 1b4af13ff2cc ("sparc64: Add __multi3 for gcc 7.x and later.") Reported-by: Anatoly Pugachev Tested-by: Anatoly Pugachev Signed-off-by: David S. Miller --- arch/sparc/lib/multi3.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S index d6b6c97fe3c7..703127aaf4a5 100644 --- a/arch/sparc/lib/multi3.S +++ b/arch/sparc/lib/multi3.S @@ -5,26 +5,26 @@ .align 4 ENTRY(__multi3) /* %o0 = u, %o1 = v */ mov %o1, %g1 - srl %o3, 0, %g4 - mulx %g4, %g1, %o1 + srl %o3, 0, %o4 + mulx %o4, %g1, %o1 srlx %g1, 0x20, %g3 - mulx %g3, %g4, %g5 - sllx %g5, 0x20, %o5 - srl %g1, 0, %g4 + mulx %g3, %o4, %g7 + sllx %g7, 0x20, %o5 + srl %g1, 0, %o4 sub %o1, %o5, %o5 srlx %o5, 0x20, %o5 - addcc %g5, %o5, %g5 + addcc %g7, %o5, %g7 srlx %o3, 0x20, %o5 - mulx %g4, %o5, %g4 + mulx %o4, %o5, %o4 mulx %g3, %o5, %o5 sethi %hi(0x80000000), %g3 - addcc %g5, %g4, %g5 - srlx %g5, 0x20, %g5 + addcc %g7, %o4, %g7 + srlx %g7, 0x20, %g7 add %g3, %g3, %g3 movcc %xcc, %g0, %g3 - addcc %o5, %g5, %o5 - sllx %g4, 0x20, %g4 - add %o1, %g4, %o1 + addcc %o5, %g7, %o5 + sllx %o4, 0x20, %o4 + add %o1, %o4, %o1 add %o5, %g3, %g2 mulx %g1, %o2, %g1 add %g1, %g2, %g1 -- cgit From c7b725be84985532161bcb4fbecd056326998a77 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 15 Aug 2017 18:38:42 -0700 Subject: net: igmp: Use ingress interface rather than vrf device Anuradha reported that statically added groups for interfaces enslaved to a VRF device were not persisting. The problem is that igmp queries and reports need to use the data in the in_dev for the real ingress device rather than the VRF device. Update igmp_rcv accordingly. Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast") Reported-by: Anuradha Karuppiah Signed-off-by: David Ahern Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 498706b072fb..caf2f1101d02 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct net_device *dev = skb->dev; + struct in_device *in_dev; int len = skb->len; bool dropped = true; + if (netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif); + if (!dev) + goto drop; + } + + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto drop; -- cgit From 47f078339be902e97d0ad828ca1d614a5a95334b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Aug 2017 18:21:40 +0100 Subject: Revert "staging: fsl-mc: be consistent when checking strcmp() return" The previous fix removed the equal to zero comparisons by the strcmps and now the function always returns true. Revert this change to restore the original correctly functioning code. Detected by CoverityScan, CID#1452267 ("Constant expression result") This reverts commit b93ad9a067e1515af42da7d56bc61f1a25075f94. Fixes: b93ad9a067e1 ("staging: fsl-mc: be consistent when checking strcmp() return") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/fsl-mc-allocator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c index b37a6f48225f..8ea3920400a0 100644 --- a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c +++ b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c @@ -16,9 +16,9 @@ static bool __must_check fsl_mc_is_allocatable(const char *obj_type) { - return strcmp(obj_type, "dpbp") || - strcmp(obj_type, "dpmcp") || - strcmp(obj_type, "dpcon"); + return strcmp(obj_type, "dpbp") == 0 || + strcmp(obj_type, "dpmcp") == 0 || + strcmp(obj_type, "dpcon") == 0; } /** -- cgit From 6170a506899aee3dd4934c928426505e47b1b466 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 16 Aug 2017 11:09:10 -0700 Subject: sparc64: remove unnecessary log message There is no need to log message if ATU hvapi couldn't get register. Unlike PCI hvapi, ATU hvapi registration failure is not hard error. Even if ATU hvapi registration fails (on system with ATU or without ATU) system continues with legacy IOMMU. So only log message when ATU hvapi successfully get registered. Signed-off-by: Tushar Dave Signed-off-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index f10e2f712394..9ebebf1fd93d 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -1266,8 +1266,6 @@ static int pci_sun4v_probe(struct platform_device *op) * ATU group, but ATU hcalls won't be available. */ hv_atu = false; - pr_err(PFX "Could not register hvapi ATU err=%d\n", - err); } else { pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n", vatu_major, vatu_minor); -- cgit From 47ac5484fd961420e5ec0bb5b972fde381f57365 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Aug 2017 17:39:52 +0200 Subject: x86: Fix norandmaps/ADDR_NO_RANDOMIZE Documentation/admin-guide/kernel-parameters.txt says: norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space but it doesn't work because arch_rnd() which is used to randomize mm->mmap_base returns a random value unconditionally. And as Kirill pointed out, ADDR_NO_RANDOMIZE is broken by the same reason. Just shift the PF_RANDOMIZE check from arch_mmap_rnd() to arch_rnd(). Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Acked-by: Cyrill Gorcunov Reviewed-by: Dmitry Safonov Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20170815153952.GA1076@redhat.com --- arch/x86/mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 229d04a83f85..c94df122815a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -79,13 +79,13 @@ static int mmap_is_legacy(void) static unsigned long arch_rnd(unsigned int rndbits) { + if (!(current->flags & PF_RANDOMIZE)) + return 0; return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT; } unsigned long arch_mmap_rnd(void) { - if (!(current->flags & PF_RANDOMIZE)) - return 0; return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); } -- cgit From 01578e36163cdd0e4fd61d9976de15f13364e26d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Aug 2017 17:40:11 +0200 Subject: x86/elf: Remove the unnecessary ADDR_NO_RANDOMIZE checks The ADDR_NO_RANDOMIZE checks in stack_maxrandom_size() and randomize_stack_top() are not required. PF_RANDOMIZE is set by load_elf_binary() only if ADDR_NO_RANDOMIZE is not set, no need to re-check after that. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Reviewed-by: Dmitry Safonov Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20170815154011.GB1076@redhat.com --- arch/x86/mm/mmap.c | 3 +-- fs/binfmt_elf.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c94df122815a..a88cfbfbd078 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -50,8 +50,7 @@ unsigned long tasksize_64bit(void) static unsigned long stack_maxrandom_size(unsigned long task_size) { unsigned long max = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit()); max <<= PAGE_SHIFT; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 879ff9c7ffd0..6466153f2bf0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -664,8 +664,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; -- cgit From d6957f3396d0b1ee54d183524550d791054b5ebe Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 15 Aug 2017 11:34:19 +0200 Subject: printk-formats.txt: Better describe the difference between %pS and %pF Sometimes people seems unclear when to use the %pS or %pF printk format. For example, see commit 51d96dc2e2dc ("random: fix warning message on ia64 and parisc") which fixed such a wrong format string. The documentation should be more clear about the difference. Signed-off-by: Helge Deller [pmladek@suse.com: Restructure the entire section] Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Helge Deller --- Documentation/printk-formats.txt | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 65ea5915178b..074670b98bac 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -58,20 +58,23 @@ Symbols/Function Pointers %ps versatile_init %pB prev_fn_of_versatile_init+0x88/0x88 -For printing symbols and function pointers. The ``S`` and ``s`` specifiers -result in the symbol name with (``S``) or without (``s``) offsets. Where -this is used on a kernel without KALLSYMS - the symbol address is -printed instead. +The ``F`` and ``f`` specifiers are for printing function pointers, +for example, f->func, &gettimeofday. They have the same result as +``S`` and ``s`` specifiers. But they do an extra conversion on +ia64, ppc64 and parisc64 architectures where the function pointers +are actually function descriptors. + +The ``S`` and ``s`` specifiers can be used for printing symbols +from direct addresses, for example, __builtin_return_address(0), +(void *)regs->ip. They result in the symbol name with (``S``) or +without (``s``) offsets. If KALLSYMS are disabled then the symbol +address is printed instead. The ``B`` specifier results in the symbol name with offsets and should be used when printing stack backtraces. The specifier takes into consideration the effect of compiler optimisations which may occur when tail-call``s are used and marked with the noreturn GCC attribute. -On ia64, ppc64 and parisc64 architectures function pointers are -actually function descriptors which must first be resolved. The ``F`` and -``f`` specifiers perform this resolution and then provide the same -functionality as the ``S`` and ``s`` specifiers. Kernel Pointers =============== -- cgit From 494bea39f3201776cdfddc232705f54a0bd210c4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 16 Aug 2017 13:30:07 +0800 Subject: openvswitch: fix skb_panic due to the incorrect actions attrlen For sw_flow_actions, the actions_len only represents the kernel part's size, and when we dump the actions to the userspace, we will do the convertions, so it's true size may become bigger than the actions_len. But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len to alloc the skbuff, so the user_skb's size may become insufficient and oops will happen like this: skbuff: skb_over_panic: text:ffffffff8148fabf len:1749 put:157 head: ffff881300f39000 data:ffff881300f39000 tail:0x6d5 end:0x6c0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:129! [...] Call Trace: [] skb_put+0x43/0x44 [] skb_zerocopy+0x6c/0x1f4 [] queue_userspace_packet+0x3a3/0x448 [openvswitch] [] ovs_dp_upcall+0x30/0x5c [openvswitch] [] output_userspace+0x132/0x158 [openvswitch] [] ? ip6_rcv_finish+0x74/0x77 [ipv6] [] do_execute_actions+0xcc1/0xdc8 [openvswitch] [] ovs_execute_actions+0x74/0x106 [openvswitch] [] ovs_dp_process_packet+0xe1/0xfd [openvswitch] [] ? key_extract+0x63c/0x8d5 [openvswitch] [] ovs_vport_receive+0xa1/0xc3 [openvswitch] [...] Also we can find that the actions_len is much little than the orig_len: crash> struct sw_flow_actions 0xffff8812f539d000 struct sw_flow_actions { rcu = { next = 0xffff8812f5398800, func = 0xffffe3b00035db32 }, orig_len = 1384, actions_len = 592, actions = 0xffff8812f539d01c } So as a quick fix, use the orig_len instead of the actions_len to alloc the user_skb. Last, this oops happened on our system running a relative old kernel, but the same risk still exists on the mainline, since we use the wrong actions_len from the beginning. Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet upcall to userspace") Cc: Neil McKee Signed-off-by: Liping Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 1 + net/openvswitch/datapath.c | 7 ++++--- net/openvswitch/datapath.h | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e4610676299b..a54a556fcdb5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1337,6 +1337,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, goto out; } + OVS_CB(skb)->acts_origlen = acts->orig_len; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..6b44fe405282 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -381,7 +381,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, } static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, - unsigned int hdrlen) + unsigned int hdrlen, int actions_attrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ @@ -398,7 +398,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, /* OVS_PACKET_ATTR_ACTIONS */ if (upcall_info->actions_len) - size += nla_total_size(upcall_info->actions_len); + size += nla_total_size(actions_attrlen); /* OVS_PACKET_ATTR_MRU */ if (upcall_info->mru) @@ -465,7 +465,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info, hlen - cutlen); + len = upcall_msg_size(upcall_info, hlen - cutlen, + OVS_CB(skb)->acts_origlen); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 5d8dcd88815f..480600649d0b 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -99,11 +99,13 @@ struct datapath { * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not * fragmented. + * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; + u16 acts_origlen; u32 cutlen; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) -- cgit From 120e9dabaf551c6dc03d3a10a1f026376cb1811c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 07:03:15 -0700 Subject: dccp: defer ccid_hc_tx_delete() at dismantle time syszkaller team reported another problem in DCCP [1] Problem here is that the structure holding RTO timer (ccid2_hc_tx_rto_expire() handler) is freed too soon. We can not use del_timer_sync() to cancel the timer since this timer wants to grab socket lock (that would risk a dead lock) Solution is to defer the freeing of memory when all references to the socket were released. Socket timers do own a reference, so this should fix the issue. [1] ================================================================== BUG: KASAN: use-after-free in ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 Read of size 4 at addr ffff8801d2660540 by task kworker/u4:7/3365 CPU: 1 PID: 3365 Comm: kworker/u4:7 Not tainted 4.13.0-rc4+ #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events_unbound call_usermodehelper_exec_work Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 call_timer_fn+0x233/0x830 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x7fd/0xb90 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:638 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:1044 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:702 RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:824 [inline] RIP: 0010:__raw_write_unlock_irq include/linux/rwlock_api_smp.h:267 [inline] RIP: 0010:_raw_write_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:343 RSP: 0018:ffff8801cd50eaa8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: ffffffff85a090c0 RCX: 0000000000000006 RDX: 1ffffffff0b595f3 RSI: 1ffff1003962f989 RDI: ffffffff85acaf98 RBP: ffff8801cd50eab0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801cc96ea60 R13: dffffc0000000000 R14: ffff8801cc96e4c0 R15: ffff8801cc96e4c0 release_task+0xe9e/0x1a40 kernel/exit.c:220 wait_task_zombie kernel/exit.c:1162 [inline] wait_consider_task+0x29b8/0x33c0 kernel/exit.c:1389 do_wait_thread kernel/exit.c:1452 [inline] do_wait+0x441/0xa90 kernel/exit.c:1523 kernel_wait4+0x1f5/0x370 kernel/exit.c:1665 SYSC_wait4+0x134/0x140 kernel/exit.c:1677 SyS_wait4+0x2c/0x40 kernel/exit.c:1673 call_usermodehelper_exec_sync kernel/kmod.c:286 [inline] call_usermodehelper_exec_work+0x1a0/0x2c0 kernel/kmod.c:323 process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2097 worker_thread+0x223/0x1860 kernel/workqueue.c:2231 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:425 Allocated by task 21267: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc+0x127/0x750 mm/slab.c:3561 ccid_new+0x20e/0x390 net/dccp/ccid.c:151 dccp_hdlr_ccid+0x27/0x140 net/dccp/feat.c:44 __dccp_feat_activate+0x142/0x2a0 net/dccp/feat.c:344 dccp_feat_activate_values+0x34e/0xa90 net/dccp/feat.c:1538 dccp_rcv_request_sent_state_process net/dccp/input.c:472 [inline] dccp_rcv_state_process+0xed1/0x1620 net/dccp/input.c:677 dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 sk_backlog_rcv include/net/sock.h:911 [inline] __release_sock+0x124/0x360 net/core/sock.c:2269 release_sock+0xa4/0x2a0 net/core/sock.c:2784 inet_wait_for_connect net/ipv4/af_inet.c:557 [inline] __inet_stream_connect+0x671/0xf00 net/ipv4/af_inet.c:643 inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:682 SYSC_connect+0x204/0x470 net/socket.c:1642 SyS_connect+0x24/0x30 net/socket.c:1623 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3049: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3763 ccid_hc_tx_delete+0xc5/0x100 net/dccp/ccid.c:190 dccp_destroy_sock+0x1d1/0x2b0 net/dccp/proto.c:225 inet_csk_destroy_sock+0x166/0x3f0 net/ipv4/inet_connection_sock.c:833 dccp_done+0xb7/0xd0 net/dccp/proto.c:145 dccp_time_wait+0x13d/0x300 net/dccp/minisocks.c:72 dccp_rcv_reset+0x1d1/0x5b0 net/dccp/input.c:160 dccp_rcv_state_process+0x8fc/0x1620 net/dccp/input.c:663 dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 sk_backlog_rcv include/net/sock.h:911 [inline] __sk_receive_skb+0x33e/0xc00 net/core/sock.c:521 dccp_v4_rcv+0xef1/0x1c00 net/dccp/ipv4.c:871 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:248 [inline] ip_local_deliver+0x1ce/0x6d0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:477 [inline] ip_rcv_finish+0x8db/0x19c0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:248 [inline] ip_rcv+0xc3f/0x17d0 net/ipv4/ip_input.c:488 __netif_receive_skb_core+0x19af/0x33d0 net/core/dev.c:4417 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4455 process_backlog+0x203/0x740 net/core/dev.c:5130 napi_poll net/core/dev.c:5527 [inline] net_rx_action+0x792/0x1910 net/core/dev.c:5593 __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 The buggy address belongs to the object at ffff8801d2660100 which belongs to the cache ccid2_hc_tx_sock of size 1240 The buggy address is located 1088 bytes inside of 1240-byte region [ffff8801d2660100, ffff8801d26605d8) The buggy address belongs to the page: page:ffffea0007499800 count:1 mapcount:0 mapping:ffff8801d2660100 index:0x0 compound_mapcount: 0 flags: 0x200000000008100(slab|head) raw: 0200000000008100 ffff8801d2660100 0000000000000000 0000000100000005 raw: ffffea00075271a0 ffffea0007538820 ffff8801d3aef9c0 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d2660400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d2660480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d2660500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801d2660580: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff8801d2660600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/proto.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 86bc40ba6ba5..b68168fcc06a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); +static void dccp_sk_destruct(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_tx_ccid = NULL; + inet_sock_destruct(sk); +} + int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); @@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; + sk->sk_destruct = dccp_sk_destruct; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; @@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ackvec = NULL; } ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + dp->dccps_hc_rx_ccid = NULL; /* clean up feature negotiation state */ dccp_feat_list_purge(&dp->dccps_featneg); -- cgit From 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 10:36:47 -0700 Subject: ptr_ring: use kmalloc_array() As found by syzkaller, malicious users can set whatever tx_queue_len on a tun device and eventually crash the kernel. Lets remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small ring buffer is not fast anyway. Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Michael S. Tsirkin Cc: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 9 +++++---- include/linux/skb_array.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index d8c97ec8a8e6..37b4bb2545b3 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -436,9 +436,9 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, __PTR_RING_PEEK_CALL_v; \ }) -static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -582,7 +582,8 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, + unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { @@ -590,7 +591,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, void ***queues; int i; - queues = kmalloc(nrings * sizeof *queues, gfp); + queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 35226cd4efb0..8621ffdeecbf 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -193,7 +193,8 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) } static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, int size, gfp_t gfp) + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, -- cgit From c780a049f9bf442314335372c9abc4548bfe3e44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 11:09:12 -0700 Subject: ipv4: better IP_MAX_MTU enforcement While working on yet another syzkaller report, I found that our IP_MAX_MTU enforcements were not properly done. gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and final result can be bigger than IP_MAX_MTU :/ This is a problem because device mtu can be changed on other cpus or threads. While this patch does not fix the issue I am working on, it is probably worth addressing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- net/ipv4/route.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..0cf7f5a65fe6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,7 +352,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, !forwarding) return dst_mtu(dst); - return min(dst->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, @@ -364,7 +364,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } - return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } u32 ip_idents_reserve(u32 hash, int segs); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7effa62beed3..fe877a4a72b1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1267,7 +1267,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (mtu) return mtu; - mtu = dst->dev->mtu; + mtu = READ_ONCE(dst->dev->mtu); if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { if (rt->rt_uses_gateway && mtu > 576) -- cgit From 61f0c3c7a0fd37f252b0f23c237973ddf4c21ea7 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 8 Aug 2017 01:26:57 +0800 Subject: scsi: megaraid_sas: fix error handle in megasas_probe_one megasas_mgmt_info.max_index has increased by 1 before megasas_io_attach, if megasas_io_attach return error, then goto fail_io_attach, megasas_mgmt_info.instance has a wrong index here. So first reduce max_index and then set that instance to NULL. Signed-off-by: weiping zhang Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 316c3df0c3fd..71c4746341ea 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6228,8 +6228,8 @@ static int megasas_probe_one(struct pci_dev *pdev, fail_start_aen: fail_io_attach: megasas_mgmt_info.count--; - megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = NULL; megasas_mgmt_info.max_index--; + megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = NULL; instance->instancet->disable_intr(instance); megasas_destroy_irqs(instance); -- cgit From 82f0fd06d4a84ff32cc2cf9503764219316ec728 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 31 Jul 2017 22:02:55 +0530 Subject: scsi: csiostor: fail probe if fw does not support FCoE Fail probe if FCoE capability is not enabled in the firmware. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_hw.c | 4 +++- drivers/scsi/csiostor/csio_init.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 2029ad225121..5be0086142ca 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -3845,8 +3845,10 @@ csio_hw_start(struct csio_hw *hw) if (csio_is_hw_ready(hw)) return 0; - else + else if (csio_match_state(hw, csio_hws_uninit)) return -EINVAL; + else + return -ENODEV; } int diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index ea0c31086cc6..dcd074169aa9 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -969,10 +969,14 @@ static int csio_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hw); - if (csio_hw_start(hw) != 0) { - dev_err(&pdev->dev, - "Failed to start FW, continuing in debug mode.\n"); - return 0; + rv = csio_hw_start(hw); + if (rv) { + if (rv == -EINVAL) { + dev_err(&pdev->dev, + "Failed to start FW, continuing in debug mode.\n"); + return 0; + } + goto err_lnode_exit; } sprintf(hw->fwrev_str, "%u.%u.%u.%u\n", -- cgit From c80267324938a5517fd31fa4bbd2d63c564401f9 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Fri, 4 Aug 2017 03:51:41 -0700 Subject: scsi: aacraid: Fix out of bounds in aac_get_name_resp We terminate the aac_get_name_resp on a byte that is outside the bounds of the structure. Extend the return response by one byte to remove the out of bounds reference. Fixes: b836439faf04 ("aacraid: 4KB sector support") Reported-by: Dan Carpenter Signed-off-by: David Carroll Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 9 +++++++-- drivers/scsi/aacraid/aacraid.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 4591113c49de..a1a2c71e1626 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -549,7 +549,9 @@ static void get_container_name_callback(void *context, struct fib * fibptr) if ((le32_to_cpu(get_name_reply->status) == CT_OK) && (get_name_reply->data[0] != '\0')) { char *sp = get_name_reply->data; - sp[sizeof(((struct aac_get_name_resp *)NULL)->data)] = '\0'; + int data_size = FIELD_SIZEOF(struct aac_get_name_resp, data); + + sp[data_size - 1] = '\0'; while (*sp == ' ') ++sp; if (*sp) { @@ -579,12 +581,15 @@ static void get_container_name_callback(void *context, struct fib * fibptr) static int aac_get_container_name(struct scsi_cmnd * scsicmd) { int status; + int data_size; struct aac_get_name *dinfo; struct fib * cmd_fibcontext; struct aac_dev * dev; dev = (struct aac_dev *)scsicmd->device->host->hostdata; + data_size = FIELD_SIZEOF(struct aac_get_name_resp, data); + cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd); aac_fib_init(cmd_fibcontext); @@ -593,7 +598,7 @@ static int aac_get_container_name(struct scsi_cmnd * scsicmd) dinfo->command = cpu_to_le32(VM_ContainerConfig); dinfo->type = cpu_to_le32(CT_READ_NAME); dinfo->cid = cpu_to_le32(scmd_id(scsicmd)); - dinfo->count = cpu_to_le32(sizeof(((struct aac_get_name_resp *)NULL)->data)); + dinfo->count = cpu_to_le32(data_size - 1); status = aac_fib_send(ContainerCommand, cmd_fibcontext, diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index d31a9bc2ba69..ee2667e20e42 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2274,7 +2274,7 @@ struct aac_get_name_resp { __le32 parm3; __le32 parm4; __le32 parm5; - u8 data[16]; + u8 data[17]; }; #define CT_CID_TO_32BITS_UID 165 -- cgit From 70e42fd02c46e2aa9ab07b766d418637e3a51de7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Aug 2017 12:00:22 +0900 Subject: scsi: sd_zbc: Write unlock zone from sd_uninit_cmnd() Releasing a zone write lock only when the write commnand that acquired the lock completes can cause deadlocks due to potential command reordering if the lock owning request is requeued and not executed. This problem exists only with the scsi-mq path as, unlike the legacy path, requests are moved out of the dispatch queue before being prepared and so before locking a zone for a write command. Since sd_uninit_cmnd() is now always called when a request is requeued, call sd_zbc_write_unlock_zone() from that function for write requests that acquired a zone lock instead of from sd_done(). Acquisition of a zone lock by a write command is indicated using the new command flag SCMD_ZONE_WRITE_LOCK. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ drivers/scsi/sd_zbc.c | 9 +++++---- include/scsi/scsi_cmnd.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bea36adeee17..e2647f2d4430 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1277,6 +1277,9 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) { struct request *rq = SCpnt->request; + if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK) + sd_zbc_write_unlock_zone(SCpnt); + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) __free_page(rq->special_vec.bv_page); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 96855df9f49d..8aa54779aac1 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -294,6 +294,9 @@ int sd_zbc_write_lock_zone(struct scsi_cmnd *cmd) test_and_set_bit(zno, sdkp->zones_wlock)) return BLKPREP_DEFER; + WARN_ON_ONCE(cmd->flags & SCMD_ZONE_WRITE_LOCK); + cmd->flags |= SCMD_ZONE_WRITE_LOCK; + return BLKPREP_OK; } @@ -302,9 +305,10 @@ void sd_zbc_write_unlock_zone(struct scsi_cmnd *cmd) struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); - if (sdkp->zones_wlock) { + if (sdkp->zones_wlock && cmd->flags & SCMD_ZONE_WRITE_LOCK) { unsigned int zno = sd_zbc_zone_no(sdkp, blk_rq_pos(rq)); WARN_ON_ONCE(!test_bit(zno, sdkp->zones_wlock)); + cmd->flags &= ~SCMD_ZONE_WRITE_LOCK; clear_bit_unlock(zno, sdkp->zones_wlock); smp_mb__after_atomic(); } @@ -335,9 +339,6 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: - /* Unlock the zone */ - sd_zbc_write_unlock_zone(cmd); - if (result && sshdr->sense_key == ILLEGAL_REQUEST && sshdr->asc == 0x21) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index a1266d318c85..6af198d8120b 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -57,6 +57,7 @@ struct scsi_pointer { /* for scmd->flags */ #define SCMD_TAGGED (1 << 0) #define SCMD_UNCHECKED_ISA_DMA (1 << 1) +#define SCMD_ZONE_WRITE_LOCK (1 << 2) struct scsi_cmnd { struct scsi_request req; -- cgit From cbe7dfa26eee4819db7b5846181d56fd0cece0ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Aug 2017 19:44:22 +0200 Subject: Revert "scsi: default to scsi-mq" Defaulting to scsi-mq in 4.13-rc has shown various regressions on setups that we didn't previously consider. Fixes for them are in progress, but too invasive to make it in this cycle. So for now revert the commit that defaults to blk-mq for SCSI. For 4.14 we'll plan to try again with these fixes. This reverts commit 5c279bd9e40624f4ab6e688671026d6005b066fa. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 11 +++++++++++ drivers/scsi/scsi.c | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index f4538d7a3016..d145e0d90227 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -47,6 +47,17 @@ config SCSI_NETLINK default n depends on NET +config SCSI_MQ_DEFAULT + bool "SCSI: use blk-mq I/O path by default" + depends on SCSI + ---help--- + This option enables the new blk-mq based I/O path for SCSI + devices by default. With the option the scsi_mod.use_blk_mq + module/boot option defaults to Y, without it to N, but it can + still be overridden either way. + + If unsure say N. + config SCSI_PROC_FS bool "legacy /proc/scsi/ support" depends on SCSI && PROC_FS diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3d38c6d463b8..1bf274e3b2b6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -800,7 +800,11 @@ MODULE_LICENSE("GPL"); module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels"); +#ifdef CONFIG_SCSI_MQ_DEFAULT bool scsi_use_blk_mq = true; +#else +bool scsi_use_blk_mq = false; +#endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); static int __init init_scsi(void) -- cgit From 71eb2ac58aa52f7948deda4d9a75754b6898e5c9 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 16 Aug 2017 19:40:32 +0530 Subject: scsi: cxgb4i: call neigh_event_send() to update MAC address If nud_state is not valid then call neigh_event_send() to update MAC address. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index a69a9ac836f5..1d02cf9fe06c 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1635,6 +1635,9 @@ static int init_act_open(struct cxgbi_sock *csk) goto rel_resource; } + if (!(n->nud_state & NUD_VALID)) + neigh_event_send(n, NULL); + csk->atid = cxgb4_alloc_atid(lldi->tids, csk); if (csk->atid < 0) { pr_err("%s, NO atid available.\n", ndev->name); -- cgit From 369157b41cca435442cf5add9df209aaf951860d Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Aug 2017 10:47:03 -0700 Subject: nvmet-fc: eliminate incorrect static markers on local variables There were 2 statics introduced that were bogus. Removed the static designations. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b200f9aadd52..309c84aa7595 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -394,7 +394,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) static struct nvmet_fc_ls_iod * nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) { - static struct nvmet_fc_ls_iod *iod; + struct nvmet_fc_ls_iod *iod; unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); @@ -471,7 +471,7 @@ nvmet_fc_destroy_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, static struct nvmet_fc_fcp_iod * nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) { - static struct nvmet_fc_fcp_iod *fod; + struct nvmet_fc_fcp_iod *fod; lockdep_assert_held(&queue->qlock); -- cgit From 187e91fe5e915f4b7f39b824aa422493463e443d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 16 Aug 2017 21:08:08 +0200 Subject: x86/boot/64/clang: Use fixup_pointer() to access 'next_early_pgt' __startup_64() is normally using fixup_pointer() to access globals in a position-independent fashion. However 'next_early_pgt' was accessed directly, which wasn't guaranteed to work. Luckily GCC was generating a R_X86_64_PC32 PC-relative relocation for 'next_early_pgt', but Clang emitted a R_X86_64_32S, which led to accessing invalid memory and rebooting the kernel. Signed-off-by: Alexander Potapenko Acked-by: Kirill A. Shutemov Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Michael Davidson Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c88d71508e36 ("x86/boot/64: Rewrite startup_64() in C") Link: http://lkml.kernel.org/r/20170816190808.131748-1-glider@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 46c3c73e7f43..9ba79543d9ee 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -53,6 +53,7 @@ void __head __startup_64(unsigned long physaddr) pudval_t *pud; pmdval_t *pmd, pmd_entry; int i; + unsigned int *next_pgt_ptr; /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) @@ -91,9 +92,9 @@ void __head __startup_64(unsigned long physaddr) * creates a bunch of nonsense entries but that is fine -- * it avoids problems around wraparound. */ - - pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); - pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); + pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); + pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); if (IS_ENABLED(CONFIG_X86_5LEVEL)) { p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); -- cgit From ee7b1f31200d9f3cc45e1bd22e962bd6b1d4d611 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 11 Aug 2017 17:29:56 +0100 Subject: of: fix DMA mask generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically, DMA masks have suffered some ambiguity between whether they represent the range of physical memory a device can access, or the address bits a device is capable of driving, particularly since on many platforms the two are equivalent. Whilst there are some stragglers left (dma_max_pfn(), I'm looking at you...), the majority of DMA code has been cleaned up to follow the latter definition, not least since it is the only one which makes sense once IOMMUs are involved. In this respect, of_dma_configure() has always done the wrong thing in how it generates initial masks based on "dma-ranges". Although rounding down did not affect the TI Keystone platform where dma_addr + size is already a power of two, in any other case it results in a mask which is at best unnecessarily constrained and at worst unusable. BCM2837 illustrates the problem nicely, where we have a DMA base of 3GB and a size of 1GB - 16MB, giving dma_addr + size = 0xff000000 and a resultant mask of 0x7fffffff, which is then insufficient to even cover the necessary offset, effectively making all DMA addresses out-of-range. This has been hidden until now (mostly because we don't yet prevent drivers from simply overwriting this initial mask later upon probe), but due to recent changes elsewhere now shows up as USB being broken on Raspberry Pi 3. Make it right by rounding up instead of down, such that the mask correctly correctly describes all possisble bits the device needs to emit. Fixes: 9a6d7298b083 ("of: Calculate device DMA masks based on DT dma-range size") Reported-by: Stefan Wahren Reported-by: Andreas Färber Reported-by: Hans Verkuil Signed-off-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Christoph Hellwig --- drivers/of/device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 28c38c756f92..e0a28ea341fe 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -89,6 +89,7 @@ int of_dma_configure(struct device *dev, struct device_node *np) bool coherent; unsigned long offset; const struct iommu_ops *iommu; + u64 mask; /* * Set default coherent_dma_mask to 32 bit. Drivers are expected to @@ -134,10 +135,9 @@ int of_dma_configure(struct device *dev, struct device_node *np) * Limit coherent and dma mask based on size and default mask * set by the driver. */ - dev->coherent_dma_mask = min(dev->coherent_dma_mask, - DMA_BIT_MASK(ilog2(dma_addr + size))); - *dev->dma_mask = min((*dev->dma_mask), - DMA_BIT_MASK(ilog2(dma_addr + size))); + mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1); + dev->coherent_dma_mask &= mask; + *dev->dma_mask &= mask; coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", -- cgit From 0f174b3525a43bd51f9397394763925e0ebe7bc7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 16 Aug 2017 14:18:37 +0200 Subject: ALSA: usb-audio: Add mute TLV for playback volumes on C-Media devices C-Media devices (at least some models) mute the playback stream when volumes are set to the minimum value. But this isn't informed via TLV and the user-space, typically PulseAudio, gets confused as if it's still played in a low volume. This patch adds the new flag, min_mute, to struct usb_mixer_elem_info for indicating that the mixer element is with the minimum-mute volume. This flag is set for known C-Media devices in snd_usb_mixer_fu_apply_quirk() in turn. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196669 Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 2 ++ sound/usb/mixer.h | 1 + sound/usb/mixer_quirks.c | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 082736c539bc..e630813c5008 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -542,6 +542,8 @@ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, if (size < sizeof(scale)) return -ENOMEM; + if (cval->min_mute) + scale[0] = SNDRV_CTL_TLVT_DB_MINMAX_MUTE; scale[2] = cval->dBmin; scale[3] = cval->dBmax; if (copy_to_user(_tlv, scale, sizeof(scale))) diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 3417ef347e40..2b4b067646ab 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -64,6 +64,7 @@ struct usb_mixer_elem_info { int cached; int cache_val[MAX_CHANNELS]; u8 initialized; + u8 min_mute; void *private_data; }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e3d1dec48ee4..e1e7ce9ab217 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1878,6 +1878,12 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; + /* lowest playback value is muted on C-Media devices */ + case USB_ID(0x0d8c, 0x000c): + case USB_ID(0x0d8c, 0x0014): + if (strstr(kctl->id.name, "Playback")) + cval->min_mute = 1; + break; } } -- cgit From 733f6563979d96dec180c350abb8ac67cc0367ba Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 15 Aug 2017 17:34:44 +0300 Subject: i2c: designware: Remove needless pm_runtime_put_noidle() call I guess pm_runtime_put_noidle() call in i2c_dw_probe_slave() was copied by accident from similar master mode adapter registration code. It is unbalanced due missing pm_runtime_get_noresume() but harmless since it doesn't decrease dev->power.usage_count below zero. In theory we can hit similar needless runtime suspend/resume cycle during slave mode adapter registration that was happening when registering the master mode adapter. See commit cd998ded5c12 ("i2c: designware: Prevent runtime suspend during adapter registration"). However, since we are slave, we can consider it as a wrong configuration if we have other slaves attached under this adapter and can omit the pm_runtime_get_noresume()/pm_runtime_put_noidle() calls for simplicity. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 4b62a3872763..25fa33927124 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -382,7 +382,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev) ret = i2c_add_numbered_adapter(adap); if (ret) dev_err(dev->dev, "failure adding adapter: %d\n", ret); - pm_runtime_put_noidle(dev->dev); return ret; } -- cgit From 2a86cdd2e7d3c75580f41f89f9b9211e225573cc Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 15 Aug 2017 17:34:45 +0300 Subject: i2c: designware: Fix runtime PM for I2C slave mode I2C slave controller must be powered and active all the time when I2C slave backend is registered in order to let master address and communicate with us. Now if the controller is runtime PM capable it will be suspended after probe and cannot ever respond to the master or generate interrupts. Fix this by resuming the controller when I2C slave backend is registered and let it suspend after unregistering. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 25fa33927124..78d8fb73927d 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -177,6 +177,8 @@ static int i2c_dw_reg_slave(struct i2c_client *slave) return -EBUSY; if (slave->flags & I2C_CLIENT_TEN) return -EAFNOSUPPORT; + pm_runtime_get_sync(dev->dev); + /* * Set slave address in the IC_SAR register, * the address to which the DW_apb_i2c responds. @@ -205,6 +207,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave) dev->disable_int(dev); dev->disable(dev); dev->slave = NULL; + pm_runtime_put(dev->dev); return 0; } -- cgit From c8c03f1858331e85d397bacccd34ef409aae993c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 16 Aug 2017 17:08:07 -0700 Subject: pty: fix the cached path of the pty slave file descriptor in the master Christian Brauner reported that if you use the TIOCGPTPEER ioctl() to get a slave pty file descriptor, the resulting file descriptor doesn't look right in /proc//fd/. In particular, he wanted to use readlink() on /proc/self/fd/ to get the pathname of the slave pty (basically implementing "ptsname{_r}()"). The reason for that was that we had generated the wrong 'struct path' when we create the pty in ptmx_open(). In particular, the dentry was correct, but the vfsmount pointed to the mount of the ptmx node. That _can_ be correct - in case you use "/dev/pts/ptmx" to open the master - but usually is not. The normal case is to use /dev/ptmx, which then looks up the pts/ directory, and then the vfsmount of the ptmx node is obviously the /dev directory, not the /dev/pts/ directory. We actually did have the right vfsmount available, but in the wrong place (it gets looked up in 'devpts_acquire()' when we get a reference to the pts filesystem), and so ptmx_open() used the wrong mnt pointer. The end result of this confusion was that the pty worked fine, but when if you did TIOCGPTPEER to get the slave side of the pty, end end result would also work, but have that dodgy 'struct path'. And then when doing "d_path()" on to get the pathname, the vfsmount would not match the root of the pts directory, and d_path() would return an empty pathname thinking that the entry had escaped a bind mount into another mount. This fixes the problem by making devpts_acquire() return the vfsmount for the pts filesystem, allowing ptmx_open() to trivially just use the right mount for the pts dentry, and create the proper 'struct path'. Reported-by: Christian Brauner Cc: Al Viro Acked-by: Eric Biederman Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 7 +++++-- fs/devpts/inode.c | 4 +++- include/linux/devpts_fs.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 284749fb0f6b..1fc80ea87c13 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -793,6 +793,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) struct tty_struct *tty; struct path *pts_path; struct dentry *dentry; + struct vfsmount *mnt; int retval; int index; @@ -805,7 +806,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_acquire(filp); + fsi = devpts_acquire(filp, &mnt); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; @@ -849,7 +850,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); if (!pts_path) goto err_release; - pts_path->mnt = filp->f_path.mnt; + pts_path->mnt = mnt; pts_path->dentry = dentry; path_get(pts_path); tty->link->driver_data = pts_path; @@ -866,6 +867,7 @@ err_path_put: path_put(pts_path); kfree(pts_path); err_release: + mntput(mnt); tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); @@ -874,6 +876,7 @@ out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); + mntput(mnt); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..44dfbca9306f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp) +struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) { struct pts_fs_info *result; struct path path; @@ -142,6 +142,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); + *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -165,6 +166,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); + *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..7883e901f65c 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *); +struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit From 81a0b8d74edd5841be29d223ce44bc8db2b00d09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 13:57:49 +0200 Subject: nvme-fabrics: fix reporting of unrecognized options Only print the specified options that are not recognized, instead of the whole list of options. Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy --- drivers/nvme/host/fabrics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 2e582a240943..5f5cd306f76d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -794,7 +794,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { - if (opt_tokens[i].token & ~allowed_opts) { + if ((opt_tokens[i].token & opts->mask) && + (opt_tokens[i].token & ~allowed_opts)) { pr_warn("invalid parameter '%s'\n", opt_tokens[i].pattern); } -- cgit From 014cd0a368dc6351c65d51e4ee34f8573a4a1543 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 17 Aug 2017 20:30:39 +1000 Subject: bpf: Update sysctl documentation to list all supported architectures The sysctl documentation states that the JIT is only available on x86_64, which is no longer correct. Update the list, and break it out to indicate which architectures support the cBPF JIT (via HAVE_CBPF_JIT) or the eBPF JIT (HAVE_EBPF_JIT). Signed-off-by: Michael Ellerman Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 14db18c970b1..b9c3c6078010 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -36,8 +36,23 @@ bpf_jit_enable -------------- This enables Berkeley Packet Filter Just in Time compiler. -Currently supported on x86_64 architecture, bpf_jit provides a framework -to speed packet filtering, the one used by tcpdump/libpcap for example. + +There are two flavors of JIT, the new eBPF JIT supported on: + - x86_64 + - arm64 + - ppc64 + - sparc64 + - mips64 + +And the older cBPF JIT supported on: + - arm + - mips + - ppc + - sparc + +The BPF JIT provides a framework to speed packet filtering, the one used by +tcpdump/libpcap for example. + Values : 0 - disable the JIT (default value) 1 - enable the JIT -- cgit From 98529b9272e06a7767034fb8a32e43cdecda240a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Aug 2017 15:29:49 +0800 Subject: ACPI: EC: Fix regression related to wrong ECDT initialization order Commit 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle EC events) introduced acpi_ec_ecdt_start(), but that function is invoked before acpi_ec_query_init(), which is too early. This causes the kernel to crash if an EC event occurs after boot, when ec_query_wq is not valid: BUG: unable to handle kernel NULL pointer dereference at 0000000000000102 ... Workqueue: events acpi_ec_event_handler task: ffff9f539790dac0 task.stack: ffffb437c0e10000 RIP: 0010:__queue_work+0x32/0x430 Normally, the DSDT EC should always be valid, so acpi_ec_ecdt_start() is actually a no-op in the majority of cases. However, commit c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe) caused the probing of the DSDT EC as the "boot EC" to be skipped when the ECDT EC is valid and uncovered the bug. Fix this issue by invoking acpi_ec_ecdt_start() after acpi_ec_query_init() in acpi_ec_init(). Link: https://jira01.devtools.intel.com/browse/LCK-4348 Fixes: 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle EC events) Fixes: c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe) Reported-by: Wang Wendy Tested-by: Feng Chenzhou Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 17 +++++++---------- drivers/acpi/internal.h | 1 - drivers/acpi/scan.c | 1 - 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 62068a5e814f..ae3d6d152633 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1741,7 +1741,7 @@ error: * functioning ECDT EC first in order to handle the events. * https://bugzilla.kernel.org/show_bug.cgi?id=115021 */ -int __init acpi_ec_ecdt_start(void) +static int __init acpi_ec_ecdt_start(void) { acpi_handle handle; @@ -2003,20 +2003,17 @@ static inline void acpi_ec_query_exit(void) int __init acpi_ec_init(void) { int result; + int ecdt_fail, dsdt_fail; /* register workqueue for _Qxx evaluations */ result = acpi_ec_query_init(); if (result) - goto err_exit; - /* Now register the driver for the EC */ - result = acpi_bus_register_driver(&acpi_ec_driver); - if (result) - goto err_exit; + return result; -err_exit: - if (result) - acpi_ec_query_exit(); - return result; + /* Drivers must be started after acpi_ec_query_init() */ + ecdt_fail = acpi_ec_ecdt_start(); + dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); + return ecdt_fail && dsdt_fail ? -ENODEV : 0; } /* EC driver currently not unloadable */ diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 58dd7ab3c653..3f5af4d7a739 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -185,7 +185,6 @@ typedef int (*acpi_ec_query_func) (void *data); int acpi_ec_init(void); int acpi_ec_ecdt_probe(void); int acpi_ec_dsdt_probe(void); -int acpi_ec_ecdt_start(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 33897298f03e..70fd5502c284 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2084,7 +2084,6 @@ int __init acpi_scan_init(void) acpi_gpe_apply_masked_gpes(); acpi_update_all_gpes(); - acpi_ec_ecdt_start(); acpi_scan_initialized = true; -- cgit From 8204f8ddaafafcae074746fcf2a05a45e6827603 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:28 -0700 Subject: xfs: clear MS_ACTIVE after finishing log recovery Way back when we established inode block-map redo log items, it was discovered that we needed to prevent the VFS from evicting inodes during log recovery because any given inode might be have bmap redo items to replay even if the inode has no link count and is ultimately deleted, and any eviction of an unlinked inode causes the inode to be truncated and freed too early. To make this possible, we set MS_ACTIVE so that inodes would not be torn down immediately upon release. Unfortunately, this also results in the quota inodes not being released at all if a later part of the mount process should fail, because we never reclaim the inodes. So, set MS_ACTIVE right before we do the last part of log recovery and clear it immediately after we finish the log recovery so that everything will be torn down properly if we abort the mount. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_log.c | 11 +++++++++++ fs/xfs/xfs_mount.c | 10 ---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0053bcf2b10a..4ebd0bafc914 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -749,9 +749,20 @@ xfs_log_mount_finish( return 0; } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. Turn it off immediately after recovery finishes + * so that we don't leak the quota inodes if subsequent mount + * activities fail. + */ + mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); + mp->m_super->s_flags &= ~MS_ACTIVE; return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40d4e8b4e193..151a82db0945 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -944,15 +944,6 @@ xfs_mountfs( } } - /* - * During the second phase of log recovery, we need iget and - * iput to behave like they do for an active filesystem. - * xfs_fs_drop_inode needs to be able to prevent the deletion - * of inodes before we're done replaying log items on those - * inodes. - */ - mp->m_super->s_flags |= MS_ACTIVE; - /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently @@ -1028,7 +1019,6 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: - mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); -- cgit From 77aff8c76425c8f49b50d0b9009915066739e7d2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:29 -0700 Subject: xfs: don't leak quotacheck dquots when cow recovery If we fail a mount on account of cow recovery errors, it's possible that a previous quotacheck left some dquots in memory. The bailout clause of xfs_mountfs forgets to purge these, and so we leak them. Fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_mount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 151a82db0945..ea7d4b4e50d0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1024,6 +1024,8 @@ xfs_mountfs( IRELE(rip); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); + /* Clean out dquots that might be in memory after quotacheck. */ + xfs_qm_unmount(mp); out_log_dealloc: mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); -- cgit From c40bc54fdf2d52a80f66b365f1eac9d43b32e107 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Thu, 17 Aug 2017 15:50:10 +0200 Subject: ARM: dts: imx6qdl-nitrogen6_som2: fix PCIe reset Previous value was a bad copy of nitrogen6_max device tree. Signed-off-by: Gary Bisson Fixes: 3faa1bb2e89c ("ARM: dts: imx: add Boundary Devices Nitrogen6_SOM2 support") Cc: Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi index aeaa5a6e4fcf..a24e4f1911ab 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi @@ -507,7 +507,7 @@ pinctrl_pcie: pciegrp { fsl,pins = < /* PCIe reset */ - MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0x030b0 + MX6QDL_PAD_EIM_DA0__GPIO3_IO00 0x030b0 MX6QDL_PAD_EIM_DA4__GPIO3_IO04 0x030b0 >; }; @@ -668,7 +668,7 @@ &pcie { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pcie>; - reset-gpio = <&gpio6 31 GPIO_ACTIVE_LOW>; + reset-gpio = <&gpio3 0 GPIO_ACTIVE_LOW>; status = "okay"; }; -- cgit From 1a92a80ad386a1a6e3b36d576d52a1a456394b70 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:00 +1000 Subject: powerpc/mm: Ensure cpumask update is ordered There is no guarantee that the various isync's involved with the context switch will order the update of the CPU mask with the first TLB entry for the new context being loaded by the HW. Be safe here and add a memory barrier to order any subsequent load/store which may bring entries into the TLB. The corresponding barrier on the other side already exists as pte updates use pte_xchg() which uses __cmpxchg_u64 which has a sync after the atomic operation. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Nicholas Piggin [mpe: Add comments in the code] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/pgtable-be-types.h | 1 + arch/powerpc/include/asm/pgtable-types.h | 1 + 3 files changed, 20 insertions(+) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 0c76675394c5..35bec1c5bd5a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -90,6 +90,24 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. + * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + new_on_cpu = true; } diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 9c0f5db5cf46..67e7e3d990f4 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) unsigned long *p = (unsigned long *)ptep; __be64 prev; + /* See comment in switch_mm_irqs_off() */ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old), (__force unsigned long)pte_raw(new)); diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 8bd3b13fe2fb..369a164b545c 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) { unsigned long *p = (unsigned long *)ptep; + /* See comment in switch_mm_irqs_off() */ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new)); } #endif -- cgit From e9d8a0fdeacd843c85dcef480cdb2ab76bcdb6e4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Aug 2017 16:45:06 -0400 Subject: nvme-pci: set cqe_seen on polled completions Fixes: 920d13a884 ("nvme-pci: factor out the cqe reading mechanics from __nvme_process_cq") Reported-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 74a124a06264..925467b31a33 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -801,6 +801,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, return; } + nvmeq->cqe_seen = 1; req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); nvme_end_request(req, cqe->status, cqe->result); } @@ -830,10 +831,8 @@ static void nvme_process_cq(struct nvme_queue *nvmeq) consumed++; } - if (consumed) { + if (consumed) nvme_ring_cq_doorbell(nvmeq); - nvmeq->cqe_seen = 1; - } } static irqreturn_t nvme_irq(int irq, void *data) -- cgit From ed993c6fdfa7734881a4516852d95ae2d3b604d3 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Fri, 18 Aug 2017 10:42:14 +0300 Subject: ALSA: usb-audio: add DSD support for new Amanero PID Add DSD support for new Amanero Combo384 firmware version with a new PID. This firmware uses DSD_U32_BE. Fixes: 3eff682d765b ("ALSA: usb-audio: Support both DSD LE/BE Amanero firmware versions") Signed-off-by: Jussi Laako Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f3e9e30172f3..6a03f9697039 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1375,6 +1375,10 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } } break; + case USB_ID(0x16d0, 0x0a23): + if (fp->altsetting == 2) + return SNDRV_PCM_FMTBIT_DSD_U32_BE; + break; default: break; -- cgit From 0b36f2bd28d040acedb52f4327eb2441afe4f514 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Aug 2017 10:55:10 +0200 Subject: ALSA: emu10k1: Fix forgotten user-copy conversion in init code The commit d42fe63d5839 ("ALSA: emu10k1: Get rid of set_fs() usage") converted the user-space copy hack with set_fs() to the direct memcpy(), but one place was forgotten. This resulted in the error from snd_emu10k1_init_efx(), eventually failed to load the driver. Fix the missing piece. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196687 Fixes: d42fe63d5839 ("ALSA: emu10k1: Get rid of set_fs() usage") Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emufx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index dc585959ca32..a2b56b188be4 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -698,10 +698,18 @@ static int copy_gctl(struct snd_emu10k1 *emu, { struct snd_emu10k1_fx8010_control_old_gpr __user *octl; - if (emu->support_tlv) - return copy_from_user(gctl, &_gctl[idx], sizeof(*gctl)); + if (emu->support_tlv) { + if (in_kernel) + memcpy(gctl, (void *)&_gctl[idx], sizeof(*gctl)); + else if (copy_from_user(gctl, &_gctl[idx], sizeof(*gctl))) + return -EFAULT; + return 0; + } + octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)_gctl; - if (copy_from_user(gctl, &octl[idx], sizeof(*octl))) + if (in_kernel) + memcpy(gctl, (void *)&octl[idx], sizeof(*octl)); + else if (copy_from_user(gctl, &octl[idx], sizeof(*octl))) return -EFAULT; gctl->tlv = NULL; return 0; -- cgit From 7374bfb82e3844abcc5a5b8034620d80b92b820d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Jul 2017 15:47:33 -0700 Subject: MAINTAINERS: Remove Jason Cooper's irqchip git tree Jason's irqchip tree does not seem to have been updated for many months now, remove it from the list of trees to avoid any possible confusion. Jason says: "Unfortunately, when I have time for irqchip, I don't always have the time to properly follow up with pull-requests. So, for the time being, I'll stick to reviewing as I can." Signed-off-by: Florian Fainelli Signed-off-by: Thomas Gleixner Acked-by: Jason Cooper Cc: marc.zyngier@arm.com Link: http://lkml.kernel.org/r/20170727224733.8288-1-f.fainelli@gmail.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..b116efa1a087 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7102,7 +7102,6 @@ M: Marc Zyngier L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core -T: git git://git.infradead.org/users/jcooper/linux.git irqchip/core F: Documentation/devicetree/bindings/interrupt-controller/ F: drivers/irqchip/ -- cgit From 45bd07ad82622fb7c8dd7504d976b7dd11568965 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 20 Jul 2017 17:00:32 +0530 Subject: x86: Constify attribute_group structures attribute_groups are not supposed to change at runtime and none of the groups is modified. Mark the non-const structs as const. [ tglx: Folded into one big patch ] Signed-off-by: Arvind Yadav Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: bp@alien8.de Link: http://lkml.kernel.org/r/1500550238-15655-2-git-send-email-arvind.yadav.cs@gmail.com --- arch/x86/events/intel/uncore.c | 2 +- arch/x86/events/intel/uncore_nhmex.c | 12 ++++----- arch/x86/events/intel/uncore_snb.c | 6 ++--- arch/x86/events/intel/uncore_snbep.c | 42 ++++++++++++++++---------------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 +- arch/x86/kernel/cpu/microcode/core.c | 4 +-- arch/x86/kernel/ksysfs.c | 4 +-- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 44ec523287f6..1c5390f1cf09 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -721,7 +721,7 @@ static struct attribute *uncore_pmu_attrs[] = { NULL, }; -static struct attribute_group uncore_pmu_attr_group = { +static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index cda569332005..6a5cbe90f859 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -272,7 +272,7 @@ static struct attribute *nhmex_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_ubox_format_group = { +static const struct attribute_group nhmex_uncore_ubox_format_group = { .name = "format", .attrs = nhmex_uncore_ubox_formats_attr, }; @@ -299,7 +299,7 @@ static struct attribute *nhmex_uncore_cbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_cbox_format_group = { +static const struct attribute_group nhmex_uncore_cbox_format_group = { .name = "format", .attrs = nhmex_uncore_cbox_formats_attr, }; @@ -407,7 +407,7 @@ static struct attribute *nhmex_uncore_bbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_bbox_format_group = { +static const struct attribute_group nhmex_uncore_bbox_format_group = { .name = "format", .attrs = nhmex_uncore_bbox_formats_attr, }; @@ -484,7 +484,7 @@ static struct attribute *nhmex_uncore_sbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_sbox_format_group = { +static const struct attribute_group nhmex_uncore_sbox_format_group = { .name = "format", .attrs = nhmex_uncore_sbox_formats_attr, }; @@ -898,7 +898,7 @@ static struct attribute *nhmex_uncore_mbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_mbox_format_group = { +static const struct attribute_group nhmex_uncore_mbox_format_group = { .name = "format", .attrs = nhmex_uncore_mbox_formats_attr, }; @@ -1163,7 +1163,7 @@ static struct attribute *nhmex_uncore_rbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_rbox_format_group = { +static const struct attribute_group nhmex_uncore_rbox_format_group = { .name = "format", .attrs = nhmex_uncore_rbox_formats_attr, }; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index a3dcc12bef4a..db1127ce685e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -130,7 +130,7 @@ static struct attribute *snb_uncore_formats_attr[] = { NULL, }; -static struct attribute_group snb_uncore_format_group = { +static const struct attribute_group snb_uncore_format_group = { .name = "format", .attrs = snb_uncore_formats_attr, }; @@ -289,7 +289,7 @@ static struct attribute *snb_uncore_imc_formats_attr[] = { NULL, }; -static struct attribute_group snb_uncore_imc_format_group = { +static const struct attribute_group snb_uncore_imc_format_group = { .name = "format", .attrs = snb_uncore_imc_formats_attr, }; @@ -769,7 +769,7 @@ static struct attribute *nhm_uncore_formats_attr[] = { NULL, }; -static struct attribute_group nhm_uncore_format_group = { +static const struct attribute_group nhm_uncore_format_group = { .name = "format", .attrs = nhm_uncore_formats_attr, }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 4f9127644b80..db1fe377e6dd 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -602,27 +602,27 @@ static struct uncore_event_desc snbep_uncore_qpi_events[] = { { /* end: all zeroes */ }, }; -static struct attribute_group snbep_uncore_format_group = { +static const struct attribute_group snbep_uncore_format_group = { .name = "format", .attrs = snbep_uncore_formats_attr, }; -static struct attribute_group snbep_uncore_ubox_format_group = { +static const struct attribute_group snbep_uncore_ubox_format_group = { .name = "format", .attrs = snbep_uncore_ubox_formats_attr, }; -static struct attribute_group snbep_uncore_cbox_format_group = { +static const struct attribute_group snbep_uncore_cbox_format_group = { .name = "format", .attrs = snbep_uncore_cbox_formats_attr, }; -static struct attribute_group snbep_uncore_pcu_format_group = { +static const struct attribute_group snbep_uncore_pcu_format_group = { .name = "format", .attrs = snbep_uncore_pcu_formats_attr, }; -static struct attribute_group snbep_uncore_qpi_format_group = { +static const struct attribute_group snbep_uncore_qpi_format_group = { .name = "format", .attrs = snbep_uncore_qpi_formats_attr, }; @@ -1431,27 +1431,27 @@ static struct attribute *ivbep_uncore_qpi_formats_attr[] = { NULL, }; -static struct attribute_group ivbep_uncore_format_group = { +static const struct attribute_group ivbep_uncore_format_group = { .name = "format", .attrs = ivbep_uncore_formats_attr, }; -static struct attribute_group ivbep_uncore_ubox_format_group = { +static const struct attribute_group ivbep_uncore_ubox_format_group = { .name = "format", .attrs = ivbep_uncore_ubox_formats_attr, }; -static struct attribute_group ivbep_uncore_cbox_format_group = { +static const struct attribute_group ivbep_uncore_cbox_format_group = { .name = "format", .attrs = ivbep_uncore_cbox_formats_attr, }; -static struct attribute_group ivbep_uncore_pcu_format_group = { +static const struct attribute_group ivbep_uncore_pcu_format_group = { .name = "format", .attrs = ivbep_uncore_pcu_formats_attr, }; -static struct attribute_group ivbep_uncore_qpi_format_group = { +static const struct attribute_group ivbep_uncore_qpi_format_group = { .name = "format", .attrs = ivbep_uncore_qpi_formats_attr, }; @@ -1887,7 +1887,7 @@ static struct attribute *knl_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_ubox_format_group = { +static const struct attribute_group knl_uncore_ubox_format_group = { .name = "format", .attrs = knl_uncore_ubox_formats_attr, }; @@ -1927,7 +1927,7 @@ static struct attribute *knl_uncore_cha_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_cha_format_group = { +static const struct attribute_group knl_uncore_cha_format_group = { .name = "format", .attrs = knl_uncore_cha_formats_attr, }; @@ -2037,7 +2037,7 @@ static struct attribute *knl_uncore_pcu_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_pcu_format_group = { +static const struct attribute_group knl_uncore_pcu_format_group = { .name = "format", .attrs = knl_uncore_pcu_formats_attr, }; @@ -2187,7 +2187,7 @@ static struct attribute *knl_uncore_irp_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_irp_format_group = { +static const struct attribute_group knl_uncore_irp_format_group = { .name = "format", .attrs = knl_uncore_irp_formats_attr, }; @@ -2385,7 +2385,7 @@ static struct attribute *hswep_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_ubox_format_group = { +static const struct attribute_group hswep_uncore_ubox_format_group = { .name = "format", .attrs = hswep_uncore_ubox_formats_attr, }; @@ -2439,7 +2439,7 @@ static struct attribute *hswep_uncore_cbox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_cbox_format_group = { +static const struct attribute_group hswep_uncore_cbox_format_group = { .name = "format", .attrs = hswep_uncore_cbox_formats_attr, }; @@ -2621,7 +2621,7 @@ static struct attribute *hswep_uncore_sbox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_sbox_format_group = { +static const struct attribute_group hswep_uncore_sbox_format_group = { .name = "format", .attrs = hswep_uncore_sbox_formats_attr, }; @@ -3314,7 +3314,7 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_chabox_format_group = { +static const struct attribute_group skx_uncore_chabox_format_group = { .name = "format", .attrs = skx_uncore_cha_formats_attr, }; @@ -3427,7 +3427,7 @@ static struct attribute *skx_uncore_iio_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_iio_format_group = { +static const struct attribute_group skx_uncore_iio_format_group = { .name = "format", .attrs = skx_uncore_iio_formats_attr, }; @@ -3484,7 +3484,7 @@ static struct attribute *skx_uncore_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_format_group = { +static const struct attribute_group skx_uncore_format_group = { .name = "format", .attrs = skx_uncore_formats_attr, }; @@ -3605,7 +3605,7 @@ static struct attribute *skx_upi_uncore_formats_attr[] = { NULL, }; -static struct attribute_group skx_upi_uncore_format_group = { +static const struct attribute_group skx_upi_uncore_format_group = { .name = "format", .attrs = skx_upi_uncore_formats_attr, }; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d7cc190ae457..f7370abd33c6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -122,7 +122,7 @@ static struct attribute *thermal_throttle_attrs[] = { NULL }; -static struct attribute_group thermal_attr_group = { +static const struct attribute_group thermal_attr_group = { .attrs = thermal_throttle_attrs, .name = "thermal_throttle" }; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9cb98ee103db..86e8f0b2537b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -561,7 +561,7 @@ static struct attribute *mc_default_attrs[] = { NULL }; -static struct attribute_group mc_attr_group = { +static const struct attribute_group mc_attr_group = { .attrs = mc_default_attrs, .name = "microcode", }; @@ -707,7 +707,7 @@ static struct attribute *cpu_root_microcode_attrs[] = { NULL }; -static struct attribute_group cpu_root_microcode_group = { +static const struct attribute_group cpu_root_microcode_group = { .name = "microcode", .attrs = cpu_root_microcode_attrs, }; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index 4afc67f5facc..06e1ff5562c0 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -55,7 +55,7 @@ static struct bin_attribute *boot_params_data_attrs[] = { NULL, }; -static struct attribute_group boot_params_attr_group = { +static const struct attribute_group boot_params_attr_group = { .attrs = boot_params_version_attrs, .bin_attrs = boot_params_data_attrs, }; @@ -202,7 +202,7 @@ static struct bin_attribute *setup_data_data_attrs[] = { NULL, }; -static struct attribute_group setup_data_attr_group = { +static const struct attribute_group setup_data_attr_group = { .attrs = setup_data_type_attrs, .bin_attrs = setup_data_data_attrs, }; -- cgit From 4dd6a9973b8aaffac4bf37c5bb70e8eae5a7afb4 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 28 Jul 2017 09:51:34 -0700 Subject: soc: ti: ti_sci_pm_domains: Populate name for genpd Commit b6a1d093f96b ("PM / Domains: Extend generic power domain debugfs") now creates a debugfs directory for each genpd based on the name of the genpd. Currently no name is given to the genpd created by ti_sci_pm_domains driver so because of this we see a NULL pointer dereferences when it is accessed on boot when the debugfs entry creation is attempted. Give the genpd a name before registering it to avoid this. Fixes: 52835d59fc6c ("soc: ti: Add ti_sci_pm_domains driver") Signed-off-by: Dave Gerlach Signed-off-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- drivers/soc/ti/ti_sci_pm_domains.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c index b0b283810e72..de31b9389e2e 100644 --- a/drivers/soc/ti/ti_sci_pm_domains.c +++ b/drivers/soc/ti/ti_sci_pm_domains.c @@ -176,6 +176,8 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev) ti_sci_pd->dev = dev; + ti_sci_pd->pd.name = "ti_sci_pd"; + ti_sci_pd->pd.attach_dev = ti_sci_pd_attach_dev; ti_sci_pd->pd.detach_dev = ti_sci_pd_detach_dev; -- cgit From e8f241893dfbbebe2813c01eac54f263e6a5e59c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Aug 2017 10:53:45 +0100 Subject: genirq: Restore trigger settings in irq_modify_status() irq_modify_status starts by clearing the trigger settings from irq_data before applying the new settings, but doesn't restore them, leaving them to IRQ_TYPE_NONE. That's pretty confusing to the potential request_irq() that could follow. Instead, snapshot the settings before clearing them, and restore them if the irq_modify_status() invocation was not changing the trigger. Fixes: 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") Reported-and-tested-by: jeffy Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jon Hunter Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170818095345.12378-1-marc.zyngier@arm.com --- kernel/irq/chip.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index a3cc37c0c85e..3675c6004f2a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1000,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { - unsigned long flags; + unsigned long flags, trigger, tmp; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) @@ -1014,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) irq_settings_clr_and_set(desc, clr, set); + trigger = irqd_get_trigger_type(&desc->irq_data); + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); if (irq_settings_has_no_balance_set(desc)) @@ -1025,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); - irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; + + irqd_set(&desc->irq_data, trigger); irq_put_desc_unlock(desc, flags); } -- cgit From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 09:50:13 +0200 Subject: kernel/watchdog: Prevent false positives with turbo modes The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumess a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos --- arch/x86/Kconfig | 1 + include/linux/nmi.h | 8 +++++++ kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 7 ++++++ 5 files changed, 76 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b7cf9e..9101bfc85539 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 8aa01fd859fb..a36abe2da13e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif + +#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ + defined(CONFIG_HARDLOCKUP_DETECTOR) +void watchdog_update_hrtimer_threshold(u64 period); +#else +static inline void watchdog_update_hrtimer_threshold(u64 period) { } +#endif + extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 06d3389bca0d..f5d52024f6b7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -240,6 +240,7 @@ static void set_sample_period(void) * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); + watchdog_update_hrtimer_threshold(sample_period); } /* Commands for resetting the watchdog */ diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 295a0d84934c..3a09ea1b1d3d 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP +static DEFINE_PER_CPU(ktime_t, last_timestamp); +static DEFINE_PER_CPU(unsigned int, nmi_rearmed); +static ktime_t watchdog_hrtimer_sample_threshold __read_mostly; + +void watchdog_update_hrtimer_threshold(u64 period) +{ + /* + * The hrtimer runs with a period of (watchdog_threshold * 2) / 5 + * + * So it runs effectively with 2.5 times the rate of the NMI + * watchdog. That means the hrtimer should fire 2-3 times before + * the NMI watchdog expires. The NMI watchdog on x86 is based on + * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles + * might run way faster than expected and the NMI fires in a + * smaller period than the one deduced from the nominal CPU + * frequency. Depending on the Turbo-Mode factor this might be fast + * enough to get the NMI period smaller than the hrtimer watchdog + * period and trigger false positives. + * + * The sample threshold is used to check in the NMI handler whether + * the minimum time between two NMI samples has elapsed. That + * prevents false positives. + * + * Set this to 4/5 of the actual watchdog threshold period so the + * hrtimer is guaranteed to fire at least once within the real + * watchdog threshold. + */ + watchdog_hrtimer_sample_threshold = period * 2; +} + +static bool watchdog_check_timestamp(void) +{ + ktime_t delta, now = ktime_get_mono_fast_ns(); + + delta = now - __this_cpu_read(last_timestamp); + if (delta < watchdog_hrtimer_sample_threshold) { + /* + * If ktime is jiffies based, a stalled timer would prevent + * jiffies from being incremented and the filter would look + * at a stale timestamp and never trigger. + */ + if (__this_cpu_inc_return(nmi_rearmed) < 10) + return false; + } + __this_cpu_write(nmi_rearmed, 0); + __this_cpu_write(last_timestamp, now); + return true; +} +#else +static inline bool watchdog_check_timestamp(void) +{ + return true; +} +#endif + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } + if (!watchdog_check_timestamp()) + return; + /* check for a hardlockup * This is done by making sure our timer interrupt * is incrementing. The timer interrupt should have diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98fe715522e8..c617b9d1d6cb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR +# +# Enables a timestamp based low pass filter to compensate for perf based +# hard lockup detection which runs too fast due to turbo modes. +# +config HARDLOCKUP_CHECK_TIMESTAMP + bool + # # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard # lockup detector rather than the perf based detector. -- cgit From c005390374957baacbc38eef96ea360559510aa7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 12:24:47 +0200 Subject: blk-mq-pci: add a fallback when pci_irq_get_affinity returns NULL While pci_irq_get_affinity should never fail for SMP kernel that implement the affinity mapping, it will always return NULL in the UP case, so provide a fallback mapping of all queues to CPU 0 in that case. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 0c3354cf3552..76944e3271bf 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev) for (queue = 0; queue < set->nr_hw_queues; queue++) { mask = pci_irq_get_affinity(pdev, queue); if (!mask) - return -EINVAL; + goto fallback; for_each_cpu(cpu, mask) set->mq_map[cpu] = queue; } return 0; + +fallback: + WARN_ON_ONCE(set->nr_hw_queues > 1); + for_each_possible_cpu(cpu) + set->mq_map[cpu] = 0; + return 0; } EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); -- cgit From acc8b31665b4cc17b35c4fa445427f7e2f6dc86b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 18 Aug 2017 10:10:43 +0200 Subject: net: sched: fix p_filter_chain check in tcf_chain_flush The dereference before check is wrong and leads to an oops when p_filter_chain is NULL. The check needs to be done on the pointer to prevent NULL dereference. Fixes: f93e1cdcf42c ("net/sched: fix filter flushing") Signed-off-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c9..9fd44c221347 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -205,7 +205,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - if (*chain->p_filter_chain) + if (chain->p_filter_chain) RCU_INIT_POINTER(*chain->p_filter_chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); -- cgit From eac2c68d663effb077210218788952b5a0c1f60e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Aug 2017 12:11:50 +0100 Subject: nfp: fix infinite loop on umapping cleanup The while loop that performs the dma page unmapping never decrements index counter f and hence loops forever. Fix this with a pre-decrement on f. Detected by CoverityScan, CID#1357309 ("Infinite loop") Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Colin Ian King Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4631ca8b8eb2..9f77ce038a4a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -908,8 +908,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; err_unmap: - --f; - while (f >= 0) { + while (--f >= 0) { frag = &skb_shinfo(skb)->frags[f]; dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, skb_frag_size(frag), DMA_TO_DEVICE); -- cgit From a120d9ab65354727559b9db75ded8071b7ef19e2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Aug 2017 14:12:06 +0100 Subject: netxen: fix incorrect loop counter decrement The loop counter k is currently being decremented from zero which is incorrect. Fix this by incrementing k instead Detected by CoverityScan, CID#401847 ("Infinite loop") Fixes: 83f18a557c6d ("netxen_nic: fw dump support") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index 66ff15d08bad..0a66389c06c2 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -2311,7 +2311,7 @@ netxen_md_rdqueue(struct netxen_adapter *adapter, loop_cnt++) { NX_WR_DUMP_REG(select_addr, adapter->ahw.pci_base0, queue_id); read_addr = queueEntry->read_addr; - for (k = 0; k < read_cnt; k--) { + for (k = 0; k < read_cnt; k++) { NX_RD_DUMP_REG(read_addr, adapter->ahw.pci_base0, &read_value); *data_buff++ = read_value; -- cgit From 2110ba58303f0c2a03360c5f81fbe67ed312e7b9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Aug 2017 17:11:06 +0200 Subject: bpf, doc: improve sysctl knob description Current context speaking of tcpdump filters is out of date these days, so lets improve the sysctl description for the BPF knobs a bit. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index b9c3c6078010..d7c2b88b92ae 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -35,23 +35,32 @@ Table : Subdirectories in /proc/sys/net bpf_jit_enable -------------- -This enables Berkeley Packet Filter Just in Time compiler. - -There are two flavors of JIT, the new eBPF JIT supported on: +This enables the BPF Just in Time (JIT) compiler. BPF is a flexible +and efficient infrastructure allowing to execute bytecode at various +hook points. It is used in a number of Linux kernel subsystems such +as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints) +and security (e.g. seccomp). LLVM has a BPF back end that can compile +restricted C into a sequence of BPF instructions. After program load +through bpf(2) and passing a verifier in the kernel, a JIT will then +translate these BPF proglets into native CPU instructions. There are +two flavors of JITs, the newer eBPF JIT currently supported on: - x86_64 - arm64 - ppc64 - sparc64 - mips64 -And the older cBPF JIT supported on: +And the older cBPF JIT supported on the following archs: - arm - mips - ppc - sparc -The BPF JIT provides a framework to speed packet filtering, the one used by -tcpdump/libpcap for example. +eBPF JITs are a superset of cBPF JITs, meaning the kernel will +migrate cBPF instructions into eBPF instructions and then JIT +compile them transparently. Older cBPF JITs can only translate +tcpdump filters, seccomp rules, etc, but not mentioned eBPF +programs loaded through bpf(2). Values : 0 - disable the JIT (default value) @@ -61,9 +70,9 @@ Values : bpf_jit_harden -------------- -This enables hardening for the Berkeley Packet Filter Just in Time compiler. -Supported are eBPF JIT backends. Enabling hardening trades off performance, -but can mitigate JIT spraying. +This enables hardening for the BPF JIT compiler. Supported are eBPF +JIT backends. Enabling hardening trades off performance, but can +mitigate JIT spraying. Values : 0 - disable JIT hardening (default value) 1 - enable JIT hardening for unprivileged users only @@ -72,11 +81,11 @@ Values : bpf_jit_kallsyms ---------------- -When Berkeley Packet Filter Just in Time compiler is enabled, then compiled -images are unknown addresses to the kernel, meaning they neither show up in -traces nor in /proc/kallsyms. This enables export of these addresses, which -can be used for debugging/tracing. If bpf_jit_harden is enabled, this feature -is disabled. +When BPF JIT compiler is enabled, then compiled images are unknown +addresses to the kernel, meaning they neither show up in traces nor +in /proc/kallsyms. This enables export of these addresses, which can +be used for debugging/tracing. If bpf_jit_harden is enabled, this +feature is disabled. Values : 0 - disable JIT kallsyms export (default value) 1 - enable JIT kallsyms export for privileged users only -- cgit From a0917e0bc6efc05834c0c1eafebd579a9c75e6e9 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 18 Aug 2017 15:04:54 -0400 Subject: datagram: When peeking datagrams with offset < 0 don't skip empty skbs Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove headers from UDP packets before queueing"), when udp packets are being peeked the requested extra offset is always 0 as there is no need to skip the udp header. However, when the offset is 0 and the next skb is of length 0, it is only returned once. The behaviour can be seen with the following python script: from socket import *; f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); f.bind(('::', 0)); addr=('::1', f.getsockname()[1]); g.sendto(b'', addr) g.sendto(b'b', addr) print(f.recvfrom(10, MSG_PEEK)); print(f.recvfrom(10, MSG_PEEK)); Where the expected output should be the empty string twice. Instead, make sk_peek_offset return negative values, and pass those values to __skb_try_recv_datagram/__skb_try_recv_from_queue. If the passed offset to __skb_try_recv_from_queue is negative, the checked skb is never skipped. __skb_try_recv_from_queue will then ensure the offset is reset back to 0 if a peek is requested without an offset, unless no packets are found. Also simplify the if condition in __skb_try_recv_from_queue. If _off is greater then 0, and off is greater then or equal to skb->len, then (_off || skb->len) must always be true assuming skb->len >= 0 is always true. Also remove a redundant check around a call to sk_peek_offset in af_unix.c, as it double checked if MSG_PEEK was set in the flags. V2: - Moved the negative fixup into __skb_try_recv_from_queue, and remove now redundant checks - Fix peeking in udp{,v6}_recvmsg to report the right value when the offset is 0 V3: - Marked new branch in __skb_try_recv_from_queue as unlikely. Signed-off-by: Matthew Dawson Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 4 +--- net/core/datagram.c | 12 +++++++++--- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 3 ++- net/unix/af_unix.c | 5 +---- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..aeeec62992ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,9 +507,7 @@ int sk_set_peek_off(struct sock *sk, int val); static inline int sk_peek_offset(struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { - s32 off = READ_ONCE(sk->sk_peek_off); - if (off >= 0) - return off; + return READ_ONCE(sk->sk_peek_off); } return 0; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..a21ca8dee5ea 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -169,14 +169,20 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, int *peeked, int *off, int *err, struct sk_buff **last) { + bool peek_at_off = false; struct sk_buff *skb; - int _off = *off; + int _off = 0; + + if (unlikely(flags & MSG_PEEK && *off >= 0)) { + peek_at_off = true; + _off = *off; + } *last = queue->prev; skb_queue_walk(queue, skb) { if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { + if (peek_at_off && _off >= skb->len && + (_off || skb->peeked)) { _off -= skb->len; continue; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7c804f73990..cd1d044a7fa5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1574,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 578142b7ca3e..20039c8501eb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -362,7 +362,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..be8982b4f8c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2304,10 +2304,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, */ mutex_lock(&u->iolock); - if (flags & MSG_PEEK) - skip = sk_peek_offset(sk, flags); - else - skip = 0; + skip = max(sk_peek_offset(sk, flags), 0); do { int chunk; -- cgit From 739f79fc9db1b38f96b5a5109b247a650fbebf6d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 18 Aug 2017 15:15:48 -0700 Subject: mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback() Jaegeuk and Brad report a NULL pointer crash when writeback ending tries to update the memcg stats: BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0 IP: test_clear_page_writeback+0x12e/0x2c0 [...] RIP: 0010:test_clear_page_writeback+0x12e/0x2c0 Call Trace: end_page_writeback+0x47/0x70 f2fs_write_end_io+0x76/0x180 [f2fs] bio_endio+0x9f/0x120 blk_update_request+0xa8/0x2f0 scsi_end_request+0x39/0x1d0 scsi_io_completion+0x211/0x690 scsi_finish_command+0xd9/0x120 scsi_softirq_done+0x127/0x150 __blk_mq_complete_request_remote+0x13/0x20 flush_smp_call_function_queue+0x56/0x110 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x27/0x40 call_function_single_interrupt+0x89/0x90 RIP: 0010:native_safe_halt+0x6/0x10 (gdb) l *(test_clear_page_writeback+0x12e) 0xffffffff811bae3e is in test_clear_page_writeback (./include/linux/memcontrol.h:619). 614 mod_node_page_state(page_pgdat(page), idx, val); 615 if (mem_cgroup_disabled() || !page->mem_cgroup) 616 return; 617 mod_memcg_state(page->mem_cgroup, idx, val); 618 pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; 619 this_cpu_add(pn->lruvec_stat->count[idx], val); 620 } 621 622 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, 623 gfp_t gfp_mask, The issue is that writeback doesn't hold a page reference and the page might get freed after PG_writeback is cleared (and the mapping is unlocked) in test_clear_page_writeback(). The stat functions looking up the page's node or zone are safe, as those attributes are static across allocation and free cycles. But page->mem_cgroup is not, and it will get cleared if we race with truncation or migration. It appears this race window has been around for a while, but less likely to trigger when the memcg stats were updated first thing after PG_writeback is cleared. Recent changes reshuffled this code to update the global node stats before the memcg ones, though, stretching the race window out to an extent where people can reproduce the problem. Update test_clear_page_writeback() to look up and pin page->mem_cgroup before clearing PG_writeback, then not use that pointer afterward. It is a partial revert of 62cccb8c8e7a ("mm: simplify lock_page_memcg()") but leaves the pageref-holding callsites that aren't affected alone. Link: http://lkml.kernel.org/r/20170809183825.GA26387@cmpxchg.org Fixes: 62cccb8c8e7a ("mm: simplify lock_page_memcg()") Signed-off-by: Johannes Weiner Reported-by: Jaegeuk Kim Tested-by: Jaegeuk Kim Reported-by: Bradley Bolen Tested-by: Brad Bolen Cc: Vladimir Davydov Cc: Michal Hocko Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++-- mm/memcontrol.c | 43 +++++++++++++++++++++++++++++++------------ mm/page-writeback.c | 15 ++++++++++++--- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3914e3dd6168..9b15a4bcfa77 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -void lock_page_memcg(struct page *page); +struct mem_cgroup *lock_page_memcg(struct page *page); +void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, @@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void lock_page_memcg(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) +{ + return NULL; +} + +static inline void __unlock_page_memcg(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3df3c04d73ab..e09741af816f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1611,9 +1611,13 @@ cleanup: * @page: the page * * This function protects unlocked LRU pages from being moved to - * another cgroup and stabilizes their page->mem_cgroup binding. + * another cgroup. + * + * It ensures lifetime of the returned memcg. Caller is responsible + * for the lifetime of the page; __unlock_page_memcg() is available + * when @page might get freed inside the locked section. */ -void lock_page_memcg(struct page *page) +struct mem_cgroup *lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1622,18 +1626,24 @@ void lock_page_memcg(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - */ + * + * The RCU lock also protects the memcg from being freed when + * the page state that is going to change is the only thing + * preventing the page itself from being freed. E.g. writeback + * doesn't hold a page reference and relies on PG_writeback to + * keep off truncation, migration and so forth. + */ rcu_read_lock(); if (mem_cgroup_disabled()) - return; + return NULL; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return; + return NULL; if (atomic_read(&memcg->moving_account) <= 0) - return; + return memcg; spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1649,18 +1659,18 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; - return; + return memcg; } EXPORT_SYMBOL(lock_page_memcg); /** - * unlock_page_memcg - unlock a page->mem_cgroup binding - * @page: the page + * __unlock_page_memcg - unlock and unpin a memcg + * @memcg: the memcg + * + * Unlock and unpin a memcg returned by lock_page_memcg(). */ -void unlock_page_memcg(struct page *page) +void __unlock_page_memcg(struct mem_cgroup *memcg) { - struct mem_cgroup *memcg = page->mem_cgroup; - if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -1672,6 +1682,15 @@ void unlock_page_memcg(struct page *page) rcu_read_unlock(); } + +/** + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @page: the page + */ +void unlock_page_memcg(struct page *page) +{ + __unlock_page_memcg(page->mem_cgroup); +} EXPORT_SYMBOL(unlock_page_memcg); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 96e93b214d31..bf050ab025b7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2724,9 +2724,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); + struct mem_cgroup *memcg; + struct lruvec *lruvec; int ret; - lock_page_memcg(page); + memcg = lock_page_memcg(page); + lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2754,12 +2757,18 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } + /* + * NOTE: Page might be free now! Writeback doesn't hold a page + * reference on its own, it relies on truncation to wait for + * the clearing of PG_writeback. The below can only access + * page state that is static across allocation cycles. + */ if (ret) { - dec_lruvec_page_state(page, NR_WRITEBACK); + dec_lruvec_state(lruvec, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } - unlock_page_memcg(page); + __unlock_page_memcg(memcg); return ret; } -- cgit From 92e5aae457787d0bc6b255200d2fb116edf69794 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Aug 2017 15:15:51 -0700 Subject: kernel/watchdog: fix Kconfig constraints for perf hardlockup watchdog Commit 05a4a9527931 ("kernel/watchdog: split up config options") lost the perf-based hardlockup detector's dependency on PERF_EVENTS, which can result in broken builds with some powerpc configurations. Restore the dependency. Add it in for x86 too, despite x86 always selecting PERF_EVENTS it seems reasonable to make the dependency explicit. Link: http://lkml.kernel.org/r/20170810114452.6673-1-npiggin@gmail.com Fixes: 05a4a9527931 ("kernel/watchdog: split up config options") Signed-off-by: Nicholas Piggin Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 2 +- arch/x86/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36f858c37ca7..81b0031f909f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -199,7 +199,7 @@ config PPC select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b7cf9e..29a1bf85e507 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -163,7 +163,7 @@ config X86 select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI - select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API -- cgit From 8ada92799ec4de00f4bc0f10b1ededa256c1ab22 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:15:55 -0700 Subject: wait: add wait_event_killable_timeout() These are the few pending fixes I have queued up for v4.13-final. One is a a generic regression fix for recursive loops on kmod and the other one is a trivial print out correction. During the v4.13 development we assumed that recursive kmod loops were no longer possible. Clearly that is not true. The regression fix makes use of a new killable wait. We use a killable wait to be paranoid in how signals might be sent to modprobe and only accept a proper SIGKILL. The signal will only be available to userspace to issue *iff* a thread has already entered a wait state, and that happens only if we've already throttled after 50 kmod threads have been hit. Note that although it may seem excessive to trigger a failure afer 5 seconds if all kmod thread remain busy, prior to the series of changes that went into v4.13 we would actually *always* fatally fail any request which came in if the limit was already reached. The new waiting implemented in v4.13 actually gives us *more* breathing room -- the wait for 5 seconds is a wait for *any* kmod thread to finish. We give up and fail *iff* no kmod thread has finished and they're *all* running straight for 5 consecutive seconds. If 50 kmod threads are running consecutively for 5 seconds something else must be really bad. Recursive loops with kmod are bad but they're also hard to implement properly as a selftest without currently fooling current userspace tools like kmod [1]. For instance kmod will complain when you run depmod if it finds a recursive loop with symbol dependency between modules as such this type of recursive loop cannot go upstream as the modules_install target will fail after running depmod. These tests already exist on userspace kmod upstream though (refer to the testsuite/module-playground/mod-loop-*.c files). The same is not true if request_module() is used though, or worst if aliases are used. Likewise the issue with 64-bit kernels booting 32-bit userspace without a binfmt handler built-in is also currently not detected and proactively avoided by userspace kmod tools, or kconfig for all architectures. Although we could complain in the kernel when some of these individual recursive issues creep up, proactively avoiding these situations in userspace at build time is what we should keep striving for. Lastly, since recursive loops could happen with kmod it may mean recursive loops may also be possible with other kernel usermode helpers, this should be investigated and long term if we can come up with a more sensible generic solution even better! [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20170809-kmod-for-v4.13-final [1] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git This patch (of 3): This wait is similar to wait_event_interruptible_timeout() but only accepts SIGKILL interrupt signal. Other signals are ignored. Link: http://lkml.kernel.org/r/20170809234635.13443-2-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Kees Cook Cc: Dmitry Torokhov Cc: Jessica Yu Cc: Rusty Russell Cc: Michal Marek Cc: Petr Mladek Cc: Miroslav Benes Cc: Josh Poimboeuf Cc: "Eric W. Biederman" Cc: Shuah Khan Cc: Matt Redfearn Cc: Dan Carpenter Cc: Colin Ian King Cc: Daniel Mentz Cc: David Binderman Cc: Matt Redfearn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index 5b74e36c0ca8..dc19880c02f5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_killable_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_KILLABLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_KILLABLE) until the + * @condition evaluates to true or a kill signal is received. + * The @condition is checked each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a kill signal. + * + * Only kill signals interrupt this process. + */ +#define wait_event_killable_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_killable_timeout(wq_head, \ + condition, timeout); \ + __ret; \ +}) + #define __wait_event_lock_irq(wq_head, condition, lock, cmd) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ -- cgit From 2ba293c9e7db150943f06b12d3eb7213e7fae624 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:15:58 -0700 Subject: kmod: fix wait on recursive loop Recursive loops with module loading were previously handled in kmod by restricting the number of modprobe calls to 50 and if that limit was breached request_module() would return an error and a user would see the following on their kernel dmesg: request_module: runaway loop modprobe binfmt-464c Starting init:/sbin/init exists but couldn't execute it (error -8) This issue could happen for instance when a 64-bit kernel boots a 32-bit userspace on some architectures and has no 32-bit binary format hanlders. This is visible, for instance, when a CONFIG_MODULES enabled 64-bit MIPS kernel boots a into o32 root filesystem and the binfmt handler for o32 binaries is not built-in. After commit 6d7964a722af ("kmod: throttle kmod thread limit") we now don't have any visible signs of an error and the kernel just waits for the loop to end somehow. Although this *particular* recursive loop could also be addressed by doing a sanity check on search_binary_handler() and disallowing a modular binfmt to be required for modprobe, a generic solution for any recursive kernel kmod issues is still needed. This should catch these loops. We can investigate each loop and address each one separately as they come in, this however puts a stop gap for them as before. Link: http://lkml.kernel.org/r/20170809234635.13443-3-mcgrof@kernel.org Fixes: 6d7964a722af ("kmod: throttle kmod thread limit") Signed-off-by: Luis R. Rodriguez Reported-by: Matt Redfearn Tested-by: Matt Redfearn Cc: "Eric W. Biederman" Cc: Colin Ian King Cc: Dan Carpenter Cc: Daniel Mentz Cc: David Binderman Cc: Dmitry Torokhov Cc: Ingo Molnar Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Michal Marek Cc: Miroslav Benes Cc: Peter Zijlstra (Intel) Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 6d016c5d97c8..2f37acde640b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem); static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); static DECLARE_WAIT_QUEUE_HEAD(kmod_wq); +/* + * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads + * running at the same time without returning. When this happens we + * believe you've somehow ended up with a recursive module dependency + * creating a loop. + * + * We have no option but to fail. + * + * Userspace should proactively try to detect and prevent these. + */ +#define MAX_KMOD_ALL_BUSY_TIMEOUT 5 + /* modprobe_path is set via /proc/sys. */ @@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...) pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", atomic_read(&kmod_concurrent_max), MAX_KMOD_CONCURRENT, module_name); - wait_event_interruptible(kmod_wq, - atomic_dec_if_positive(&kmod_concurrent_max) >= 0); + ret = wait_event_killable_timeout(kmod_wq, + atomic_dec_if_positive(&kmod_concurrent_max) >= 0, + MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); + if (!ret) { + pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", + module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); + return -ETIME; + } else if (ret == -ERESTARTSYS) { + pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name); + return ret; + } } trace_module_request(module_name, wait, _RET_IP_); -- cgit From 768dc4e48420955518974d8486c1b00ec05e7274 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:16:02 -0700 Subject: test_kmod: fix description for -s -and -c parameters The descriptions were reversed, correct this. Link: http://lkml.kernel.org/r/20170809234635.13443-4-mcgrof@kernel.org Fixes: 64b671204afd71 ("test_sysctl: add generic script to expand on tests") Signed-off-by: Luis R. Rodriguez Reported-by: Daniel Mentz Cc: "Eric W. Biederman" Cc: Colin Ian King Cc: Dan Carpenter Cc: David Binderman Cc: Dmitry Torokhov Cc: Ingo Molnar Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Matt Redfearn Cc: Matt Redfearn Cc: Michal Marek Cc: Miroslav Benes Cc: Peter Zijlstra (Intel) Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8cecae9a8bca..7956ea3be667 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -473,8 +473,8 @@ usage() echo " all Runs all tests (default)" echo " -t Run test ID the number amount of times is recommended" echo " -w Watch test ID run until it runs into an error" - echo " -c Run test ID once" - echo " -s Run test ID x test-count number of times" + echo " -s Run test ID once" + echo " -c Run test ID x test-count number of times" echo " -l List all test ID list" echo " -h|--help Help" echo -- cgit From 3010f876500f9ba921afaeccec30c45ca6584dc8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 Aug 2017 15:16:05 -0700 Subject: mm: discard memblock data later There is existing use after free bug when deferred struct pages are enabled: The memblock_add() allocates memory for the memory array if more than 128 entries are needed. See comment in e820__memblock_setup(): * The bootstrap memblock region count maximum is 128 entries * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries * than that - so allow memblock resizing. This memblock memory is freed here: free_low_memory_core_early() We access the freed memblock.memory later in boot when deferred pages are initialized in this path: deferred_init_memmap() for_each_mem_pfn_range() __next_mem_pfn_range() type = &memblock.memory; One possible explanation for why this use-after-free hasn't been hit before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded at least on systems where deferred struct pages were enabled. Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128, and verifying in qemu that this code is getting excuted and that the freed pages are sane. Link: http://lkml.kernel.org/r/1502485554-318703-2-git-send-email-pasha.tatashin@oracle.com Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Acked-by: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 6 ++++-- mm/memblock.c | 38 +++++++++++++++++--------------------- mm/nobootmem.c | 16 ---------------- mm/page_alloc.c | 4 ++++ 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 77d427974f57..bae11c7e7bf3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ extern int memblock_debug; #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata +void memblock_discard(void); #else #define __init_memblock #define __initdata_memblock @@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); -phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); @@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, phys_addr_t *out_end); +void __memblock_free_early(phys_addr_t base, phys_addr_t size); +void __memblock_free_late(phys_addr_t base, phys_addr_t size); + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/mm/memblock.c b/mm/memblock.c index 2cb25fe4452c..bf14aea6ab70 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - -phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( - phys_addr_t *addr) -{ - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; - - *addr = __pa(memblock.reserved.regions); - - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.reserved.max); -} - -phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( - phys_addr_t *addr) +/** + * Discard memory and reserved arrays if they were allocated + */ +void __init memblock_discard(void) { - if (memblock.memory.regions == memblock_memory_init_regions) - return 0; + phys_addr_t addr, size; - *addr = __pa(memblock.memory.regions); + if (memblock.reserved.regions != memblock_reserved_init_regions) { + addr = __pa(memblock.reserved.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); + __memblock_free_late(addr, size); + } - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.memory.max); + if (memblock.memory.regions == memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); + __memblock_free_late(addr, size); + } } - #endif /** diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 36454d0f96ee..3637809a18d0 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void) NULL) count += __free_memory_core(start, end); -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - { - phys_addr_t size; - - /* Free memblock.reserved array if it was allocated */ - size = get_allocated_memblock_reserved_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - - /* Free memblock.memory array if it was allocated */ - size = get_allocated_memblock_memory_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - } -#endif - return count; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6d00f746c2fd..1bad301820c7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void) /* Reinit limits that are based on free pages after the kernel is up */ files_maxfiles_init(); #endif +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + /* Discard memblock private memory */ + memblock_discard(); +#endif for_each_populated_zone(zone) set_zone_contiguous(zone); -- cgit From f6ba488073fe8159851fe398cc3c5ee383bb4c7a Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 18 Aug 2017 15:16:08 -0700 Subject: slub: fix per memcg cache leak on css offline To avoid a possible deadlock, sysfs_slab_remove() schedules an asynchronous work to delete sysfs entries corresponding to the kmem cache. To ensure the cache isn't freed before the work function is called, it takes a reference to the cache kobject. The reference is supposed to be released by the work function. However, the work function (sysfs_slab_remove_workfn()) does nothing in case the cache sysfs entry has already been deleted, leaking the kobject and the corresponding cache. This may happen on a per memcg cache destruction, because sysfs entries of a per memcg cache are deleted on memcg offline if the cache is empty (see __kmemcg_cache_deactivate()). The kmemleak report looks like this: unreferenced object 0xffff9f798a79f540 (size 32): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s) hex dump (first 32 bytes): 6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a kmalloc-16(1599: 6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff newroot).#k..... backtrace: kmemleak_alloc+0x4a/0xa0 __kmalloc_track_caller+0x148/0x2c0 kvasprintf+0x66/0xd0 kasprintf+0x49/0x70 memcg_create_kmem_cache+0xe6/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 unreferenced object 0xffff9f79b6136840 (size 416): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s) hex dump (first 32 bytes): 40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00 @...>3.....@.... 00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00 ................ backtrace: kmemleak_alloc+0x4a/0xa0 kmem_cache_alloc+0x128/0x280 create_cache+0x3b/0x1e0 memcg_create_kmem_cache+0x118/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 Fix the leak by adding the missing call to kobject_put() to sysfs_slab_remove_workfn(). Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous") Signed-off-by: Vladimir Davydov Reported-by: Andrei Vagin Tested-by: Andrei Vagin Acked-by: Tejun Heo Acked-by: David Rientjes Cc: Michal Hocko Cc: Johannes Weiner Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: [4.12.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 1d3f9835f4ea..e8b4e31162ca 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5642,13 +5642,14 @@ static void sysfs_slab_remove_workfn(struct work_struct *work) * A cache is never shut down before deactivation is * complete, so no need to worry about synchronization. */ - return; + goto out; #ifdef CONFIG_MEMCG kset_unregister(s->memcg_kset); #endif kobject_uevent(&s->kobj, KOBJ_REMOVE); kobject_del(&s->kobj); +out: kobject_put(&s->kobj); } -- cgit From 5b53a6ea886700a128b697a6fe8375340dea2c30 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:12 -0700 Subject: mm: fix double mmap_sem unlock on MMF_UNSTABLE enforced SIGBUS Tetsuo Handa has noticed that MMF_UNSTABLE SIGBUS path in handle_mm_fault causes a lockdep splat Out of memory: Kill process 1056 (a.out) score 603 or sacrifice child Killed process 1056 (a.out) total-vm:4268108kB, anon-rss:2246048kB, file-rss:0kB, shmem-rss:0kB a.out (1169) used greatest stack depth: 11664 bytes left DEBUG_LOCKS_WARN_ON(depth <= 0) ------------[ cut here ]------------ WARNING: CPU: 6 PID: 1339 at kernel/locking/lockdep.c:3617 lock_release+0x172/0x1e0 CPU: 6 PID: 1339 Comm: a.out Not tainted 4.13.0-rc3-next-20170803+ #142 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 RIP: 0010:lock_release+0x172/0x1e0 Call Trace: up_read+0x1a/0x40 __do_page_fault+0x28e/0x4c0 do_page_fault+0x30/0x80 page_fault+0x28/0x30 The reason is that the page fault path might have dropped the mmap_sem and returned with VM_FAULT_RETRY. MMF_UNSTABLE check however rewrites the error path to VM_FAULT_SIGBUS and we always expect mmap_sem taken in that path. Fix this by taking mmap_sem when VM_FAULT_RETRY is held in the MMF_UNSTABLE path. We cannot simply add VM_FAULT_SIGBUS to the existing error code because all arch specific page fault handlers and g-u-p would have to learn a new error code combination. Link: http://lkml.kernel.org/r/20170807113839.16695-2-mhocko@kernel.org Fixes: 3f70dc38cec2 ("mm: make sure that kthreads will not refault oom reaped memory") Reported-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: David Rientjes Cc: Andrea Argangeli Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Wenwei Tao Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index e158f7ac6730..c717b5bcc80e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3910,8 +3910,18 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * further. */ if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) + && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) { + + /* + * We are going to enforce SIGBUS but the PF path might have + * dropped the mmap_sem already so take it again so that + * we do not break expectations of all arch specific PF paths + * and g-u-p + */ + if (ret & VM_FAULT_RETRY) + down_read(&vma->vm_mm->mmap_sem); ret = VM_FAULT_SIGBUS; + } return ret; } -- cgit From 6b31d5955cb29a51c5baffee382f213d75e98fb8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:15 -0700 Subject: mm, oom: fix potential data corruption when oom_reaper races with writer Wenwei Tao has noticed that our current assumption that the oom victim is dying and never doing any visible changes after it dies, and so the oom_reaper can tear it down, is not entirely true. __task_will_free_mem consider a task dying when SIGNAL_GROUP_EXIT is set but do_group_exit sends SIGKILL to all threads _after_ the flag is set. So there is a race window when some threads won't have fatal_signal_pending while the oom_reaper could start unmapping the address space. Moreover some paths might not check for fatal signals before each PF/g-u-p/copy_from_user. We already have a protection for oom_reaper vs. PF races by checking MMF_UNSTABLE. This has been, however, checked only for kernel threads (use_mm users) which can outlive the oom victim. A simple fix would be to extend the current check in handle_mm_fault for all tasks but that wouldn't be sufficient because the current check assumes that a kernel thread would bail out after EFAULT from get_user*/copy_from_user and never re-read the same address which would succeed because the PF path has established page tables already. This seems to be the case for the only existing use_mm user currently (virtio driver) but it is rather fragile in general. This is even more fragile in general for more complex paths such as generic_perform_write which can re-read the same address more times (e.g. iov_iter_copy_from_user_atomic to fail and then iov_iter_fault_in_readable on retry). Therefore we have to implement MMF_UNSTABLE protection in a robust way and never make a potentially corrupted content visible. That requires to hook deeper into the PF path and check for the flag _every time_ before a pte for anonymous memory is established (that means all !VM_SHARED mappings). The corruption can be triggered artificially (http://lkml.kernel.org/r/201708040646.v746kkhC024636@www262.sakura.ne.jp) but there doesn't seem to be any real life bug report. The race window should be quite tight to trigger most of the time. Link: http://lkml.kernel.org/r/20170807113839.16695-3-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Wenwei Tao Tested-by: Tetsuo Handa Cc: "Kirill A. Shutemov" Cc: Andrea Argangeli Cc: David Rientjes Cc: Oleg Nesterov Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 22 ++++++++++++++++++++++ mm/huge_memory.c | 30 ++++++++++++++++++++++-------- mm/memory.c | 46 ++++++++++++++++++++-------------------------- 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 8a266e2be5a6..76aac4ce39bc 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -6,6 +6,8 @@ #include #include #include +#include /* MMF_* */ +#include /* VM_FAULT* */ struct zonelist; struct notifier_block; @@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) return tsk->signal->oom_mm; } +/* + * Checks whether a page fault on the given mm is still reliable. + * This is no longer true if the oom reaper started to reap the + * address space which is reflected by MMF_UNSTABLE flag set in + * the mm. At that moment any !shared mapping would lose the content + * and could cause a memory corruption (zero pages instead of the + * original content). + * + * User should call this before establishing a page table entry for + * a !shared mapping and under the proper page table lock. + * + * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise. + */ +static inline int check_stable_address_space(struct mm_struct *mm) +{ + if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + return VM_FAULT_SIGBUS; + return 0; +} + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 216114f6ef0b..90731e3b7e58 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, struct mem_cgroup *memcg; pgtable_t pgtable; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + int ret = 0; VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) { - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - return VM_FAULT_OOM; + ret = VM_FAULT_OOM; + goto release; } clear_huge_page(page, haddr, HPAGE_PMD_NR); @@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) { - spin_unlock(vmf->ptl); - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - pte_free(vma->vm_mm, pgtable); + goto unlock_release; } else { pmd_t entry; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock_release; + /* Deliver the page fault to userland */ if (userfaultfd_missing(vma)) { int ret; @@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } return 0; +unlock_release: + spin_unlock(vmf->ptl); +release: + if (pgtable) + pte_free(vma->vm_mm, pgtable); + mem_cgroup_cancel_charge(page, memcg, true); + put_page(page); + return ret; + } /* @@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf) ret = 0; set = false; if (pmd_none(*vmf->pmd)) { - if (userfaultfd_missing(vma)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) { + spin_unlock(vmf->ptl); + } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); diff --git a/mm/memory.c b/mm/memory.c index c717b5bcc80e..fe2fba27ded2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -2893,6 +2894,7 @@ static int do_anonymous_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; + int ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ @@ -2925,6 +2927,9 @@ static int do_anonymous_page(struct vm_fault *vmf) vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) goto unlock; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2959,6 +2964,10 @@ static int do_anonymous_page(struct vm_fault *vmf) if (!pte_none(*vmf->pte)) goto release; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto release; + /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2978,7 +2987,7 @@ setpte: update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + return ret; release: mem_cgroup_cancel_charge(page, memcg, false); put_page(page); @@ -3252,7 +3261,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, int finish_fault(struct vm_fault *vmf) { struct page *page; - int ret; + int ret = 0; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && @@ -3260,7 +3269,15 @@ int finish_fault(struct vm_fault *vmf) page = vmf->cow_page; else page = vmf->page; - ret = alloc_set_pte(vmf, vmf->memcg, page); + + /* + * check even for read faults because we might have lost our CoWed + * page + */ + if (!(vmf->vma->vm_flags & VM_SHARED)) + ret = check_stable_address_space(vmf->vma->vm_mm); + if (!ret) + ret = alloc_set_pte(vmf, vmf->memcg, page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; @@ -3900,29 +3917,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, mem_cgroup_oom_synchronize(false); } - /* - * This mm has been already reaped by the oom reaper and so the - * refault cannot be trusted in general. Anonymous refaults would - * lose data and give a zero page instead e.g. This is especially - * problem for use_mm() because regular tasks will just die and - * the corrupted data will not be visible anywhere while kthread - * will outlive the oom victim and potentially propagate the date - * further. - */ - if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) { - - /* - * We are going to enforce SIGBUS but the PF path might have - * dropped the mmap_sem already so take it again so that - * we do not break expectations of all arch specific PF paths - * and g-u-p - */ - if (ret & VM_FAULT_RETRY) - down_read(&vma->vm_mm->mmap_sem); - ret = VM_FAULT_SIGBUS; - } - return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); -- cgit From eb61b5911bdc923875cde99eb25203a0e2b06d43 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 18 Aug 2017 15:16:18 -0700 Subject: signal: don't remove SIGNAL_UNKILLABLE for traced tasks. When forcing a signal, SIGNAL_UNKILLABLE is removed to prevent recursive faults, but this is undesirable when tracing. For example, debugging an init process (whether global or namespace), hitting a breakpoint and SIGTRAP will force SIGTRAP and then remove SIGNAL_UNKILLABLE. Everything continues fine, but then once debugging has finished, the init process is left killable which is unlikely what the user expects, resulting in either an accidentally killed init or an init that stops reaping zombies. Link: http://lkml.kernel.org/r/20170815112806.10728-1-jamie.iles@oracle.com Signed-off-by: Jamie Iles Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 7e33f8c583e6..ed804a470dcd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) recalc_sigpending_and_wake(t); } } - if (action->sa.sa_handler == SIG_DFL) + /* + * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect + * debugging to leave init killable. + */ + if (action->sa.sa_handler == SIG_DFL && !t->ptrace) t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sighand->siglock, flags); -- cgit From da094e42848e3c36feaa3b5271e53983fd45424f Mon Sep 17 00:00:00 2001 From: Prakash Gupta Date: Fri, 18 Aug 2017 15:16:21 -0700 Subject: mm/cma_debug.c: fix stack corruption due to sprintf usage name[] in cma_debugfs_add_one() can only accommodate 16 chars including NULL to store sprintf output. It's common for cma device name to be larger than 15 chars. This can cause stack corrpution. If the gcc stack protector is turned on, this can cause a panic due to stack corruption. Below is one example trace: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffff8e69a75730 Call trace: dump_backtrace+0x0/0x2c4 show_stack+0x20/0x28 dump_stack+0xb8/0xf4 panic+0x154/0x2b0 print_tainted+0x0/0xc0 cma_debugfs_init+0x274/0x290 do_one_initcall+0x5c/0x168 kernel_init_freeable+0x1c8/0x280 Fix the short sprintf buffer in cma_debugfs_add_one() by using scnprintf() instead of sprintf(). Link: http://lkml.kernel.org/r/1502446217-21840-1-git-send-email-guptap@codeaurora.org Fixes: f318dd083c81 ("cma: Store a name in the cma structure") Signed-off-by: Prakash Gupta Acked-by: Laura Abbott Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 595b757bef72..c03ccbc405a0 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx) char name[16]; int u32s; - sprintf(name, "cma-%s", cma->name); + scnprintf(name, sizeof(name), "cma-%s", cma->name); tmp = debugfs_create_dir(name, cma_debugfs_root); -- cgit From 73223e4e2e3867ebf033a5a8eb2e5df0158ccc99 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 18 Aug 2017 15:16:24 -0700 Subject: mm/mempolicy: fix use after free when calling get_mempolicy I hit a use after free issue when executing trinity and repoduced it with KASAN enabled. The related call trace is as follows. BUG: KASan: use after free in SyS_get_mempolicy+0x3c8/0x960 at addr ffff8801f582d766 Read of size 2 by task syz-executor1/798 INFO: Allocated in mpol_new.part.2+0x74/0x160 age=3 cpu=1 pid=799 __slab_alloc+0x768/0x970 kmem_cache_alloc+0x2e7/0x450 mpol_new.part.2+0x74/0x160 mpol_new+0x66/0x80 SyS_mbind+0x267/0x9f0 system_call_fastpath+0x16/0x1b INFO: Freed in __mpol_put+0x2b/0x40 age=4 cpu=1 pid=799 __slab_free+0x495/0x8e0 kmem_cache_free+0x2f3/0x4c0 __mpol_put+0x2b/0x40 SyS_mbind+0x383/0x9f0 system_call_fastpath+0x16/0x1b INFO: Slab 0xffffea0009cb8dc0 objects=23 used=8 fp=0xffff8801f582de40 flags=0x200000000004080 INFO: Object 0xffff8801f582d760 @offset=5984 fp=0xffff8801f582d600 Bytes b4 ffff8801f582d750: ae 01 ff ff 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object ffff8801f582d760: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff8801f582d770: 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkk. Redzone ffff8801f582d778: bb bb bb bb bb bb bb bb ........ Padding ffff8801f582d8b8: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Memory state around the buggy address: ffff8801f582d600: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801f582d680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8801f582d700: fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb fc !shared memory policy is not protected against parallel removal by other thread which is normally protected by the mmap_sem. do_get_mempolicy, however, drops the lock midway while we can still access it later. Early premature up_read is a historical artifact from times when put_user was called in this path see https://lwn.net/Articles/124754/ but that is gone since 8bccd85ffbaf ("[PATCH] Implement sys_* do_* layering in the memory policy layer."). but when we have the the current mempolicy ref count model. The issue was introduced accordingly. Fix the issue by removing the premature release. Link: http://lkml.kernel.org/r/1502950924-27521-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Acked-by: Michal Hocko Cc: Minchan Kim Cc: Vlastimil Babka Cc: David Rientjes Cc: Mel Gorman Cc: [2.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d911fa5cb2a7..618ab125228b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -861,11 +861,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, *policy |= (pol->flags & MPOL_MODE_FLAGS); } - if (vma) { - up_read(¤t->mm->mmap_sem); - vma = NULL; - } - err = 0; if (nmask) { if (mpol_store_user_nodemask(pol)) { -- cgit From 704b862f9efd6d4c87a8d0a344dda19bda9c6b69 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 18 Aug 2017 15:16:27 -0700 Subject: mm/vmalloc.c: don't unconditonally use __GFP_HIGHMEM Commit 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly") added use of __GFP_HIGHMEM for allocations. vmalloc_32 may use GFP_DMA/GFP_DMA32 which does not play nice with __GFP_HIGHMEM and will trigger a BUG in gfp_zone. Only add __GFP_HIGHMEM if we aren't using GFP_DMA/GFP_DMA32. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1482249 Link: http://lkml.kernel.org/r/20170816220705.31374-1-labbott@redhat.com Fixes: 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly") Signed-off-by: Laura Abbott Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8698c1c86c4d..a47e3894c775 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1671,7 +1671,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page **pages; unsigned int nr_pages, array_size, i; const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN; + const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; + const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ? + 0 : + __GFP_HIGHMEM; nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1679,7 +1682,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, + pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, PAGE_KERNEL, node, area->caller); } else { pages = kmalloc_node(array_size, nested_gfp, node); @@ -1700,9 +1703,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } if (node == NUMA_NO_NODE) - page = alloc_page(alloc_mask); + page = alloc_page(alloc_mask|highmem_mask); else - page = alloc_pages_node(node, alloc_mask, 0); + page = alloc_pages_node(node, alloc_mask|highmem_mask, 0); if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ @@ -1710,7 +1713,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, goto fail; } area->pages[i] = page; - if (gfpflags_allow_blocking(gfp_mask)) + if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) cond_resched(); } -- cgit From c715b72c1ba406f133217b509044c38d8e714a37 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Aug 2017 15:16:31 -0700 Subject: mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes Moving the x86_64 and arm64 PIE base from 0x555555554000 to 0x000100000000 broke AddressSanitizer. This is a partial revert of: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") The AddressSanitizer tool has hard-coded expectations about where executable mappings are loaded. The motivation for changing the PIE base in the above commits was to avoid the Stack-Clash CVEs that allowed executable mappings to get too close to heap and stack. This was mainly a problem on 32-bit, but the 64-bit bases were moved too, in an effort to proactively protect those systems (proofs of concept do exist that show 64-bit collisions, but other recent changes to fix stack accounting and setuid behaviors will minimize the impact). The new 32-bit PIE base is fine for ASan (since it matches the ET_EXEC base), so only the 64-bit PIE base needs to be reverted to let x86 and arm64 ASan binaries run again. Future changes to the 64-bit PIE base on these architectures can be made optional once a more dynamic method for dealing with AddressSanitizer is found. (e.g. always loading PIE into the mmap region for marked binaries.) Link: http://lkml.kernel.org/r/20170807201542.GA21271@beast Fixes: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") Fixes: 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") Signed-off-by: Kees Cook Reported-by: Kostya Serebryany Acked-by: Will Deacon Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/elf.h | 4 ++-- arch/x86/include/asm/elf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index acae781f7359..3288c2b36731 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -114,10 +114,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE 0x100000000UL +#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1c18d83d3f09..9aeb91935ce0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -247,11 +247,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - 0x100000000UL) + (TASK_SIZE / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, -- cgit From ff244c6b29b176f3f448bc75e55df297225e1b3a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Aug 2017 13:39:56 -0700 Subject: tun: handle register_netdevice() failures properly syzkaller reported a double free [1], caused by the fact that tun driver was not updated properly when priv_destructor was added. When/if register_netdevice() fails, priv_destructor() must have been called already. [1] BUG: KASAN: double-free or invalid-free in selinux_tun_dev_free_security+0x15/0x20 security/selinux/hooks.c:5023 CPU: 0 PID: 2919 Comm: syzkaller227220 Not tainted 4.13.0-rc4+ #23 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x7f/0x260 mm/kasan/report.c:252 kasan_report_double_free+0x55/0x80 mm/kasan/report.c:333 kasan_slab_free+0xa0/0xc0 mm/kasan/kasan.c:514 __cache_free mm/slab.c:3503 [inline] kfree+0xd3/0x260 mm/slab.c:3820 selinux_tun_dev_free_security+0x15/0x20 security/selinux/hooks.c:5023 security_tun_dev_free_security+0x48/0x80 security/security.c:1512 tun_set_iff drivers/net/tun.c:1884 [inline] __tun_chr_ioctl+0x2ce6/0x3d50 drivers/net/tun.c:2064 tun_chr_ioctl+0x2a/0x40 drivers/net/tun.c:2309 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x443ff9 RSP: 002b:00007ffc34271f68 EFLAGS: 00000217 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002e0 RCX: 0000000000443ff9 RDX: 0000000020533000 RSI: 00000000400454ca RDI: 0000000000000003 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 0000000000401ce0 R13: 0000000000401d70 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 2919: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xaa/0xd0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x101/0x6f0 mm/slab.c:3627 kmalloc include/linux/slab.h:493 [inline] kzalloc include/linux/slab.h:666 [inline] selinux_tun_dev_alloc_security+0x49/0x170 security/selinux/hooks.c:5012 security_tun_dev_alloc_security+0x6d/0xa0 security/security.c:1506 tun_set_iff drivers/net/tun.c:1839 [inline] __tun_chr_ioctl+0x1730/0x3d50 drivers/net/tun.c:2064 tun_chr_ioctl+0x2a/0x40 drivers/net/tun.c:2309 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 2919: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x6e/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xd3/0x260 mm/slab.c:3820 selinux_tun_dev_free_security+0x15/0x20 security/selinux/hooks.c:5023 security_tun_dev_free_security+0x48/0x80 security/security.c:1512 tun_free_netdev+0x13b/0x1b0 drivers/net/tun.c:1563 register_netdevice+0x8d0/0xee0 net/core/dev.c:7605 tun_set_iff drivers/net/tun.c:1859 [inline] __tun_chr_ioctl+0x1caf/0x3d50 drivers/net/tun.c:2064 tun_chr_ioctl+0x2a/0x40 drivers/net/tun.c:2309 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe The buggy address belongs to the object at ffff8801d2843b40 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 0 bytes inside of 32-byte region [ffff8801d2843b40, ffff8801d2843b60) The buggy address belongs to the page: page:ffffea000660cea8 count:1 mapcount:0 mapping:ffff8801d2843000 index:0xffff8801d2843fc1 flags: 0x200000000000100(slab) raw: 0200000000000100 ffff8801d2843000 ffff8801d2843fc1 000000010000003f raw: ffffea0006626a40 ffffea00066141a0 ffff8801dbc00100 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d2843a00: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc ffff8801d2843a80: 00 00 00 fc fc fc fc fc fb fb fb fb fc fc fc fc >ffff8801d2843b00: 00 00 00 00 fc fc fc fc fb fb fb fb fc fc fc fc ^ ffff8801d2843b80: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc ffff8801d2843c00: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc ================================================================== Fixes: cf124db566e6 ("net: Fix inconsistent teardown and release of private netdev state.") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 32ad87345f57..0a2c0a42283f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1879,6 +1879,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err_detach: tun_detach_all(dev); + /* register_netdevice() already called tun_free_netdev() */ + goto err_free_dev; + err_free_flow: tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); -- cgit From 5bfd37b4de5c98e86b12bd13be5aa46c7484a125 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 09:41:54 -0700 Subject: tipc: fix use-after-free syszkaller reported use-after-free in tipc [1] When msg->rep skb is freed, set the pointer to NULL, so that caller does not free it again. [1] ================================================================== BUG: KASAN: use-after-free in skb_push+0xd4/0xe0 net/core/skbuff.c:1466 Read of size 8 at addr ffff8801c6e71e90 by task syz-executor5/4115 CPU: 1 PID: 4115 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #32 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 skb_push+0xd4/0xe0 net/core/skbuff.c:1466 tipc_nl_compat_recv+0x833/0x18f0 net/tipc/netlink_compat.c:1209 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512e9 RSP: 002b:00007f3bc8184c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004512e9 RDX: 0000000000000020 RSI: 0000000020fdb000 RDI: 0000000000000006 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b5e76 R13: 00007f3bc8184b48 R14: 00000000004b5e86 R15: 0000000000000000 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 kmem_cache_alloc_node+0x13d/0x750 mm/slab.c:3651 __alloc_skb+0xf1/0x740 net/core/skbuff.c:219 alloc_skb include/linux/skbuff.h:903 [inline] tipc_tlv_alloc+0x26/0xb0 net/tipc/netlink_compat.c:148 tipc_nl_compat_dumpit+0xf2/0x3c0 net/tipc/netlink_compat.c:248 tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kmem_cache_free+0x77/0x280 mm/slab.c:3763 kfree_skbmem+0x1a1/0x1d0 net/core/skbuff.c:622 __kfree_skb net/core/skbuff.c:682 [inline] kfree_skb+0x165/0x4c0 net/core/skbuff.c:699 tipc_nl_compat_dumpit+0x36a/0x3c0 net/tipc/netlink_compat.c:260 tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x31a/0x5d0 net/socket.c:898 call_write_iter include/linux/fs.h:1743 [inline] new_sync_write fs/read_write.c:457 [inline] __vfs_write+0x684/0x970 fs/read_write.c:470 vfs_write+0x189/0x510 fs/read_write.c:518 SYSC_write fs/read_write.c:565 [inline] SyS_write+0xef/0x220 fs/read_write.c:557 entry_SYSCALL_64_fastpath+0x1f/0xbe The buggy address belongs to the object at ffff8801c6e71dc0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 208 bytes inside of 224-byte region [ffff8801c6e71dc0, ffff8801c6e71ea0) The buggy address belongs to the page: page:ffffea00071b9c40 count:1 mapcount:0 mapping:ffff8801c6e71000 index:0x0 flags: 0x200000000000100(slab) raw: 0200000000000100 ffff8801c6e71000 0000000000000000 000000010000000c raw: ffffea0007224a20 ffff8801d98caf48 ffff8801d9e79040 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801c6e71d80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801c6e71e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801c6e71e80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801c6e71f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801c6e71f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 9bfe886ab330..750949dfc1d7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); + msg->rep = NULL; return -ENOMEM; } err = __tipc_nl_compat_dumpit(cmd, msg, arg); - if (err) + if (err) { kfree_skb(msg->rep); - + msg->rep = NULL; + } kfree_skb(arg); return err; -- cgit From 15339e441ec46fbc3bf3486bb1ae4845b0f1bb8d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 16 Aug 2017 20:16:40 +0200 Subject: sctp: fully initialize the IPv6 address in sctp_v6_to_addr() KMSAN reported use of uninitialized sctp_addr->v4.sin_addr.s_addr and sctp_addr->v6.sin6_scope_id in sctp_v6_cmp_addr() (see below). Make sure all fields of an IPv6 address are initialized, which guarantees that the IPv4 fields are also initialized. ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x172/0x1c0 lib/dump_stack.c:42 is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg net/socket.c:643 [inline] SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x44b479 RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 origin description: ----dst_saddr@sctp_v6_get_dst local variable created at: sk_fullsock include/net/sock.h:2321 [inline] inet6_sk include/linux/ipv6.h:309 [inline] sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 ================================================================== Signed-off-by: Alexander Potapenko Reviewed-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..a4b6ffb61495 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; + addr->v6.sin6_scope_id = 0; } /* Compare addresses exactly. -- cgit From f299aec6ebd747298e35934cff7709c6b119ca52 Mon Sep 17 00:00:00 2001 From: Charles Milette Date: Fri, 18 Aug 2017 16:30:34 -0400 Subject: staging: rtl8188eu: add RNX-N150NUB support Add support for USB Device Rosewill RNX-N150NUB. VendorID: 0x0bda, ProductID: 0xffef Signed-off-by: Charles Milette Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index d283341cfe43..56cd4e5e51b2 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -45,6 +45,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ + {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ }; -- cgit From 383143f31d7d3525a1dbff733d52fff917f82f15 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 16 Aug 2017 11:18:09 -0700 Subject: ipv6: reset fn->rr_ptr when replacing route syzcaller reported the following use-after-free issue in rt6_select(): BUG: KASAN: use-after-free in rt6_select net/ipv6/route.c:755 [inline] at addr ffff8800bc6994e8 BUG: KASAN: use-after-free in ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 at addr ffff8800bc6994e8 Read of size 4 by task syz-executor1/439628 CPU: 0 PID: 439628 Comm: syz-executor1 Not tainted 4.3.5+ #8 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 0000000000000000 ffff88018fe435b0 ffffffff81ca384d ffff8801d3588c00 ffff8800bc699380 ffff8800bc699500 dffffc0000000000 ffff8801d40a47c0 ffff88018fe435d8 ffffffff81735751 ffff88018fe43660 ffff8800bc699380 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0xc1/0x124 lib/dump_stack.c:51 sctp: [Deprecated]: syz-executor0 (pid 439615) Use of struct sctp_assoc_value in delayed_ack socket option. Use struct sctp_sack_info instead [] kasan_object_err+0x21/0x70 mm/kasan/report.c:158 [] print_address_description mm/kasan/report.c:196 [inline] [] kasan_report_error+0x1b4/0x4a0 mm/kasan/report.c:285 [] kasan_report mm/kasan/report.c:305 [inline] [] __asan_report_load4_noabort+0x43/0x50 mm/kasan/report.c:325 [] rt6_select net/ipv6/route.c:755 [inline] [] ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 [] ip6_pol_route_output+0x81/0xb0 net/ipv6/route.c:1203 [] fib6_rule_action+0x1f0/0x680 net/ipv6/fib6_rules.c:95 [] fib_rules_lookup+0x2a6/0x7a0 net/core/fib_rules.c:223 [] fib6_rule_lookup+0xd0/0x250 net/ipv6/fib6_rules.c:41 [] ip6_route_output+0x1d6/0x2c0 net/ipv6/route.c:1224 [] ip6_dst_lookup_tail+0x4d2/0x890 net/ipv6/ip6_output.c:943 [] ip6_dst_lookup_flow+0x9a/0x250 net/ipv6/ip6_output.c:1079 [] ip6_datagram_dst_update+0x538/0xd40 net/ipv6/datagram.c:91 [] __ip6_datagram_connect net/ipv6/datagram.c:251 [inline] [] ip6_datagram_connect+0x518/0xe50 net/ipv6/datagram.c:272 [] ip6_datagram_connect_v6_only+0x63/0x90 net/ipv6/datagram.c:284 [] inet_dgram_connect+0x170/0x1f0 net/ipv4/af_inet.c:564 [] SYSC_connect+0x1a7/0x2f0 net/socket.c:1582 [] SyS_connect+0x29/0x30 net/socket.c:1563 [] entry_SYSCALL_64_fastpath+0x12/0x17 Object at ffff8800bc699380, in cache ip6_dst_cache size: 384 The root cause of it is that in fib6_add_rt2node(), when it replaces an existing route with the new one, it does not update fn->rr_ptr. This commit resets fn->rr_ptr to NULL when it points to a route which is replaced in fib6_add_rt2node(). Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ebb299cf72b7..e6f878067c68 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -914,6 +914,8 @@ add: } nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); if (nsiblings) { @@ -926,6 +928,8 @@ add: if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; } else { -- cgit From bc3aae2bbac46dd894c89db5d5e98f7f0ef9e205 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 16 Aug 2017 12:38:52 -0700 Subject: net: check and errout if res->fi is NULL when RTM_F_FIB_MATCH is set Syzkaller hit 'general protection fault in fib_dump_info' bug on commit 4.13-rc5.. Guilty file: net/ipv4/fib_semantics.c kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 0 PID: 2808 Comm: syz-executor0 Not tainted 4.13.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff880078562700 task.stack: ffff880078110000 RIP: 0010:fib_dump_info+0x388/0x1170 net/ipv4/fib_semantics.c:1314 RSP: 0018:ffff880078117010 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 00000000000000fe RCX: 0000000000000002 RDX: 0000000000000006 RSI: ffff880078117084 RDI: 0000000000000030 RBP: ffff880078117268 R08: 000000000000000c R09: ffff8800780d80c8 R10: 0000000058d629b4 R11: 0000000067fce681 R12: 0000000000000000 R13: ffff8800784bd540 R14: ffff8800780d80b5 R15: ffff8800780d80a4 FS: 00000000022fa940(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004387d0 CR3: 0000000079135000 CR4: 00000000000006f0 Call Trace: inet_rtm_getroute+0xc89/0x1f50 net/ipv4/route.c:2766 rtnetlink_rcv_msg+0x288/0x680 net/core/rtnetlink.c:4217 netlink_rcv_skb+0x340/0x470 net/netlink/af_netlink.c:2397 rtnetlink_rcv+0x28/0x30 net/core/rtnetlink.c:4223 netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] netlink_unicast+0x4c4/0x6e0 net/netlink/af_netlink.c:1291 netlink_sendmsg+0x8c4/0xca0 net/netlink/af_netlink.c:1854 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x779/0x8d0 net/socket.c:2035 __sys_sendmsg+0xd1/0x170 net/socket.c:2069 SYSC_sendmsg net/socket.c:2080 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2076 entry_SYSCALL_64_fastpath+0x1a/0xa5 RIP: 0033:0x4512e9 RSP: 002b:00007ffc75584cc8 EFLAGS: 00000216 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00000000004512e9 RDX: 0000000000000000 RSI: 0000000020f2cfc8 RDI: 0000000000000003 RBP: 000000000000000e R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: fffffffffffffffe R13: 0000000000718000 R14: 0000000020c44ff0 R15: 0000000000000000 Code: 00 0f b6 8d ec fd ff ff 48 8b 85 f0 fd ff ff 88 48 17 48 8b 45 28 48 8d 78 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e cb 0c 00 00 48 8b 45 28 44 RIP: fib_dump_info+0x388/0x1170 net/ipv4/fib_semantics.c:1314 RSP: ffff880078117010 ---[ end trace 254a7af28348f88b ]--- This patch adds a res->fi NULL check. example run: $ip route get 0.0.0.0 iif virt1-0 broadcast 0.0.0.0 dev lo cache iif virt1-0 $ip route get 0.0.0.0 iif virt1-0 fibmatch RTNETLINK answers: No route to host Reported-by: idaifish Reported-by: Dmitry Vyukov Fixes: b61798130f1b ("net: ipv4: RTM_GETROUTE: return matched fib result when requested") Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/ipv4/route.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fe877a4a72b1..2331de20ca50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2763,14 +2763,21 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = rt->rt_table_id; - if (rtm->rtm_flags & RTM_F_FIB_MATCH) + if (rtm->rtm_flags & RTM_F_FIB_MATCH) { + if (!res.fi) { + err = fib_props[res.type].error; + if (!err) + err = -EHOSTUNREACH; + goto errout_free; + } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, table_id, rt->rt_type, res.prefix, res.prefixlen, fl4.flowi4_tos, res.fi, 0); - else + } else { err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); + } if (err < 0) goto errout_free; -- cgit From cdbeb633ca71a02b7b63bfeb94994bf4e1a0b894 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 16 Aug 2017 17:53:36 -0400 Subject: tcp: when rearming RTO, if RTO time is in past then fire RTO ASAP In some situations tcp_send_loss_probe() can realize that it's unable to send a loss probe (TLP), and falls back to calling tcp_rearm_rto() to schedule an RTO timer. In such cases, sometimes tcp_rearm_rto() realizes that the RTO was eligible to fire immediately or at some point in the past (delta_us <= 0). Previously in such cases tcp_rearm_rto() was scheduling such "overdue" RTOs to happen at now + icsk_rto, which caused needless delays of hundreds of milliseconds (and non-linear behavior that made reproducible testing difficult). This commit changes the logic to schedule "overdue" RTOs ASAP, rather than at now + icsk_rto. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Suggested-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53de1424c13c..bab7f0493098 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3009,8 +3009,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta_us > 0) - rto = usecs_to_jiffies(delta_us); + rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); -- cgit From b6f6d56c91f5261c55edef2df300698c4486b669 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 17 Aug 2017 13:06:14 +0200 Subject: PCI: Allow PCI express root ports to find themselves If the pci_find_pcie_root_port() function is called on a root port itself, return the root port rather than NULL. This effectively reverts commit 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI device") which added an extra check that would now be redundant. Fixes: a99b646afa8a ("PCI: Disable PCIe Relaxed Ordering if unsupported") Fixes: c56d4450eb68 ("PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum") Signed-off-by: Thierry Reding Acked-by: Bjorn Helgaas Tested-by: Shawn Lin Tested-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/pci/pci.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index da5570cf5c6a..fdf65a6c13f6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -514,7 +514,7 @@ EXPORT_SYMBOL(pci_find_resource); */ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) { - struct pci_dev *bridge, *highest_pcie_bridge = NULL; + struct pci_dev *bridge, *highest_pcie_bridge = dev; bridge = pci_upstream_bridge(dev); while (bridge && pci_is_pcie(bridge)) { @@ -522,11 +522,10 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) bridge = pci_upstream_bridge(bridge); } - if (highest_pcie_bridge && - pci_pcie_type(highest_pcie_bridge) == PCI_EXP_TYPE_ROOT_PORT) - return highest_pcie_bridge; + if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT) + return NULL; - return NULL; + return highest_pcie_bridge; } EXPORT_SYMBOL(pci_find_pcie_root_port); -- cgit From ca3d89a3ebe79367bd41b6b8ba37664478ae2dba Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 17 Aug 2017 18:29:52 +0300 Subject: net/mlx4_core: Enable 4K UAR if SRIOV module parameter is not enabled enable_4k_uar module parameter was added in patch cited below to address the backward compatibility issue in SRIOV when the VM has system's PAGE_SIZE uar implementation and the Hypervisor has 4k uar implementation. The above compatibility issue does not exist in the non SRIOV case. In this patch, we always enable 4k uar implementation if SRIOV is not enabled on mlx4's supported cards. Fixes: 76e39ccf9c36 ("net/mlx4_core: Fix backward compatibility on VFs") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 09b9bc17bce9..5fe5cdc51357 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -432,7 +432,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) /* Virtual PCI function needs to determine UAR page size from * firmware. Only master PCI function can set the uar page size */ - if (enable_4k_uar) + if (enable_4k_uar || !dev->persist->num_vfs) dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; else dev->uar_page_shift = PAGE_SHIFT; @@ -2277,7 +2277,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - if (enable_4k_uar) { + if (enable_4k_uar || !dev->persist->num_vfs) { init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; -- cgit From b024d949a3c24255a7ef1a470420eb478949aa4c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Aug 2017 23:14:58 +0100 Subject: irda: do not leak initialized list.dev to userspace list.dev has not been initialized and so the copy_to_user is copying data from the stack back to user space which is a potential information leak. Fix this ensuring all of list is initialized to zero. Detected by CoverityScan, CID#1357894 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/irda/af_irda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2e6990f8b80b..23fa7c8b09a5 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2213,7 +2213,7 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); - struct irda_device_list list; + struct irda_device_list list = { 0 }; struct irda_device_info *discoveries; struct irda_ias_set * ias_opt; /* IAS get/query params */ struct ias_object * ias_obj; /* Object in IAS */ -- cgit From 9a19bad70cf16b0cdf3576efda7deb490e7aa529 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Aug 2017 00:19:42 +0100 Subject: rxrpc: Fix oops when discarding a preallocated service call rxrpc_service_prealloc_one() doesn't set the socket pointer on any new call it preallocates, but does add it to the rxrpc net namespace call list. This, however, causes rxrpc_put_call() to oops when the call is discarded when the socket is closed. rxrpc_put_call() needs the socket to be able to reach the namespace so that it can use a lock held therein. Fix this by setting a call's socket pointer immediately before discarding it. This can be triggered by unloading the kafs module, resulting in an oops like the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 IP: rxrpc_put_call+0x1e2/0x32d PGD 0 P4D 0 Oops: 0000 [#1] SMP Modules linked in: kafs(E-) CPU: 3 PID: 3037 Comm: rmmod Tainted: G E 4.12.0-fscache+ #213 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8803fc92e2c0 task.stack: ffff8803fef74000 RIP: 0010:rxrpc_put_call+0x1e2/0x32d RSP: 0018:ffff8803fef77e08 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8803fab99ac0 RCX: 000000000000000f RDX: ffffffff81c50a40 RSI: 000000000000000c RDI: ffff8803fc92ea88 RBP: ffff8803fef77e30 R08: ffff8803fc87b941 R09: ffffffff82946d20 R10: ffff8803fef77d10 R11: 00000000000076fc R12: 0000000000000005 R13: ffff8803fab99c20 R14: 0000000000000001 R15: ffffffff816c6aee FS: 00007f915a059700(0000) GS:ffff88041fb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000030 CR3: 00000003fef39000 CR4: 00000000001406e0 Call Trace: rxrpc_discard_prealloc+0x325/0x341 rxrpc_listen+0xf9/0x146 kernel_listen+0xb/0xd afs_close_socket+0x3e/0x173 [kafs] afs_exit+0x1f/0x57 [kafs] SyS_delete_module+0x10f/0x19a do_syscall_64+0x8a/0x149 entry_SYSCALL64_slow_path+0x25/0x25 Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/call_accept.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index dd30d74824b0..ec3383f97d4c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + call->socket = rx; if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); -- cgit From 4f8a881acc9d1adaf1e552349a0b1df28933a04c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Aug 2017 11:01:36 +0800 Subject: net: sched: fix NULL pointer dereference when action calls some targets As we know in some target's checkentry it may dereference par.entryinfo to check entry stuff inside. But when sched action calls xt_check_target, par.entryinfo is set with NULL. It would cause kernel panic when calling some targets. It can be reproduce with: # tc qd add dev eth1 ingress handle ffff: # tc filter add dev eth1 parent ffff: u32 match u32 0 0 action xt \ -j ECN --ecn-tcp-remove It could also crash kernel when using target CLUSTERIP or TPROXY. By now there's no proper value for par.entryinfo in ipt_init_target, but it can not be set with NULL. This patch is to void all these panics by setting it with an ipt_entry obj with all members = 0. Note that this issue has been there since the very beginning. Signed-off-by: Xin Long Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d516ba8178b8..541707802a23 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -41,6 +41,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, { struct xt_tgchk_param par; struct xt_target *target; + struct ipt_entry e = {}; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, @@ -52,6 +53,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, memset(&par, 0, sizeof(par)); par.net = net; par.table = table; + par.entryinfo = &e; par.target = target; par.targinfo = t->data; par.hook_mask = hook; -- cgit From 1d2226e45040ed4aee95b633cbd64702bf7fc2a1 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Fri, 18 Aug 2017 16:58:15 -0700 Subject: Input: elan_i2c - add ELAN0602 ACPI ID to support Lenovo Yoga310 Add ELAN0602 to the list of known ACPI IDs to enable support for ELAN touchpads found in Lenovo Yoga310. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 714cf7f9b138..cfbc8ba4c96c 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1247,6 +1247,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, { "ELAN0605", 0 }, -- cgit From ec667683c532c93fb41e100e5d61a518971060e2 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 18 Aug 2017 12:17:21 -0700 Subject: Input: trackpoint - add new trackpoint firmware ID Synaptics add new TP firmware ID: 0x2 and 0x3, for now both lower 2 bits are indicated as TP. Change the constant to bitwise values. This makes trackpoint to be recognized on Lenovo Carbon X1 Gen5 instead of it being identified as "PS/2 Generic Mouse". Signed-off-by: Aaron Ma Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 3 ++- drivers/input/mouse/trackpoint.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 20b5b21c1bba..0871010f18d5 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -265,7 +265,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, unsigned char *fir if (ps2_command(&psmouse->ps2dev, param, MAKE_PS2_CMD(0, 2, TP_READ_ID))) return -1; - if (param[0] != TP_MAGIC_IDENT) + /* add new TP ID. */ + if (!(param[0] & TP_MAGIC_IDENT)) return -1; if (firmware_id) diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 5617ed3a7d7a..88055755f82e 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -21,8 +21,9 @@ #define TP_COMMAND 0xE2 /* Commands start with this */ #define TP_READ_ID 0xE1 /* Sent for device identification */ -#define TP_MAGIC_IDENT 0x01 /* Sent after a TP_READ_ID followed */ +#define TP_MAGIC_IDENT 0x03 /* Sent after a TP_READ_ID followed */ /* by the firmware ID */ + /* Firmware ID includes 0x1, 0x2, 0x3 */ /* -- cgit From 0c264af7be2013266c5b4c644f3f366399ee490a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 20 Aug 2017 15:54:26 +0900 Subject: ALSA: firewire: fix NULL pointer dereference when releasing uninitialized data of iso-resource When calling 'iso_resource_free()' for uninitialized data, this function causes NULL pointer dereference due to its 'unit' member. This occurs when unplugging audio and music units on IEEE 1394 bus at failure of card registration. This commit fixes the bug. The bug exists since kernel v4.5. Fixes: 324540c4e05c ('ALSA: fireface: postpone sound card registration') at v4.12 Fixes: 8865a31e0fd8 ('ALSA: firewire-motu: postpone sound card registration') at v4.12 Fixes: b610386c8afb ('ALSA: firewire-tascam: deleyed registration of sound card') at v4.7 Fixes: 86c8dd7f4da3 ('ALSA: firewire-digi00x: delayed registration of sound card') at v4.7 Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') at v4.7 Fixes: 7d3c1d5901aa ('ALSA: fireworks: delayed registration of sound card') at v4.7 Fixes: 04a2c73c97eb ('ALSA: bebob: delayed registration of sound card') at v4.7 Fixes: b59fb1900b4f ('ALSA: dice: postpone card registration') at v4.5 Cc: # v4.5+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/iso-resources.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index f0e4d502d604..066b5df666f4 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -210,9 +210,14 @@ EXPORT_SYMBOL(fw_iso_resources_update); */ void fw_iso_resources_free(struct fw_iso_resources *r) { - struct fw_card *card = fw_parent_device(r->unit)->card; + struct fw_card *card; int bandwidth, channel; + /* Not initialized. */ + if (r->unit == NULL) + return; + card = fw_parent_device(r->unit)->card; + mutex_lock(&r->mutex); if (r->allocated) { -- cgit From dbd7396b4f24e0c3284fcc05f5def24f52c09884 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 20 Aug 2017 15:55:02 +0900 Subject: ALSA: firewire-motu: destroy stream data surely at failure of card initialization When failing sound card registration after initializing stream data, this module leaves allocated data in stream data. This commit fixes the bug. Fixes: 9b2bb4f2f4a2 ('ALSA: firewire-motu: add stream management functionality') Cc: # v4.12+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/motu/motu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c index bf779cfeef0d..59a270406353 100644 --- a/sound/firewire/motu/motu.c +++ b/sound/firewire/motu/motu.c @@ -128,6 +128,7 @@ static void do_registration(struct work_struct *work) return; error: snd_motu_transaction_unregister(motu); + snd_motu_stream_destroy_duplex(motu); snd_card_free(motu->card); dev_info(&motu->unit->device, "Sound card registration failed: %d\n", err); -- cgit From 8fbbe2d7cc478d1544f41f2271787c993c23a4f6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 19 Aug 2017 12:57:51 +0300 Subject: genirq/ipi: Fixup checks against nr_cpu_ids Valid CPU ids are [0, nr_cpu_ids-1] inclusive. Fixes: 3b8e29a82dd1 ("genirq: Implement ipi_send_mask/single()") Fixes: f9bce791ae2a ("genirq: Add a new function to get IPI reverse mapping") Signed-off-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170819095751.GB27864@avx2 --- kernel/irq/ipi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 1a9abc1c8ea0..259a22aa9934 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) struct irq_data *data = irq_get_irq_data(irq); struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; - if (!data || !ipimask || cpu > nr_cpu_ids) + if (!data || !ipimask || cpu >= nr_cpu_ids) return INVALID_HWIRQ; if (!cpumask_test_cpu(cpu, ipimask)) @@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, if (!chip->ipi_send_single && !chip->ipi_send_mask) return -EINVAL; - if (cpu > nr_cpu_ids) + if (cpu >= nr_cpu_ids) return -EINVAL; if (dest) { -- cgit From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:14:42 -0600 Subject: PATCH] iio: Fix some documentation warnings The kerneldoc description for the trig_readonly field of struct iio_dev lacked a colon, leading to this doc build warning: ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly' A similar issue for iio_trigger_set_immutable() in trigger.h yielded: ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev' ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig' Fix the formatting and silence the warnings. Signed-off-by: Jonathan Corbet Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..c380daa40c0e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -535,7 +535,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) - * @trig_readonly [INTERN] mark the current trigger immutable + * @trig_readonly: [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..7142d8d6e470 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev, /** * iio_trigger_set_immutable() - set an immutable trigger on destination * - * @indio_dev - IIO device structure containing the device - * @trig - trigger to assign to device + * @indio_dev: IIO device structure containing the device + * @trig: trigger to assign to device * **/ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); -- cgit From fdd0d32eb95f135041236a6885d9006315aa9a1d Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Fri, 4 Aug 2017 01:37:27 +0300 Subject: iio: imu: adis16480: Fix acceleration scale factor for adis16480 According to the datasheet, the range of the acceleration is [-10 g, + 10 g], so the scale factor should be 10 instead of 5. Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16480.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 8cf84d3488b2..12898424d838 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -696,7 +696,7 @@ static const struct adis16480_chip_info adis16480_chip_info[] = { .gyro_max_val = IIO_RAD_TO_DEGREE(22500), .gyro_max_scale = 450, .accel_max_val = IIO_M_S_2_TO_G(12500), - .accel_max_scale = 5, + .accel_max_scale = 10, }, [ADIS16485] = { .channels = adis16485_channels, -- cgit From a359bb2a55f384bb93349ddf9d30b20b37e02e8a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 3 Aug 2017 11:22:17 +0200 Subject: iio: trigger: stm32-timer: fix get trigger mode Fix reading trigger mode, when other bit-fields are set. SMCR register value must be masked to read SMS (slave mode selection) only. Fixes: 9eba381 ("iio: make stm32 trigger driver use INDIO_HARDWARE_TRIGGERED mode") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 14e6eb04bbb0..25ad6abfee22 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -485,7 +485,7 @@ static int stm32_get_trigger_mode(struct iio_dev *indio_dev, regmap_read(priv->regmap, TIM_SMCR, &smcr); - return smcr == TIM_SMCR_SMS ? 0 : -EINVAL; + return (smcr & TIM_SMCR_SMS) == TIM_SMCR_SMS ? 0 : -EINVAL; } static const struct iio_enum stm32_trigger_mode_enum = { -- cgit From f1664eaacec31035450132c46ed2915fd2b2049a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sat, 12 Aug 2017 09:09:21 -0700 Subject: iio: hid-sensor-trigger: Fix the race with user space powering up sensors It has been reported for a while that with iio-sensor-proxy service the rotation only works after one suspend/resume cycle. This required a wait in the systemd unit file to avoid race. I found a Yoga 900 where I could reproduce this. The problem scenerio is: - During sensor driver init, enable run time PM and also set a auto-suspend for 3 seconds. This result in one runtime resume. But there is a check to avoid a powerup in this sequence, but rpm is active - User space iio-sensor-proxy tries to power up the sensor. Since rpm is active it will simply return. But sensors were not actually powered up in the prior sequence, so actaully the sensors will not work - After 3 seconds the auto suspend kicks If we add a wait in systemd service file to fire iio-sensor-proxy after 3 seconds, then now everything will work as the runtime resume will actually powerup the sensor as this is a user request. To avoid this: - Remove the check to match user requested state, this will cause a brief powerup, but if the iio-sensor-proxy starts immediately it will still work as the sensors are ON. - Also move the autosuspend delay to place when user requested turn off of sensors, like after user finished raw read or buffer disable Signed-off-by: Srinivas Pandruvada Tested-by: Bastien Nocera Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 16ade0a0327b..0e4b379ada45 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -111,8 +111,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { - if (!atomic_read(&st->user_requested_state)) - return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -161,6 +159,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) &report_val); } + pr_debug("HID_SENSOR %s set power_state %d report_state %d\n", + st->pdev->name, state_val, report_val); + sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); @@ -182,6 +183,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) ret = pm_runtime_get_sync(&st->pdev->dev); else { pm_runtime_mark_last_busy(&st->pdev->dev); + pm_runtime_use_autosuspend(&st->pdev->dev); ret = pm_runtime_put_autosuspend(&st->pdev->dev); } if (ret < 0) { @@ -285,8 +287,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, /* Default to 3 seconds, but can be changed from sysfs */ pm_runtime_set_autosuspend_delay(&attrb->pdev->dev, 3000); - pm_runtime_use_autosuspend(&attrb->pdev->dev); - return ret; error_unreg_trigger: iio_trigger_unregister(trig); -- cgit From 541ee9b24fca587f510fe1bc58508d5cf40707af Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Aug 2017 19:02:50 +0200 Subject: iio: magnetometer: st_magn: fix status register address for LSM303AGR Fixes: 97865fe41322 (iio: st_sensors: verify interrupt event to status) Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 8e1b0861fbe4..c11f0da86e74 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -358,7 +358,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { .mask_int1 = 0x01, .addr_ihl = 0x63, .mask_ihl = 0x04, - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, + .addr_stat_drdy = 0x67, }, .multi_read_bit = false, .bootime = 2, -- cgit From 8b35a5f87a73842601cd376e0f5b9b25831390f4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Aug 2017 19:02:51 +0200 Subject: iio: magnetometer: st_magn: remove ihl property for LSM303AGR Remove IRQ active low support for LSM303AGR since the sensor does not support that capability for data-ready line Fixes: a9fd053b56c6 (iio: st_sensors: support active-low interrupts) Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index c11f0da86e74..c38563699984 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -356,8 +356,6 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { .drdy_irq = { .addr = 0x62, .mask_int1 = 0x01, - .addr_ihl = 0x63, - .mask_ihl = 0x04, .addr_stat_drdy = 0x67, }, .multi_read_bit = false, -- cgit From d912366a59c5384df436fd007667d6e574128b44 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 20 Aug 2017 09:29:03 -0700 Subject: Input: soc_button_array - silence -ENOENT error on Dell XPS13 9365 The Dell XPS13 9365 has an INT33D2 ACPI node with no GPIOs, causing the following error in dmesg: [ 7.172275] soc_button_array: probe of INT33D2:00 failed with error -2 This commit silences this, by returning -ENODEV when there are no GPIOs. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=196679 Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index f600f3a7a3c6..23520df7650f 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -331,7 +331,7 @@ static int soc_button_probe(struct platform_device *pdev) error = gpiod_count(dev, NULL); if (error < 0) { dev_dbg(dev, "no GPIO attached, ignoring...\n"); - return error; + return -ENODEV; } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); -- cgit From 197e7e521384a23b9e585178f3f11c9fa08274b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Aug 2017 13:26:27 -0700 Subject: Sanitize 'move_pages()' permission checks The 'move_paghes()' system call was introduced long long ago with the same permission checks as for sending a signal (except using CAP_SYS_NICE instead of CAP_SYS_KILL for the overriding capability). That turns out to not be a great choice - while the system call really only moves physical page allocations around (and you need other capabilities to do a lot of it), you can check the return value to map out some the virtual address choices and defeat ASLR of a binary that still shares your uid. So change the access checks to the more common 'ptrace_may_access()' model instead. This tightens the access checks for the uid, and also effectively changes the CAP_SYS_NICE check to CAP_SYS_PTRACE, but it's unlikely that anybody really _uses_ this legacy system call any more (we hav ebetter NUMA placement models these days), so I expect nobody to notice. Famous last words. Reported-by: Otto Ebeling Acked-by: Eric W. Biederman Cc: Willy Tarreau Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index d68a41da6abb..e84eeb4e4356 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -1652,7 +1653,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, const int __user *, nodes, int __user *, status, int, flags) { - const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1676,14 +1676,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, /* * Check if this process has the right to modify the specified - * process. The right exists if the process has administrative - * capabilities, superuser privileges or the same - * userid as the target process. + * process. Use the regular "ptrace_may_access()" checks. */ - tcred = __task_cred(task); - if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && - !capable(CAP_SYS_NICE)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out; -- cgit From 14ccee78fc82f5512908f4424f541549a5705b89 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Aug 2017 14:13:52 -0700 Subject: Linux 4.13-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 90da7bdc3f45..235826f95741 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From 8c97023cf0518f172b8cb7a9fffc28b89401abbf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Jul 2017 15:36:23 +0200 Subject: Kbuild: use -fshort-wchar globally Commit 971a69db7dc0 ("Xen: don't warn about 2-byte wchar_t in efi") added the --no-wchar-size-warning to the Makefile to avoid this harmless warning: arm-linux-gnueabi-ld: warning: drivers/xen/efi.o uses 2-byte wchar_t yet the output is to use 4-byte wchar_t; use of wchar_t values across objects may fail Changing kbuild to use thin archives instead of recursive linking unfortunately brings the same warning back during the final link. The kernel does not use wchar_t string literals at this point, and xen does not use wchar_t at all (only efi_char16_t), so the flag has no effect, but as pointed out by Jan Beulich, adding a wchar_t string literal would be bad here. Since wchar_t is always defined as u16, independent of the toolchain default, always passing -fshort-wchar is correct and lets us remove the Xen specific hack along with fixing the warning. Link: https://patchwork.kernel.org/patch/9275217/ Fixes: 971a69db7dc0 ("Xen: don't warn about 2-byte wchar_t in efi") Signed-off-by: Arnd Bergmann Acked-by: David Vrabel Signed-off-by: Masahiro Yamada --- Makefile | 2 +- drivers/xen/Makefile | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 473f1aa76f6d..4dac1107913b 100644 --- a/Makefile +++ b/Makefile @@ -396,7 +396,7 @@ LINUXINCLUDE := \ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common \ + -fno-strict-aliasing -fno-common -fshort-wchar \ -Werror-implicit-function-declaration \ -Wno-format-security \ -std=gnu89 $(call cc-option,-fno-PIE) diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 8feab810aed9..7f188b8d0c67 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -7,9 +7,6 @@ obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) -CFLAGS_efi.o += -fshort-wchar -LDFLAGS += $(call ld-option, --no-wchar-size-warning) - dom0-$(CONFIG_ARM64) += arm-device.o dom0-$(CONFIG_PCI) += pci.o dom0-$(CONFIG_USB_SUPPORT) += dbgp.o -- cgit From 801d2e9f1cfb24c15ec821288b6bc2e3107f6fcd Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 18 Aug 2017 15:54:41 -0600 Subject: Makefile: add kselftest-clean to PHONY target list kselftest-clean isn't in the PHONY target list. Add it. Signed-off-by: Shuah Khan Signed-off-by: Masahiro Yamada --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 4dac1107913b..c913d80ea6e8 100644 --- a/Makefile +++ b/Makefile @@ -1184,6 +1184,7 @@ PHONY += kselftest kselftest: $(Q)$(MAKE) -C tools/testing/selftests run_tests +PHONY += kselftest-clean kselftest-clean: $(Q)$(MAKE) -C tools/testing/selftests clean -- cgit From 2bfbe7881ee03d12bbedb05f804d139c88df30a0 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 19 Aug 2017 22:30:02 +0100 Subject: kbuild: Do not use hyphen in exported variable name This definition in Makefile.dtbinst: export dtbinst-root ?= $(obj) should define and export dtbinst-root when handling the root dts directory, and do nothing in the subdirectories. However some shells, including dash, will not pass through environment variables whose name includes a hyphen. Usually GNU make does not use a shell to recurse, but if e.g. $(srctree) contains '~' it will use a shell here. Rename the variable to dtbinst_root. References: https://bugs.debian.org/833561 Fixes: 323a028d39cdi ("dts, kbuild: Implement support for dtb vendor subdirs") Signed-off-by: Ben Hutchings Signed-off-by: Masahiro Yamada --- scripts/Makefile.dtbinst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 34614a48b717..993fb85982df 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -14,7 +14,7 @@ src := $(obj) PHONY := __dtbs_install __dtbs_install: -export dtbinst-root ?= $(obj) +export dtbinst_root ?= $(obj) include include/config/auto.conf include scripts/Kbuild.include @@ -27,7 +27,7 @@ dtbinst-dirs := $(dts-dirs) quiet_cmd_dtb_install = INSTALL $< cmd_dtb_install = mkdir -p $(2); cp $< $(2) -install-dir = $(patsubst $(dtbinst-root)%,$(INSTALL_DTBS_PATH)%,$(obj)) +install-dir = $(patsubst $(dtbinst_root)%,$(INSTALL_DTBS_PATH)%,$(obj)) $(dtbinst-files): %.dtb: $(obj)/%.dtb $(call cmd,dtb_install,$(install-dir)) -- cgit From d4dd2d75a26ef07cadc2949efeea9fabc2a5c299 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 21 Aug 2017 00:26:03 +0200 Subject: bpf, doc: also add s390x as arch to sysctl description Looks like this was accidentally missed, so still add s390x as supported eBPF JIT arch to bpf_jit_enable. Fixes: 014cd0a368dc ("bpf: Update sysctl documentation to list all supported architectures") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index d7c2b88b92ae..28596e03220b 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -49,6 +49,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - ppc64 - sparc64 - mips64 + - s390x And the older cBPF JIT supported on the following archs: - arm -- cgit From 5a78449810b06c3bc5fcd002d52e1a64f9bb397e Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 21 Aug 2017 08:52:54 +1200 Subject: switchdev: documentation: minor typo fixes Two typos in switchdev.txt Signed-off-by: Chris Packham Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 3e7b946dea27..5e40e1f68873 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -228,7 +228,7 @@ Learning on the device port should be enabled, as well as learning_sync: bridge link set dev DEV learning on self bridge link set dev DEV learning_sync on self -Learning_sync attribute enables syncing of the learned/forgotton FDB entry to +Learning_sync attribute enables syncing of the learned/forgotten FDB entry to the bridge's FDB. It's possible, but not optimal, to enable learning on the device port and on the bridge port, and disable learning_sync. @@ -245,7 +245,7 @@ the responsibility of the port driver/device to age out these entries. If the port device supports ageing, when the FDB entry expires, it will notify the driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL. If the device does not support ageing, the driver can simulate ageing using a -garbage collection timer to monitor FBD entries. Expired entries will be +garbage collection timer to monitor FDB entries. Expired entries will be notified to the bridge using SWITCHDEV_FDB_DEL. See rocker driver for example of driver running ageing timer. -- cgit From 49bf4b36fdee4db3c8bc0507a8413a93a8c711cf Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 20 Aug 2017 21:48:14 +0200 Subject: tools lib bpf: improve warning Signed-off-by: Eric Leblond Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/lib/bpf/libbpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1a2c07eb7795..8c67a90dbd82 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -879,7 +879,8 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; int err = *pfd; - pr_warning("failed to create map: %s\n", + pr_warning("failed to create map (name: '%s'): %s\n", + obj->maps[i].name, strerror(errno)); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); -- cgit From 68a66d149a8c78ec6720f268597302883e48e9fa Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 19 Aug 2017 15:37:07 +0300 Subject: net_sched: fix order of queue length updates in qdisc_replace() This important to call qdisc_tree_reduce_backlog() after changing queue length. Parent qdisc should deactivate class in ->qlen_notify() called from qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen in zero. Missed class deactivations leads to crashes/warnings at picking packets from empty qdisc and corrupting state at reactivating this class in future. Signed-off-by: Konstantin Khlebnikov Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper") Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..67f815e5d525 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -806,8 +806,11 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) { - qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); + unsigned int qlen = old->q.qlen; + unsigned int backlog = old->qstats.backlog; + qdisc_reset(old); + qdisc_tree_reduce_backlog(old, qlen, backlog); } sch_tree_unlock(sch); -- cgit From 348a4002729ccab8b888b38cbc099efa2f2a2036 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 18 Aug 2017 17:14:49 -0700 Subject: ipv6: repair fib6 tree in failure case In fib6_add(), it is possible that fib6_add_1() picks an intermediate node and sets the node's fn->leaf to NULL in order to add this new route. However, if fib6_add_rt2node() fails to add the new route for some reason, fn->leaf will be left as NULL and could potentially cause crash when fn->leaf is accessed in fib6_locate(). This patch makes sure fib6_repair_tree() is called to properly repair fn->leaf in the above failure case. Here is the syzkaller reported general protection fault in fib6_locate: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 0 PID: 40937 Comm: syz-executor3 Not tainted Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d7d64100 ti: ffff8801d01a0000 task.ti: ffff8801d01a0000 RIP: 0010:[] [] __ipv6_prefix_equal64_half include/net/ipv6.h:475 [inline] RIP: 0010:[] [] ipv6_prefix_equal include/net/ipv6.h:492 [inline] RIP: 0010:[] [] fib6_locate_1 net/ipv6/ip6_fib.c:1210 [inline] RIP: 0010:[] [] fib6_locate+0x281/0x3c0 net/ipv6/ip6_fib.c:1233 RSP: 0018:ffff8801d01a36a8 EFLAGS: 00010202 RAX: 0000000000000020 RBX: ffff8801bc790e00 RCX: ffffc90002983000 RDX: 0000000000001219 RSI: ffff8801d01a37a0 RDI: 0000000000000100 RBP: ffff8801d01a36f0 R08: 00000000000000ff R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001 R13: dffffc0000000000 R14: ffff8801d01a37a0 R15: 0000000000000000 FS: 00007f6afd68c700(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004c6340 CR3: 00000000ba41f000 CR4: 00000000001426f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff8801d01a37a8 ffff8801d01a3780 ffffed003a0346f5 0000000c82a23ea0 ffff8800b7bd7700 ffff8801d01a3780 ffff8800b6a1c940 ffffffff82a23ea0 ffff8801d01a3920 ffff8801d01a3748 ffffffff82a223d6 ffff8801d7d64988 Call Trace: [] ip6_route_del+0x106/0x570 net/ipv6/route.c:2109 [] inet6_rtm_delroute+0xfd/0x100 net/ipv6/route.c:3075 [] rtnetlink_rcv_msg+0x549/0x7a0 net/core/rtnetlink.c:3450 [] netlink_rcv_skb+0x141/0x370 net/netlink/af_netlink.c:2281 [] rtnetlink_rcv+0x2f/0x40 net/core/rtnetlink.c:3456 [] netlink_unicast_kernel net/netlink/af_netlink.c:1206 [inline] [] netlink_unicast+0x518/0x750 net/netlink/af_netlink.c:1232 [] netlink_sendmsg+0x8ce/0xc30 net/netlink/af_netlink.c:1778 [] sock_sendmsg_nosec net/socket.c:609 [inline] [] sock_sendmsg+0xcf/0x110 net/socket.c:619 [] sock_write_iter+0x222/0x3a0 net/socket.c:834 [] new_sync_write+0x1dd/0x2b0 fs/read_write.c:478 [] __vfs_write+0xe4/0x110 fs/read_write.c:491 [] vfs_write+0x178/0x4b0 fs/read_write.c:538 [] SYSC_write fs/read_write.c:585 [inline] [] SyS_write+0xd9/0x1b0 fs/read_write.c:577 [] entry_SYSCALL_64_fastpath+0x12/0x17 Note: there is no "Fixes" tag as this seems to be a bug introduced very early. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e6f878067c68..5cc0ea038198 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1018,7 +1018,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, /* Create subtree root node */ sfn = node_alloc(); if (!sfn) - goto st_failure; + goto failure; sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); @@ -1035,12 +1035,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { /* If it is failed, discard just allocated - root, and then (in st_failure) stale node + root, and then (in failure) stale node in main tree. */ node_free(sfn); err = PTR_ERR(sn); - goto st_failure; + goto failure; } /* Now link new subtree to main tree */ @@ -1055,7 +1055,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { err = PTR_ERR(sn); - goto st_failure; + goto failure; } } @@ -1096,18 +1096,17 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); + goto failure; } return err; -#ifdef CONFIG_IPV6_SUBTREES - /* Subtree creation failed, probably main tree node - is orphan. If it is, shoot it. +failure: + /* fn->leaf could be NULL if fn is an intermediate node and we + * failed to add the new route to it in both subtree creation + * failure and fib6_add_rt2node() failure case. + * In both cases, fib6_repair_tree() should be called to fix + * fn->leaf. */ -st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); /* Always release dst as dst->__refcnt is guaranteed @@ -1115,7 +1114,6 @@ st_failure: */ dst_release_immediate(&rt->dst); return err; -#endif } /* -- cgit From 4459398b6d9e3055ced5de9820364b3bdd79ac25 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Mon, 31 Jul 2017 10:01:36 +0530 Subject: soc: ti: knav: Add a NULL pointer check for kdev in knav_pool_create knav_pool_create is an exported function. In the event of a call before knav_queue_probe, we encounter a NULL pointer dereference in the following line. Hence return -EPROBE_DEFER to the caller till the kdev pointer is non-NULL. Signed-off-by: Keerthy Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- drivers/soc/ti/knav_qmss_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 279e7c5551dd..39225de9d7f1 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -745,6 +745,9 @@ void *knav_pool_create(const char *name, bool slot_found; int ret; + if (!kdev) + return ERR_PTR(-EPROBE_DEFER); + if (!kdev->dev) return ERR_PTR(-ENODEV); -- cgit From a93c11527528c951b8d8db638162128a09e09ec2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Aug 2017 13:55:41 +0300 Subject: drm/i915/bxt: use NULL for GPIO connection ID The commit 213e08ad60ba ("drm/i915/bxt: add bxt dsi gpio element support") enables GPIO support for Broxton based platforms. While using that API we might get into troubles in the future, because we can't rely on label name in the driver since vendor firmware might provide any GPIO pin there, e.g. "reset", and even mark it in _DSD (in which case the request will fail). To avoid inconsistency and potential issues we have two options: a) generate GPIO ACPI mapping table and supply it via acpi_dev_add_driver_gpios(), or b) just pass NULL as connection ID. The b) approach is much simpler and would work since the driver relies on GPIO indices only. Moreover, the _CRS fallback mechanism, when requesting GPIO, has been made stricter, and supplying non-NULL connection ID when neither _DSD, nor GPIO ACPI mapping is present, is making request fail. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101921 Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups") Cc: Mika Kahola Cc: Jani Nikula Tested-by: Mika Kahola Signed-off-by: Andy Shevchenko Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170817105541.63914-1-andriy.shevchenko@linux.intel.com (cherry picked from commit cd55a1fbd21a820b7dd85a208b3170aa0b06adfa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dsi_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 7158c7ce9c09..91c07b0c8db9 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -306,7 +306,7 @@ static void bxt_exec_gpio(struct drm_i915_private *dev_priv, if (!gpio_desc) { gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev, - "panel", gpio_index, + NULL, gpio_index, value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH); -- cgit From d41a3c2be178783c85e05025265ab58fbb4d4ce1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Aug 2017 13:19:19 +0100 Subject: drm/i915: Clear lost context-switch interrupts across reset During a global reset, we disable the irq. As we disable the irq, the hardware may be raising a GT interrupt that we then ignore, leaving it pending in the GTIIR. After the reset, we then re-enable the irq, triggering the pending interrupt. However, that interrupt was for the stale state from before the reset, and the contents of the CSB buffer are now invalid. v2: Add a comment to make it clear that the double clear is purely my paranoia. Reported-by: "Dong, Chuanxiao" Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Signed-off-by: Chris Wilson Cc: "Dong, Chuanxiao" Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20170807121919.30165-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20170818090509.5363-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry (cherry picked from commit 64f09f00caf0a7cb40a8c0b85789bacba0f51d9e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7404cf2aac28..2afa4daa88e8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1221,6 +1221,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, +}; + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1245,9 +1253,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); - /* After a GPU reset, we may have requests to replay */ + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are double + * buffered, and if we only reset it once there may still be + * an interrupt pending. + */ + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* After a GPU reset, we may have requests to replay */ submit = false; for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { if (!port_isset(&port[n])) -- cgit From 03619844d81d7459874e88034f0f36d959f0e2df Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 25 Jul 2017 18:44:04 +0200 Subject: rtc: ds1307: fix regmap config Current max_register setting breaks reading nvram on certain chips and also reading the standard registers on RX8130 where register map starts at 0x10. Signed-off-by: Heiner Kallweit Fixes: 11e5890b5342 "rtc: ds1307: convert driver to regmap" Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 4fac49e55d47..4b43aa62fbc7 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1301,7 +1301,6 @@ static void ds1307_clks_register(struct ds1307 *ds1307) static const struct regmap_config regmap_config = { .reg_bits = 8, .val_bits = 8, - .max_register = 0x12, }; static int ds1307_probe(struct i2c_client *client, -- cgit From d83c2dbaa90a9bd6346e234d9802080a9c7b2fea Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 18 Aug 2017 09:16:08 +0800 Subject: mmc: block: prevent propagating R1_OUT_OF_RANGE for open-ending mode We to some extent should tolerate R1_OUT_OF_RANGE for open-ending mode as it is expected behaviour and most of the backup partition tables should be located near some of the last blocks which will always make open-ending read exceed the capacity of cards. Fixes: 9820a5b11101 ("mmc: core: for data errors, take response of stop cmd into account") Fixes: a04e6bae9e6f ("mmc: core: check also R1 response for stop commands") Signed-off-by: Shawn Lin Reviewed-by: Wolfram Sang Tested-by: Shawn Guo Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 49 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f1bbfd389367..80d1ec693d2d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1371,12 +1371,46 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, R1_CC_ERROR | /* Card controller error */ \ R1_ERROR) /* General/unknown error */ -static bool mmc_blk_has_cmd_err(struct mmc_command *cmd) +static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) { - if (!cmd->error && cmd->resp[0] & CMD_ERRORS) - cmd->error = -EIO; + u32 val; - return cmd->error; + /* + * Per the SD specification(physical layer version 4.10)[1], + * section 4.3.3, it explicitly states that "When the last + * block of user area is read using CMD18, the host should + * ignore OUT_OF_RANGE error that may occur even the sequence + * is correct". And JESD84-B51 for eMMC also has a similar + * statement on section 6.8.3. + * + * Multiple block read/write could be done by either predefined + * method, namely CMD23, or open-ending mode. For open-ending mode, + * we should ignore the OUT_OF_RANGE error as it's normal behaviour. + * + * However the spec[1] doesn't tell us whether we should also + * ignore that for predefined method. But per the spec[1], section + * 4.15 Set Block Count Command, it says"If illegal block count + * is set, out of range error will be indicated during read/write + * operation (For example, data transfer is stopped at user area + * boundary)." In another word, we could expect a out of range error + * in the response for the following CMD18/25. And if argument of + * CMD23 + the argument of CMD18/25 exceed the max number of blocks, + * we could also expect to get a -ETIMEDOUT or any error number from + * the host drivers due to missing data response(for write)/data(for + * read), as the cards will stop the data transfer by itself per the + * spec. So we only need to check R1_OUT_OF_RANGE for open-ending mode. + */ + + if (!brq->stop.error) { + bool oor_with_open_end; + /* If there is no error yet, check R1 response */ + + val = brq->stop.resp[0] & CMD_ERRORS; + oor_with_open_end = val & R1_OUT_OF_RANGE && !brq->mrq.sbc; + + if (val && !oor_with_open_end) + brq->stop.error = -EIO; + } } static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, @@ -1400,8 +1434,11 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, * stop.error indicates a problem with the stop command. Data * may have been transferred, or may still be transferring. */ - if (brq->sbc.error || brq->cmd.error || mmc_blk_has_cmd_err(&brq->stop) || - brq->data.error) { + + mmc_blk_eval_resp_error(brq); + + if (brq->sbc.error || brq->cmd.error || + brq->stop.error || brq->data.error) { switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) { case ERR_RETRY: return MMC_BLK_RETRY; -- cgit From 4a4eefcd0e49f9f339933324c1bde431186a0a7d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 13:05:11 +0200 Subject: KVM: s390: sthyi: fix sthyi inline assembly The sthyi inline assembly misses register r3 within the clobber list. The sthyi instruction will always write a return code to register "R2+1", which in this case would be r3. Due to that we may have register corruption and see host crashes or data corruption depending on how gcc decided to allocate and use registers during compile time. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Cc: # 4.8+ Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 926b5244263e..2773a2f6a5c4 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -394,7 +394,7 @@ static int sthyi(u64 vaddr) "srl %[cc],28\n" : [cc] "=d" (cc) : [code] "d" (code), [addr] "a" (addr) - : "memory", "cc"); + : "3", "memory", "cc"); return cc; } -- cgit From 857b8de96795646c5891cf44ae6fb19b9ff74bf9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 14:27:30 +0200 Subject: KVM: s390: sthyi: fix specification exception detection sthyi should only generate a specification exception if the function code is zero and the response buffer is not on a 4k boundary. The current code would also test for unknown function codes if the response buffer, that is currently only defined for function code 0, is not on a 4k boundary and incorrectly inject a specification exception instead of returning with condition code 3 and return code 4 (unsupported function code). Fix this by moving the boundary check. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Cc: # 4.8+ Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 2773a2f6a5c4..a2e5c24f47a7 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); trace_kvm_s390_handle_sthyi(vcpu, code, addr); - if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (code & 0xffff) { @@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* * If the page has not yet been faulted in, we want to do that * now and not after all the expensive calculations. -- cgit From deecd4d71b12626db48544faa66bb897e2cafd07 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Jul 2017 15:56:55 -0500 Subject: objtool: Fix '-mtune=atom' decoding support in objtool 2.0 With '-mtune=atom', which is enabled with CONFIG_MATOM=y, GCC uses some unusual instructions for setting up the stack. Instead of: mov %rsp, %rbp it does: lea (%rsp), %rbp And instead of: add imm, %rsp it does: lea disp(%rsp), %rsp Add support for these instructions to the objtool decoder. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0") Link: http://lkml.kernel.org/r/4ea1db896e821226efe1f8e09f270771bde47e65.1501188854.git.jpoimboe@redhat.com [ This is a cherry-picked version of upcoming commit 5b8de48e82ba. ] Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index a36c2eba64e7..4559a21a8de2 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, case 0x8d: if (rex == 0x48 && modrm == 0x65) { - /* lea -disp(%rbp), %rsp */ + /* lea disp(%rbp), %rsp */ *type = INSN_STACK; op->src.type = OP_SRC_ADD; op->src.reg = CFI_BP; @@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; } + if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) && + sib == 0x24) { + + /* lea disp(%rsp), %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + if (rex == 0x48 && modrm == 0x2c && sib == 0x24) { + + /* lea (%rsp), %rbp */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + break; + } + if (rex == 0x4c && modrm == 0x54 && sib == 0x24 && insn.displacement.value == 8) { -- cgit From dd1c1f2f2028a7b851f701fc6a8ebe39dcb95e7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2017 17:35:02 +0200 Subject: pids: make task_tgid_nr_ns() safe This was reported many times, and this was even mentioned in commit 52ee2dfdd4f5 ("pids: refactor vnr/nr_ns helpers to make them safe") but somehow nobody bothered to fix the obvious problem: task_tgid_nr_ns() is not safe because task->group_leader points to nowhere after the exiting task passes exit_notify(), rcu_read_lock() can not help. We really need to change __unhash_process() to nullify group_leader, parent, and real_parent, but this needs some cleanups. Until then we can turn task_tgid_nr_ns() into another user of __task_pid_nr_ns() and fix the problem. Reported-by: Troy Kensinger Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 4 +++- include/linux/sched.h | 51 +++++++++++++++++++++++++++------------------------ kernel/pid.c | 11 ++++------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/include/linux/pid.h b/include/linux/pid.h index 4d179316e431..719582744a2e 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -8,7 +8,9 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + /* only valid to __task_pid_nr_ns() */ + __PIDTYPE_TGID }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..c05ac5f5aa03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,13 +1163,6 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_tgid(tsk)); -} - /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. @@ -1185,23 +1178,6 @@ static inline int pid_alive(const struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); @@ -1223,6 +1199,33 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + /* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { diff --git a/kernel/pid.c b/kernel/pid.c index c69c30d827e5..020dedbdf066 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -527,8 +527,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (!ns) ns = task_active_pid_ns(current); if (likely(pid_alive(task))) { - if (type != PIDTYPE_PID) + if (type != PIDTYPE_PID) { + if (type == __PIDTYPE_TGID) + type = PIDTYPE_PID; task = task->group_leader; + } nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); } rcu_read_unlock(); @@ -537,12 +540,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, } EXPORT_SYMBOL(__task_pid_nr_ns); -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return pid_nr_ns(task_tgid(tsk), ns); -} -EXPORT_SYMBOL(task_tgid_nr_ns); - struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return ns_of_pid(task_pid(tsk)); -- cgit From 2dc77533f1e495788d73ffa4bee4323b2646d2bb Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 13 Aug 2017 23:14:58 +0200 Subject: sparc: kernel/pcic: silence gcc 7.x warning in pcibios_fixup_bus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building the kernel for Sparc using gcc 7.x, the build fails with: arch/sparc/kernel/pcic.c: In function ‘pcibios_fixup_bus’: arch/sparc/kernel/pcic.c:647:8: error: ‘cmd’ may be used uninitialized in this function [-Werror=maybe-uninitialized] cmd |= PCI_COMMAND_IO; ^~ The simplified code looks like this: unsigned int cmd; [...] pcic_read_config(dev->bus, dev->devfn, PCI_COMMAND, 2, &cmd); [...] cmd |= PCI_COMMAND_IO; I.e, the code assumes that pcic_read_config() will always initialize cmd. But it's not the case. Looking at pcic_read_config(), if bus->number is != 0 or if the size is not one of 1, 2 or 4, *val will not be initialized. As a simple fix, we initialize cmd to zero at the beginning of pcibios_fixup_bus. Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller --- arch/sparc/kernel/pcic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index a38787b84322..732af9a9f6dd 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -602,7 +602,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev; int i, has_io, has_mem; - unsigned int cmd; + unsigned int cmd = 0; struct linux_pcic *pcic; /* struct linux_pbm_info* pbm = &pcic->pbm; */ int node; -- cgit From 0f0fc5c090055983163ed8e7b3a73980e128048c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 15 Aug 2017 09:30:05 +0100 Subject: Revert "mfd: da9061: Fix to remove BBAT_CONT register from chip model" This patch was applied to the MFD twice, causing unwanted behavour. This reverts commit b77eb79acca3203883e8d8dbc7f2b842def1bff8. Fixes: b77eb79acca3 ("mfd: da9061: Fix to remove BBAT_CONT register from chip model") Reported-by: Steve Twiss Reviewed-by: Steve Twiss Signed-off-by: Lee Jones --- drivers/mfd/da9062-core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c index fbe0f245ce8e..fe1811523e4a 100644 --- a/drivers/mfd/da9062-core.c +++ b/drivers/mfd/da9062-core.c @@ -644,6 +644,9 @@ static const struct regmap_range da9062_aa_readable_ranges[] = { }, { .range_min = DA9062AA_VLDO1_B, .range_max = DA9062AA_VLDO4_B, + }, { + .range_min = DA9062AA_BBAT_CONT, + .range_max = DA9062AA_BBAT_CONT, }, { .range_min = DA9062AA_INTERFACE, .range_max = DA9062AA_CONFIG_E, @@ -720,6 +723,9 @@ static const struct regmap_range da9062_aa_writeable_ranges[] = { }, { .range_min = DA9062AA_VLDO1_B, .range_max = DA9062AA_VLDO4_B, + }, { + .range_min = DA9062AA_BBAT_CONT, + .range_max = DA9062AA_BBAT_CONT, }, { .range_min = DA9062AA_GP_ID_0, .range_max = DA9062AA_GP_ID_19, -- cgit From 07b3b5e9ed807a0d2077319b8e43a42e941db818 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 22 Aug 2017 08:33:53 +0200 Subject: ALSA: usb-audio: Add delay quirk for H650e/Jabra 550a USB headsets These headsets reports a lot of: cannot set freq 44100 to ep 0x81 and need a small delay between sample rate settings, just like Zoom R16/24. Add both headsets to the Zoom R16/24 quirk for a 1 ms delay between control msgs. Signed-off-by: Joakim Tjernlund Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6a03f9697039..5d2a63248b1d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1309,10 +1309,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); - /* Zoom R16/24 needs a tiny delay here, otherwise requests like - * get/set frequency return as failed despite actually succeeding. + /* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here, + * otherwise requests like get/set frequency return as failed despite + * actually succeeding. */ - if (chip->usb_id == USB_ID(0x1686, 0x00dd) && + if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || + chip->usb_id == USB_ID(0x046d, 0x0a46) || + chip->usb_id == USB_ID(0x0b0e, 0x0349)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(1); } -- cgit From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Aug 2017 13:05:58 +0100 Subject: drm: Release driver tracking before making the object available again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), but now the exposure is via the PRIME lookup tables. If we remove the object/handle from the PRIME lut, then a new request for the same object/fd will generate a new handle, thus for a short window that object is known to userspace by two different handles. Fix this by releasing the driver tracking before PRIME. Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs imported buffer list (v2)") Signed-off-by: Chris Wilson Cc: David Airlie Cc: Daniel Vetter Cc: Rob Clark Cc: Ville Syrjälä Cc: Thierry Reding Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 8dc11064253d..cdaac37907b1 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_gem_object *obj = ptr; struct drm_device *dev = obj->dev; + if (dev->driver->gem_close_object) + dev->driver->gem_close_object(obj, file_priv); + if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_gem_remove_prime_handles(obj, file_priv); drm_vma_node_revoke(&obj->vma_node, file_priv); - if (dev->driver->gem_close_object) - dev->driver->gem_close_object(obj, file_priv); - drm_gem_object_handle_put_unlocked(obj); return 0; -- cgit From 88c54cdf61f508ebcf8da2d819f5dfc03e954d1d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2017 08:15:13 +0200 Subject: ALSA: core: Fix unexpected error at replacing user TLV When user tries to replace the user-defined control TLV, the kernel checks the change of its content via memcmp(). The problem is that the kernel passes the return value from memcmp() as is. memcmp() gives a non-zero negative value depending on the comparison result, and this shall be recognized as an error code. The patch covers that corner-case, return 1 properly for the changed TLV. Fixes: 8aa9b586e420 ("[ALSA] Control API - more robust TLV implementation") Cc: Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 3c6be1452e35..4525e127afd9 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1137,7 +1137,7 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, mutex_lock(&ue->card->user_ctl_lock); change = ue->tlv_data_size != size; if (!change) - change = memcmp(ue->tlv_data, new_data, size); + change = memcmp(ue->tlv_data, new_data, size) != 0; kfree(ue->tlv_data); ue->tlv_data = new_data; ue->tlv_data_size = size; -- cgit From 096622104e14d8a1db4860bd557717067a0515d2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 18 Aug 2017 16:57:01 +0100 Subject: arm64: fpsimd: Prevent registers leaking across exec There are some tricky dependencies between the different stages of flushing the FPSIMD register state during exec, and these can race with context switch in ways that can cause the old task's regs to leak across. In particular, a context switch during the memset() can cause some of the task's old FPSIMD registers to reappear. Disabling preemption for this small window would be no big deal for performance: preemption is already disabled for similar scenarios like updating the FPSIMD registers in sigreturn. So, instead of rearranging things in ways that might swap existing subtle bugs for new ones, this patch just disables preemption around the FPSIMD state flushing so that races of this type can't occur here. This brings fpsimd_flush_thread() into line with other code paths. Cc: stable@vger.kernel.org Fixes: 674c242c9323 ("arm64: flush FP/SIMD state correctly after execve()") Reviewed-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..c7b4995868e1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -161,9 +161,11 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + preempt_enable(); } /* -- cgit From 289d07a2dc6c6b6f3e4b8a62669320d99dbe6c3d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Jul 2017 15:19:22 +0100 Subject: arm64: mm: abort uaccess retries upon fatal signal When there's a fatal signal pending, arm64's do_page_fault() implementation returns 0. The intent is that we'll return to the faulting userspace instruction, delivering the signal on the way. However, if we take a fatal signal during fixing up a uaccess, this results in a return to the faulting kernel instruction, which will be instantly retried, resulting in the same fault being taken forever. As the task never reaches userspace, the signal is not delivered, and the task is left unkillable. While the task is stuck in this state, it can inhibit the forward progress of the system. To avoid this, we must ensure that when a fatal signal is pending, we apply any necessary fixup for a faulting kernel instruction. Thus we will return to an error path, and it is up to that code to make forward progress towards delivering the fatal signal. Cc: Catalin Marinas Cc: Laura Abbott Cc: stable@vger.kernel.org Reviewed-by: Steve Capper Tested-by: Steve Capper Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2509e4fe6992..1f22a41565a3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -435,8 +435,11 @@ retry: * the mmap_sem because it would already be released * in __lock_page_or_retry in mm/filemap.c. */ - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current)) { + if (!user_mode(regs)) + goto no_context; return 0; + } /* * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of -- cgit From 4a23e56ad6549d0b8c0fac6d9eb752884379c391 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Aug 2017 18:42:30 +0100 Subject: arm64: kaslr: ignore modulo offset when validating virtual displacement In the KASLR setup routine, we ensure that the early virtual mapping of the kernel image does not cover more than a single table entry at the level above the swapper block level, so that the assembler routines involved in setting up this mapping can remain simple. In this calculation we add the proposed KASLR offset to the values of the _text and _end markers, and reject it if they would end up falling in different swapper table sized windows. However, when taking the addresses of _text and _end, the modulo offset (the physical displacement modulo 2 MB) is already accounted for, and so adding it again results in incorrect results. So disregard the modulo offset from the calculation. Fixes: 08cdac619c81 ("arm64: relocatable: deal with physically misaligned ...") Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 1 - arch/arm64/kernel/kaslr.c | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..adb0910b88f5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -354,7 +354,6 @@ __primary_switched: tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? b.ne 0f mov x0, x21 // pass FDT address in x0 - mov x1, x23 // pass modulo offset in x1 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index a9710efb8c01..1d95c204186b 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -75,7 +75,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) +u64 __init kaslr_early_init(u64 dt_phys) { void *fdt; u64 seed, offset, mask, module_range; @@ -133,9 +133,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this * happens, increase the KASLR offset by the size of the kernel image * rounded up by SWAPPER_BLOCK_SIZE. + * + * NOTE: The references to _text and _end below will already take the + * modulo offset (the physical displacement modulo 2 MB) into + * account, given that the physical placement is controlled by + * the loader, and will not change as a result of the virtual + * mapping we choose. */ - if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) { + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) { u64 kimg_sz = _end - _text; offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) & mask; -- cgit From a067d94d37ed590fd17684d18c3edf52110d305a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 22 Aug 2017 15:39:00 +0100 Subject: arm64: kaslr: Adjust the offset to avoid Image across alignment boundary With 16KB pages and a kernel Image larger than 16MB, the current kaslr_early_init() logic for avoiding mappings across swapper table boundaries fails since increasing the offset by kimg_sz just moves the problem to the next boundary. This patch rounds the offset down to (1 << SWAPPER_TABLE_SHIFT) if the Image crosses a PMD_SIZE boundary. Fixes: afd0e5a87670 ("arm64: kaslr: Fix up the kernel image alignment") Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Will Deacon Cc: Neeraj Upadhyay Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 1d95c204186b..47080c49cc7e 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -131,8 +131,7 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, increase the KASLR offset by the size of the kernel image - * rounded up by SWAPPER_BLOCK_SIZE. + * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT). * * NOTE: The references to _text and _end below will already take the * modulo offset (the physical displacement modulo 2 MB) into @@ -141,11 +140,8 @@ u64 __init kaslr_early_init(u64 dt_phys) * mapping we choose. */ if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) { - u64 kimg_sz = _end - _text; - offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) - & mask; - } + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT); if (IS_ENABLED(CONFIG_KASAN)) /* -- cgit From 65159c051c45f269cf40a14f9404248f2d524920 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Tue, 1 Aug 2017 10:58:35 +0530 Subject: RDMA/uverbs: Initialize cq_context appropriately Initializing cq_context with ev_queue in create_cq(), leads to NULL pointer dereference in ib_uverbs_comp_handler(), if application doesnot use completion channel. This patch fixes the cq_context initialization. Fixes: 1e7710f3f65 ("IB/core: Change completion channel to use the reworked") Cc: stable@vger.kernel.org # 4.12 Signed-off-by: Potnuri Bharat Teja Reviewed-by: Matan Barak Signed-off-by: Doug Ledford (cherry picked from commit 699a2d5b1b880b4e4e1c7d55fa25659322cf5b51) --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c551d2b275fd..55822ae71955 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1015,7 +1015,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = &ev_file->ev_queue; + cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; -- cgit From 9b40eebcd339c47921ff8b04c77af7c762b74216 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Aug 2017 22:44:13 +0200 Subject: ACPICA: Fix acpi_evaluate_object_typed() Commit 2d2a954375a0 (ACPICA: Update two error messages to emit control method name) causes acpi_evaluate_object_typed() to fail if its pathname argument is NULL, but some callers of that function in the kernel, particularly acpi_nondev_subnode_data_ok(), pass NULL as pathname to it and expect it to work. For this reason, make acpi_evaluate_object_typed() check if its pathname argument is NULL and fall back to using the pathname of its handle argument if that is the case. Reported-by: Sakari Ailus Tested-by: Yang, Hyungwoo Fixes: 2d2a954375a0 (ACPICA: Update two error messages to emit control method name) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsxfeval.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index 538c61677c10..783f4c838aee 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -100,9 +100,13 @@ acpi_evaluate_object_typed(acpi_handle handle, free_buffer_on_error = TRUE; } - status = acpi_get_handle(handle, pathname, &target_handle); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); + if (pathname) { + status = acpi_get_handle(handle, pathname, &target_handle); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + } else { + target_handle = handle; } full_pathname = acpi_ns_get_external_pathname(target_handle); -- cgit From b5212f57da145e53df790a7e211d94daac768bf8 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 22 Aug 2017 23:39:58 +0300 Subject: ACPI: device property: Fix node lookup in acpi_graph_get_child_prop_value() acpi_graph_get_child_prop_value() is intended to find a child node with a certain property value pair. The check if (!fwnode_property_read_u32(fwnode, prop_name, &nr)) continue; is faulty: fwnode_property_read_u32() returns zero on success, not on failure, leading to comparing values only if the searched property was not found. Moreover, the check is made against the parent device node instead of the child one as it should be. Fixes: 79389a83bc38 (ACPI / property: Add support for remote endpoints) Reported-by: Hyungwoo Yang Signed-off-by: Sakari Ailus Cc: 4.12+ # 4.12+ [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 917c789f953d..476a52c60cf3 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1047,7 +1047,7 @@ static struct fwnode_handle *acpi_graph_get_child_prop_value( fwnode_for_each_child_node(fwnode, child) { u32 nr; - if (!fwnode_property_read_u32(fwnode, prop_name, &nr)) + if (fwnode_property_read_u32(child, prop_name, &nr)) continue; if (val == nr) -- cgit From b2a6d1b999a4c13e5997bb864694e77172d45250 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 28 Jul 2017 13:56:08 +0200 Subject: ANDROID: binder: fix proc->tsk check. Commit c4ea41ba195d ("binder: use group leader instead of open thread")' was incomplete and didn't update a check in binder_mmap(), causing all mmap() calls into the binder driver to fail. Signed-off-by: Martijn Coenen Tested-by: John Stultz Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f7665c31feca..831cdd7d197d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3362,7 +3362,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) const char *failure_string; struct binder_buffer *buffer; - if (proc->tsk != current) + if (proc->tsk != current->group_leader) return -EINVAL; if ((vma->vm_end - vma->vm_start) > SZ_4M) -- cgit From ffeaf9aaf97b4bdaf114d6df52f800d71918768c Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 16 Aug 2017 15:48:03 +0800 Subject: drm/i915/gvt: Fix the kernel null pointer error once error happens in shadow_indirect_ctx function, the variable wa_ctx->indirect_ctx.obj is not initialized but accessed, so the kernel null point panic occurs. Fixes: 894cf7d15634 ("drm/i915/gvt: i915_gem_object_create() returns an error pointer") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 713848c36349..e556a46cd4c2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2714,7 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) unmap_src: i915_gem_object_unpin_map(obj); put_obj: - i915_gem_object_put(wa_ctx->indirect_ctx.obj); + i915_gem_object_put(obj); return ret; } -- cgit From bbba6f9d3da357bbabc6fda81e99ff5584500e76 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2017 09:30:17 +0200 Subject: ALSA: hda - Add stereo mic quirk for Lenovo G50-70 (17aa:3978) Lenovo G50-70 (17aa:3978) with Conexant codec chip requires the similar workaround for the inverted stereo dmic like other Lenovo models. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1020657 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 8c1289963c80..a81aacf684b2 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -947,6 +947,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), SND_PCI_QUIRK(0x1c06, 0x2011, "Lemote A1004", CXT_PINCFG_LEMOTE_A1004), -- cgit From dbeb0c8e84805b78f3979b53a4a12751403e4520 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Aug 2017 16:46:15 +0200 Subject: ARM: at91: don't select CONFIG_ARM_CPU_SUSPEND for old platforms My previous patch fixed a link error for all at91 platforms when CONFIG_ARM_CPU_SUSPEND was not set, however this caused another problem on a configuration that enabled CONFIG_ARCH_AT91 but none of the individual SoCs, and that also enabled CPU_ARM720 as the only CPU: warning: (ARCH_AT91 && SOC_IMX23 && SOC_IMX28 && ARCH_PXA && MACH_MVEBU_V7 && SOC_IMX6 && ARCH_OMAP3 && ARCH_OMAP4 && SOC_OMAP5 && SOC_AM33XX && SOC_DRA7XX && ARCH_EXYNOS3 && ARCH_EXYNOS4 && EXYNOS5420_MCPM && EXYNOS_CPU_SUSPEND && ARCH_VEXPRESS_TC2_PM && ARM_BIG_LITTLE_CPUIDLE && ARM_HIGHBANK_CPUIDLE && QCOM_PM) selects ARM_CPU_SUSPEND which has unmet direct dependencies (ARCH_SUSPEND_POSSIBLE) arch/arm/kernel/sleep.o: In function `cpu_resume': (.text+0xf0): undefined reference to `cpu_arm720_suspend_size' arch/arm/kernel/suspend.o: In function `__cpu_suspend_save': suspend.c:(.text+0x134): undefined reference to `cpu_arm720_do_suspend' This improves the hack some more by only selecting ARM_CPU_SUSPEND for the part that requires it, and changing pm.c to drop the contents of unused init functions so we no longer refer to cpu_resume on at91 platforms that don't need it. Fixes: cc7a938f5f30 ("ARM: at91: select CONFIG_ARM_CPU_SUSPEND") Acked-by: Alexandre Belloni Signed-off-by: Arnd Bergmann --- arch/arm/mach-at91/Kconfig | 2 +- arch/arm/mach-at91/pm.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index d735e5fc4772..195da38cb9a2 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -1,7 +1,7 @@ menuconfig ARCH_AT91 bool "Atmel SoCs" depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7 || ARM_SINGLE_ARMV7M - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND if PM && ARCH_MULTI_V7 select COMMON_CLK_AT91 select GPIOLIB select PINCTRL diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 667fddac3856..5036f996e694 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -608,6 +608,9 @@ static void __init at91_pm_init(void (*pm_idle)(void)) void __init at91rm9200_pm_init(void) { + if (!IS_ENABLED(CONFIG_SOC_AT91RM9200)) + return; + at91_dt_ramc(); /* @@ -620,18 +623,27 @@ void __init at91rm9200_pm_init(void) void __init at91sam9_pm_init(void) { + if (!IS_ENABLED(CONFIG_SOC_AT91SAM9)) + return; + at91_dt_ramc(); at91_pm_init(at91sam9_idle); } void __init sama5_pm_init(void) { + if (!IS_ENABLED(CONFIG_SOC_SAMA5)) + return; + at91_dt_ramc(); at91_pm_init(NULL); } void __init sama5d2_pm_init(void) { + if (!IS_ENABLED(CONFIG_SOC_SAMA5D2)) + return; + at91_pm_backup_init(); sama5_pm_init(); } -- cgit From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 3 Aug 2017 13:09:03 +0530 Subject: cifs: Fix df output for users with quota limits The df for a SMB2 share triggers a GetInfo call for FS_FULL_SIZE_INFORMATION. The values returned are used to populate struct statfs. The problem is that none of the information returned by the call contains the total blocks available on the filesystem. Instead we use the blocks available to the user ie. quota limitation when filling out statfs.f_blocks. The information returned does contain Actual free units on the filesystem and is used to populate statfs.f_bfree. For users with quota enabled, it can lead to situations where the total free space reported is more than the total blocks on the system ending up with df reports like the following # df -h /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a To fix this problem, we instead populate both statfs.f_bfree with the same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This is similar to what is done already in the code for cifs and df now reports the quota information for the user used to mount the share. # df --si /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a Signed-off-by: Sachin Prabhu Signed-off-by: Pierguido Lambri Signed-off-by: Steve French Cc: --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5fb2fc2d0080..97edb4d376cd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3219,8 +3219,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } -- cgit From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Aug 2017 14:48:14 +1000 Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Add checking for the path component length and verify it is <= the maximum that the server advertizes via FileFsAttributeInformation. With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT when users to access an overlong path. To test this, try to cd into a (non-existing) directory on a CIFS share that has a too long name: cd /mnt/aaaaaaaaaaaaaaa... and it now should show a good error message from the shell: bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long rh bz 1153996 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: --- fs/cifs/dir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 56366e984076..569d3fb736be 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,15 +194,20 @@ cifs_bp_rename_retry: } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. */ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + tcon->fsAttrInfo.MaxPathNameComponentLength)) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; -- cgit From 93a4c8355e0e448d83f31801b4c72f66e4360975 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 23 Aug 2017 18:24:50 +0200 Subject: ARM: dts: exynos: add needs-hpd for Odroid-XU3/4 CEC support was added for Exynos5 in 4.13, but for the Odroids we need to set 'needs-hpd' as well since CEC is disabled when there is no HDMI hotplug signal, just as for the exynos4 Odroid-U3. This is due to the level-shifter that is disabled when there is no HPD, thus blocking the CEC signal as well. Same close-but-no-cigar board design as the Odroid-U3. Tested with my Odroid XU4. Signed-off-by: Hans Verkuil Signed-off-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi index f92f95741207..a183b56283f8 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi @@ -266,6 +266,7 @@ &hdmicec { status = "okay"; + needs-hpd; }; &hsi2c_4 { -- cgit From ea0ea2bc6dd8923d86a0fa98743dbeed98645486 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 18 Aug 2017 16:08:13 -0700 Subject: blk-throttle: cap discard request size discard request usually is very big and easily use all bandwidth budget of a cgroup. discard request size doesn't really mean the size of data written, so it doesn't make sense to account it into bandwidth budget. Jens pointed out treating the size 0 doesn't make sense too, because discard request does have cost. But it's not easy to find the actual cost. This patch simply makes the size one sector. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a7285bf2831c..80f5481fe9f6 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -382,6 +382,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) } \ } while (0) +static inline unsigned int throtl_bio_data_size(struct bio *bio) +{ + /* assume it's one sector */ + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) + return 512; + return bio->bi_iter.bi_size; +} + static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(&qn->node); @@ -934,6 +942,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, bool rw = bio_data_dir(bio); u64 bytes_allowed, extra_bytes, tmp; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; + unsigned int bio_size = throtl_bio_data_size(bio); jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; @@ -947,14 +956,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, do_div(tmp, HZ); bytes_allowed = tmp; - if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { + if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { if (wait) *wait = 0; return true; } /* Calc approx time to dispatch */ - extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed; + extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw)); if (!jiffy_wait) @@ -1034,11 +1043,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); + unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio->bi_iter.bi_size; + tg->bytes_disp[rw] += bio_size; tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio->bi_iter.bi_size; + tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* -- cgit From 1e6ec9ea89d30739b9447c1860fcb07fc29f3aef Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 23 Aug 2017 14:54:59 -0700 Subject: Revert "loop: support 4k physical blocksize" There's some stuff still up in the air, let's not get stuck with a subpar ABI. I'll follow up with something better for 4.14. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/loop.c | 42 ++++++------------------------------------ drivers/block/loop.h | 1 - include/uapi/linux/loop.h | 3 --- 3 files changed, 6 insertions(+), 40 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ef8334949b42..f321b96405f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) } static int -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, - loff_t logical_blocksize) +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; @@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) { - lo->lo_logical_blocksize = logical_blocksize; - blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize); - blk_queue_logical_block_size(lo->lo_queue, - lo->lo_logical_blocksize); - } set_capacity(lo->lo_disk, x); bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); /* let user-space know about the new size */ @@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - int lo_bits = 9; /* * We use punch hole to reclaim the free space used by the @@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) - lo_bits = blksize_bits(lo->lo_logical_blocksize); - blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits); - blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits); + blk_queue_max_discard_sectors(q, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->use_dio = false; lo->lo_blocksize = lo_blocksize; - lo->lo_logical_blocksize = 512; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); - int lo_flags = lo->lo_flags; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && @@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (err) goto exit; - if (info->lo_flags & LO_FLAGS_BLOCKSIZE) { - if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE)) - lo->lo_logical_blocksize = 512; - lo->lo_flags |= LO_FLAGS_BLOCKSIZE; - if (LO_INFO_BLOCKSIZE(info) != 512 && - LO_INFO_BLOCKSIZE(info) != 1024 && - LO_INFO_BLOCKSIZE(info) != 2048 && - LO_INFO_BLOCKSIZE(info) != 4096) - return -EINVAL; - if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize) - return -EINVAL; - } - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit || - lo->lo_flags != lo_flags || - ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) && - lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) { - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit, - LO_INFO_BLOCKSIZE(info))) { + lo->lo_sizelimit != info->lo_sizelimit) { + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; } @@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit, - lo->lo_logical_blocksize); + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int loop_set_dio(struct loop_device *lo, unsigned long arg) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 2c096b9a17b8..fecd3f97ef8c 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -49,7 +49,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; unsigned lo_blocksize; - unsigned lo_logical_blocksize; void *key_data; gfp_t old_gfp_mask; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index a3960f98679c..c8125ec1f4f2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -22,7 +22,6 @@ enum { LO_FLAGS_AUTOCLEAR = 4, LO_FLAGS_PARTSCAN = 8, LO_FLAGS_DIRECT_IO = 16, - LO_FLAGS_BLOCKSIZE = 32, }; #include /* for __kernel_old_dev_t */ @@ -60,8 +59,6 @@ struct loop_info64 { __u64 lo_init[2]; }; -#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0] - /* * Loop filter types */ -- cgit From 143c97cc652949893c8056c679012f0aeccb80e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Aug 2017 18:16:11 -0700 Subject: Revert "pty: fix the cached path of the pty slave file descriptor in the master" This reverts commit c8c03f1858331e85d397bacccd34ef409aae993c. It turns out that while fixing the ptmx file descriptor to have the correct 'struct path' to the associated slave pty is a really good thing, it breaks some user space tools for a very annoying reason. The problem is that /dev/ptmx and its associated slave pty (/dev/pts/X) are on different mounts. That was what caused us to have the wrong path in the first place (we would mix up the vfsmount of the 'ptmx' node, with the dentry of the pty slave node), but it also means that now while we use the right vfsmount, having the pty master open also keeps the pts mount busy. And it turn sout that that makes 'pbuilder' very unhappy, as noted by Stefan Lippers-Hollmann: "This patch introduces a regression for me when using pbuilder 0.228.7[2] (a helper to build Debian packages in a chroot and to create and update its chroots) when trying to umount /dev/ptmx (inside the chroot) on Debian/ unstable (full log and pbuilder configuration file[3] attached). [...] Setting up build-essential (12.3) ... Processing triggers for libc-bin (2.24-15) ... I: unmounting dev/ptmx filesystem W: Could not unmount dev/ptmx: umount: /var/cache/pbuilder/build/1340/dev/ptmx: target is busy (In some cases useful info about processes that use the device is found by lsof(8) or fuser(1).)" apparently pbuilder tries to unmount the /dev/pts filesystem while still holding at least one master node open, which is arguably not very nice, but we don't break user space even when fixing other bugs. So this commit has to be reverted. I'll try to figure out a way to avoid caching the path to the slave pty in the master pty. The only thing that actually wants that slave pty path is the "TIOCGPTPEER" ioctl, and I think we could just recreate the path at that time. Reported-by: Stefan Lippers-Hollmann Cc: Eric W Biederman Cc: Christian Brauner Cc: Al Viro Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 7 ++----- fs/devpts/inode.c | 4 +--- include/linux/devpts_fs.h | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 1fc80ea87c13..284749fb0f6b 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -793,7 +793,6 @@ static int ptmx_open(struct inode *inode, struct file *filp) struct tty_struct *tty; struct path *pts_path; struct dentry *dentry; - struct vfsmount *mnt; int retval; int index; @@ -806,7 +805,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_acquire(filp, &mnt); + fsi = devpts_acquire(filp); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; @@ -850,7 +849,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); if (!pts_path) goto err_release; - pts_path->mnt = mnt; + pts_path->mnt = filp->f_path.mnt; pts_path->dentry = dentry; path_get(pts_path); tty->link->driver_data = pts_path; @@ -867,7 +866,6 @@ err_path_put: path_put(pts_path); kfree(pts_path); err_release: - mntput(mnt); tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); @@ -876,7 +874,6 @@ out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); - mntput(mnt); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 44dfbca9306f..108df2e3602c 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) +struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; struct path path; @@ -142,7 +142,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) path = filp->f_path; path_get(&path); - *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -166,7 +165,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); - *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 7883e901f65c..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); +struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Aug 2017 18:43:48 +1000 Subject: timers: Fix excessive granularity of new timers after a nohz idle When a timer base is idle, it is forwarded when a new timer is added to ensure that granularity does not become excessive. When not idle, the timer tick is expected to increment the base. However there are several problems: - If an existing timer is modified, the base is forwarded only after the index is calculated. - The base is not forwarded by add_timer_on. - There is a window after a timer is restarted from a nohz idle, after it is marked not-idle and before the timer tick on this CPU, where a timer may be added but the ancient base does not get forwarded. These result in excessive granularity (a 1 jiffy timeout can blow out to 100s of jiffies), which cause the rcu lockup detector to trigger, among other things. Fix this by keeping track of whether the timer base has been idle since it was last run or forwarded, and if so then forward it before adding a new timer. There is still a case where mod_timer optimises the case of a pending timer mod with the same expiry time, where the timer can see excessive granularity relative to the new, shorter interval. A comment is added, but it's not changed because it is an important fastpath for networking. This has been tested and found to fix the RCU softlockup messages. Testing was also done with tracing to measure requested versus achieved wakeup latencies for all non-deferrable timers in an idle system (with no lockup watchdogs running). Wakeup latency relative to absolute latency is calculated (note this suffers from round-up skew at low absolute times) and analysed: max avg std upstream 506.0 1.20 4.68 patched 2.0 1.08 0.15 The bug was noticed due to the lockup detector Kconfig changes dropping it out of people's .configs and resulting in larger base clk skew When the lockup detectors are enabled, no CPU can go idle for longer than 4 seconds, which limits the granularity errors. Sub-optimal timer behaviour is observable on a smaller scale in that case: max avg std upstream 9.0 1.05 0.19 patched 2.0 1.04 0.11 Fixes: Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Signed-off-by: Nicholas Piggin Signed-off-by: Thomas Gleixner Tested-by: Jonathan Cameron Tested-by: David Miller Cc: dzickus@redhat.com Cc: sfr@canb.auug.org.au Cc: mpe@ellerman.id.au Cc: Stephen Boyd Cc: linuxarm@huawei.com Cc: abdhalee@linux.vnet.ibm.com Cc: John Stultz Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com --- kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8f5d1bf18854..f2674a056c26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -203,6 +203,7 @@ struct timer_base { bool migration_enabled; bool nohz_active; bool is_idle; + bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { - unsigned long jnow = READ_ONCE(jiffies); + unsigned long jnow; /* - * We only forward the base when it's idle and we have a delta between - * base clock and jiffies. + * We only forward the base when we are idle or have just come out of + * idle (must_forward_clk logic), and have a delta between base clock + * and jiffies. In the common case, run_timers will take care of it. */ - if (!base->is_idle || (long) (jnow - base->clk) < 2) + if (likely(!base->must_forward_clk)) + return; + + jnow = READ_ONCE(jiffies); + base->must_forward_clk = base->is_idle; + if ((long)(jnow - base->clk) < 2) return; /* @@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * same array bucket then just return: */ if (timer_pending(timer)) { + /* + * The downside of this optimization is that it can result in + * larger granularity than you would get from adding a new + * timer with this expiry. + */ if (timer->expires == expires) return 1; @@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); + forward_timer_base(base); clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } else { base = lock_timer_base(timer, &flags); + forward_timer_base(base); } ret = detach_if_pending(timer, base, false); @@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); + forward_timer_base(base); } } - /* Try to forward a stale timer base clock */ - forward_timer_base(base); - timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu) WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } + forward_timer_base(base); debug_activate(timer, timer->expires); internal_add_timer(base, timer); @@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (!is_max_delta) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* - * If we expect to sleep more than a tick, mark the base idle: + * If we expect to sleep more than a tick, mark the base idle. + * Also the tick is stopped so any added timer must forward + * the base clk itself to keep granularity small. This idle + * logic is only maintained for the BASE_STD base, deferrable + * timers may still see large granularity skew (by design). */ - if ((expires - basem) > TICK_NSEC) + if ((expires - basem) > TICK_NSEC) { + base->must_forward_clk = true; base->is_idle = true; + } } raw_spin_unlock(&base->lock); @@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the + * base. idle trcking / clock forwarding logic is only used with + * BASE_STD timers. + * + * The deferrable base does not do idle tracking at all, so we do + * not forward it. This can result in very large variations in + * granularity for deferrable timers, but they can be deferred for + * long periods due to idle. + */ + base->must_forward_clk = false; + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -- cgit From bd0fdb191c8523a9126bb14ac1b22cb47698ebf5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 13 Mar 2017 03:03:49 +1000 Subject: KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9 When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync). Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a8efdf..9c9c983b864f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f +BEGIN_FTR_SECTION + PPC_MSGSYNC +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq 4f -- cgit From 2c4fb78f78b6e420604ee1b05bdfb5c1d637869f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:52 +1000 Subject: KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss This adds a workaround for a bug in POWER9 DD1 chips where changing the CPPR (Current Processor Priority Register) can cause bits in the IPB (Interrupt Pending Buffer) to get lost. Thankfully it only happens when manually manipulating CPPR which is quite rare. When it does happen it can cause interrupts to be delayed or lost. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4636ca6e7d38..150be86b1018 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,7 +16,16 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; - /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */ + /* + * DD1 bug workaround: If PIPR is less favored than CPPR + * ignore the interrupt or we might incorrectly lose an IPB + * bit. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + if (pipr >= xc->hw_cppr) + return; + } /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); -- cgit From bb9b52bd51dcb17b965a30167d0812902c1b9927 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:58 +1000 Subject: KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them This adds missing memory barriers to order updates/tests of the virtual CPPR and MFRR, thus fixing a lost IPI problem. While at it also document all barriers in this file. This fixes a bug causing guest IPIs to occasionally get lost. The symptom then is hangs or stalls in the guest. Signed-off-by: Benjamin Herrenschmidt Tested-by: Guilherme G. Piccoli Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 57 +++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 150be86b1018..d1ed2c41b5d2 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,6 +16,12 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; + /* + * Ensure any previous store to CPPR is ordered vs. + * the subsequent loads from PIPR or ACK. + */ + eieio(); + /* * DD1 bug workaround: If PIPR is less favored than CPPR * ignore the interrupt or we might incorrectly lose an IPB @@ -244,6 +250,11 @@ skip_ipi: /* * If we found an interrupt, adjust what the guest CPPR should * be as if we had just fetched that interrupt from HW. + * + * Note: This can only make xc->cppr smaller as the previous + * loop will only exit with hirq != 0 if prio is lower than + * the current xc->cppr. Thus we don't need to re-check xc->mfrr + * for pending IPIs. */ if (hirq) xc->cppr = prio; @@ -389,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) old_cppr = xc->cppr; xc->cppr = cppr; + /* + * Order the above update of xc->cppr with the subsequent + * read of xc->mfrr inside push_pending_to_hw() + */ + smp_mb(); + /* * We are masking less, we need to look for pending things * to deliver and set VP pending bits accordingly to trigger @@ -429,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) * used to signal MFRR changes is EOId when fetched from * the queue. */ - if (irq == XICS_IPI || irq == 0) + if (irq == XICS_IPI || irq == 0) { + /* + * This barrier orders the setting of xc->cppr vs. + * subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); goto bail; + } /* Find interrupt source */ sb = kvmppc_xive_find_source(xive, irq, &src); if (!sb) { pr_devel(" source not found !\n"); rc = H_PARAMETER; + /* Same as above */ + smp_mb(); goto bail; } state = &sb->irq_state[src]; kvmppc_xive_select_irq(state, &hw_num, &xd); state->in_eoi = true; - mb(); + + /* + * This barrier orders both setting of in_eoi above vs, + * subsequent test of guest_priority, and the setting + * of xc->cppr vs. subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); again: if (state->guest_priority == MASKED) { @@ -470,6 +503,14 @@ again: } + /* + * This barrier orders the above guest_priority check + * and spin_lock/unlock with clearing in_eoi below. + * + * It also has to be a full mb() as it must ensure + * the MMIOs done in source_eoi() are completed before + * state->in_eoi is visible. + */ mb(); state->in_eoi = false; bail: @@ -504,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, /* Locklessly write over MFRR */ xc->mfrr = mfrr; + /* + * The load of xc->cppr below and the subsequent MMIO store + * to the IPI must happen after the above mfrr update is + * globally visible so that: + * + * - Synchronize with another CPU doing an H_EOI or a H_CPPR + * updating xc->cppr then reading xc->mfrr. + * + * - The target of the IPI sees the xc->mfrr update + */ + mb(); + /* Shoot the IPI if most favored than target cppr */ if (mfrr < xc->cppr) __x_writeq(0, __x_trig_page(&xc->vp_ipi_data)); -- cgit From a8f0f9e49956a74718874b800251455680085600 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 17 Aug 2017 16:37:25 -0400 Subject: ftrace: Check for null ret_stack on profile function graph entry function There's a small race when function graph shutsdown and the calling of the registered function graph entry callback. The callback must not reference the task's ret_stack without first checking that it is not NULL. Note, when a ret_stack is allocated for a task, it stays allocated until the task exits. The problem here, is that function_graph is shutdown, and a new task was created, which doesn't have its ret_stack allocated. But since some of the functions are still being traced, the callbacks can still be called. The normal function_graph code handles this, but starting with commit 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") the profiler code references the ret_stack on function entry, but doesn't check if it is NULL first. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196611 Cc: stable@vger.kernel.org Fixes: 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") Reported-by: lilydjwg@gmail.com Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02004ae91860..96cea88fa00f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -889,6 +889,10 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) function_profile_call(trace->func, 0, NULL, NULL); + /* If function graph is shutting down, ret_stack can be NULL */ + if (!current->ret_stack) + return 0; + if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) current->ret_stack[index].subtime = 0; -- cgit From 475bb3c69ab05df2a6ecef6acc2393703d134180 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Mon, 14 Aug 2017 18:18:17 +0800 Subject: tracing: Fix kmemleak in tracing_map_array_free() kmemleak reported the below leak when I was doing clear of the hist trigger. With this patch, the kmeamleak is gone. unreferenced object 0xffff94322b63d760 (size 32): comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) hex dump (first 32 bytes): 00 01 00 00 04 00 00 00 08 00 00 00 ff 00 00 00 ................ 10 00 00 00 00 00 00 00 80 a8 7a f2 31 94 ff ff ..........z.1... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0xca/0x1d0 [] tracing_map_array_alloc+0x26/0x140 [] kretprobe_trampoline+0x0/0x50 [] create_hist_data+0x535/0x750 [] event_hist_trigger_func+0x1f7/0x420 [] event_trigger_write+0xfd/0x1a0 [] __vfs_write+0x37/0x170 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff unreferenced object 0xffff9431f27aa880 (size 128): comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) hex dump (first 32 bytes): 00 00 8c 2a 32 94 ff ff 00 f0 8b 2a 32 94 ff ff ...*2......*2... 00 e0 8b 2a 32 94 ff ff 00 d0 8b 2a 32 94 ff ff ...*2......*2... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc+0xe8/0x220 [] tracing_map_array_alloc+0xb1/0x140 [] kretprobe_trampoline+0x0/0x50 [] create_hist_data+0x535/0x750 [] event_hist_trigger_func+0x1f7/0x420 [] event_trigger_write+0xfd/0x1a0 [] __vfs_write+0x37/0x170 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Link: http://lkml.kernel.org/r/1502705898-27571-1-git-send-email-chuhu@redhat.com Cc: stable@vger.kernel.org Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map") Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/tracing_map.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 0a689bbb78ef..305039b122fa 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -221,16 +221,19 @@ void tracing_map_array_free(struct tracing_map_array *a) if (!a) return; - if (!a->pages) { - kfree(a); - return; - } + if (!a->pages) + goto free; for (i = 0; i < a->n_pages; i++) { if (!a->pages[i]) break; free_page((unsigned long)a->pages[i]); } + + kfree(a->pages); + + free: + kfree(a); } struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, -- cgit From 8b0db1a5bdfcee0dbfa89607672598ae203c9045 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 23 Aug 2017 12:46:27 -0400 Subject: tracing: Fix freeing of filter in create_filter() when set_str is false Performing the following task with kmemleak enabled: # cd /sys/kernel/tracing/events/irq/irq_handler_entry/ # echo 'enable_event:kmem:kmalloc:3 if irq >' > trigger # echo 'enable_event:kmem:kmalloc:3 if irq > 31' > trigger # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800b9290308 (size 32): comm "bash", pid 1114, jiffies 4294848451 (age 141.139s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc_trace+0x158/0x290 [] create_filter_start.constprop.28+0x99/0x940 [] create_filter+0xa9/0x160 [] create_event_filter+0xc/0x10 [] set_trigger_filter+0xe5/0x210 [] event_enable_trigger_func+0x324/0x490 [] event_trigger_write+0x1a2/0x260 [] __vfs_write+0xd7/0x380 [] vfs_write+0x101/0x260 [] SyS_write+0xab/0x130 [] entry_SYSCALL_64_fastpath+0x1f/0xbe [] 0xffffffffffffffff The function create_filter() is passed a 'filterp' pointer that gets allocated, and if "set_str" is true, it is up to the caller to free it, even on error. The problem is that the pointer is not freed by create_filter() when set_str is false. This is a bug, and it is not up to the caller to free the filter on error if it doesn't care about the string. Link: http://lkml.kernel.org/r/1502705898-27571-2-git-send-email-chuhu@redhat.com Cc: stable@vger.kernel.org Fixes: 38b78eb85 ("tracing: Factorize filter creation") Reported-by: Chunyu Hu Tested-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 59a411ff60c7..181e139a8057 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1959,6 +1959,10 @@ static int create_filter(struct trace_event_call *call, if (err && set_str) append_filter_err(ps, filter); } + if (err && !set_str) { + free_event_filter(filter); + filter = NULL; + } create_filter_finish(ps); *filterp = filter; -- cgit From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- block/bsg-lib.c | 74 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a..dd56d7460cb9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include +#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; -- cgit From 64236e315955cc59e2b612e6a0e59579395530ae Mon Sep 17 00:00:00 2001 From: Cao jin Date: Tue, 22 Aug 2017 21:09:53 +0800 Subject: kbuild: update comments of Makefile.asm-generic Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/Makefile.asm-generic | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic index 95f7d8090152..a6c8c1780855 100644 --- a/scripts/Makefile.asm-generic +++ b/scripts/Makefile.asm-generic @@ -1,9 +1,9 @@ # include/asm-generic contains a lot of files that are used # verbatim by several architectures. # -# This Makefile reads the file arch/$(SRCARCH)/include/asm/Kbuild +# This Makefile reads the file arch/$(SRCARCH)/include/$(src)/Kbuild # and for each file listed in this file with generic-y creates -# a small wrapper file in $(obj) (arch/$(SRCARCH)/include/generated/asm) +# a small wrapper file in $(obj) (arch/$(SRCARCH)/include/generated/$(src)) kbuild-file := $(srctree)/arch/$(SRCARCH)/include/$(src)/Kbuild -include $(kbuild-file) -- cgit From 58efbc9f5463308c43d812fd0748a4dee44c8a16 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 22 Aug 2017 23:45:59 -0700 Subject: Btrfs: fix blk_status_t/errno confusion This fixes several instances of blk_status_t and bare errno ints being mixed up, some of which are real bugs. In the normal case, 0 matches BLK_STS_OK, so we don't observe any effects of the missing conversion, but in case of errors or passes through the repair/retry paths, the errors get mixed up. The changes were identified using 'sparse', we don't have reports of the buggy behaviour. Fixes: 4e4cbee93d56 ("block: switch bios to blk_status_t") Signed-off-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/inode.c | 70 +++++++++++++++++++++++++++++------------------------- fs/btrfs/raid56.c | 34 +++++++++++++------------- fs/btrfs/volumes.c | 10 ++++---- fs/btrfs/volumes.h | 6 ++--- 5 files changed, 64 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 080e2ebb8aa0..f45b61fe9a9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3516,7 +3516,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) struct bio *bio = device->flush_bio; if (!device->flush_bio_sent) - return 0; + return BLK_STS_OK; device->flush_bio_sent = 0; wait_for_completion_io(&device->flush_wait); @@ -3563,7 +3563,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = 0; + dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 95c212037095..24bcd5cd9cf2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7924,11 +7924,12 @@ err: return ret; } -static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, - int mirror_num) +static inline blk_status_t submit_dio_repair_bio(struct inode *inode, + struct bio *bio, + int mirror_num) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; + blk_status_t ret; BUG_ON(bio_op(bio) == REQ_OP_WRITE); @@ -7980,10 +7981,10 @@ static int btrfs_check_dio_repairable(struct inode *inode, return 1; } -static int dio_read_error(struct inode *inode, struct bio *failed_bio, - struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - bio_end_io_t *repair_endio, void *repair_arg) +static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, + struct page *page, unsigned int pgoff, + u64 start, u64 end, int failed_mirror, + bio_end_io_t *repair_endio, void *repair_arg) { struct io_failure_record *failrec; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -7993,18 +7994,19 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, int read_mode = 0; int segs; int ret; + blk_status_t status; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) - return ret; + return errno_to_blk_status(ret); ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { free_io_failure(failure_tree, io_tree, failrec); - return -EIO; + return BLK_STS_IOERR; } segs = bio_segments(failed_bio); @@ -8022,13 +8024,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); - if (ret) { + status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); + if (status) { free_io_failure(failure_tree, io_tree, failrec); bio_put(bio); } - return ret; + return status; } struct btrfs_retry_complete { @@ -8065,8 +8067,8 @@ end: bio_put(bio); } -static int __btrfs_correct_data_nocsum(struct inode *inode, - struct btrfs_io_bio *io_bio) +static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, + struct btrfs_io_bio *io_bio) { struct btrfs_fs_info *fs_info; struct bio_vec bvec; @@ -8076,8 +8078,8 @@ static int __btrfs_correct_data_nocsum(struct inode *inode, unsigned int pgoff; u32 sectorsize; int nr_sectors; - int ret; - int err = 0; + blk_status_t ret; + blk_status_t err = BLK_STS_OK; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; @@ -8183,11 +8185,12 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, int csum_pos; bool uptodate = (err == 0); int ret; + blk_status_t status; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; - err = 0; + err = BLK_STS_OK; start = io_bio->logical; done.inode = inode; io_bio->bio.bi_iter = io_bio->iter; @@ -8209,12 +8212,12 @@ try_again: done.start = start; init_completion(&done.done); - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, - pgoff, start, start + sectorsize - 1, - io_bio->mirror_num, - btrfs_retry_endio, &done); - if (ret) { - err = errno_to_blk_status(ret); + status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, + pgoff, start, start + sectorsize - 1, + io_bio->mirror_num, btrfs_retry_endio, + &done); + if (status) { + err = status; goto next; } @@ -8250,7 +8253,7 @@ static blk_status_t btrfs_subio_endio_read(struct inode *inode, if (unlikely(err)) return __btrfs_correct_data_nocsum(inode, io_bio); else - return 0; + return BLK_STS_OK; } else { return __btrfs_subio_endio_read(inode, io_bio, err); } @@ -8423,9 +8426,9 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, return 0; } -static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, - u64 file_offset, int skip_sum, - int async_submit) +static inline blk_status_t +__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, + int skip_sum, int async_submit) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; @@ -8488,6 +8491,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, int clone_offset = 0; int clone_len; int ret; + blk_status_t status; map_length = orig_bio->bi_iter.bi_size; submit_len = map_length; @@ -8537,9 +8541,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, */ atomic_inc(&dip->pending_bios); - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (ret) { + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (status) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; @@ -8557,9 +8561,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, } while (submit_len > 0); submit: - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (!ret) + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (!status) return 0; bio_put(bio); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 208638384cd2..2cf6ba40f7c4 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -905,7 +905,7 @@ static void raid_write_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - err = 0; + err = BLK_STS_OK; /* OK, we have read all the stripes we need to. */ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? @@ -1324,7 +1324,7 @@ write_data: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -1475,7 +1475,7 @@ static void raid_rmw_end_io(struct bio *bio) cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static void async_rmw_stripe(struct btrfs_raid_bio *rbio) @@ -1579,7 +1579,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) return 0; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; finish: @@ -1795,12 +1795,12 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) void **pointers; int faila = -1, failb = -1; struct page *page; - int err; + blk_status_t err; int i; pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers) { - err = -ENOMEM; + err = BLK_STS_RESOURCE; goto cleanup_io; } @@ -1856,7 +1856,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * a bad data or Q stripe. * TODO, we should redo the xor here. */ - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1882,7 +1882,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { if (rbio->bbio->raid_map[faila] == RAID5_P_STRIPE) { - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1954,13 +1954,13 @@ pstripe: } } - err = 0; + err = BLK_STS_OK; cleanup: kfree(pointers); cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == 0) + if (err == BLK_STS_OK) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -1968,7 +1968,7 @@ cleanup_io: rbio_orig_end_io(rbio, err); } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { rbio_orig_end_io(rbio, err); - } else if (err == 0) { + } else if (err == BLK_STS_OK) { rbio->faila = -1; rbio->failb = -1; @@ -2005,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio) return; if (atomic_read(&rbio->error) > rbio->bbio->max_errors) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); else __raid_recover_end_io(rbio); } @@ -2104,7 +2104,7 @@ out: cleanup: if (rbio->operation == BTRFS_RBIO_READ_REBUILD || rbio->operation == BTRFS_RBIO_REBUILD_MISSING) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; } @@ -2431,7 +2431,7 @@ submit_write: nr_data = bio_list_size(&bio_list); if (!nr_data) { /* Every parity is right */ - rbio_orig_end_io(rbio, 0); + rbio_orig_end_io(rbio, BLK_STS_OK); return; } @@ -2451,7 +2451,7 @@ submit_write: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) @@ -2519,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -2633,7 +2633,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return; finish: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e8b9a269fdde..bd679bc7a1a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6212,8 +6212,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) } } -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit) +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit) { struct btrfs_device *dev; struct bio *first_bio = bio; @@ -6233,7 +6233,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } total_devs = bbio->num_stripes; @@ -6256,7 +6256,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } if (map_length < length) { @@ -6283,7 +6283,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, dev_nr, async_submit); } btrfs_bio_counter_dec(fs_info); - return 0; + return BLK_STS_OK; } struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6f45fd60d15a..93277fc60930 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -74,7 +74,7 @@ struct btrfs_device { int missing; int can_discard; int is_tgtdev_for_dev_replace; - int last_flush_error; + blk_status_t last_flush_error; int flush_bio_sent; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED @@ -416,8 +416,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit); +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, -- cgit From 9ce76511b67be8fbcdff36b7e1662e3887bb7377 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Tue, 22 Aug 2017 21:51:46 -0400 Subject: ASoC: rt5677: Reintroduce I2C device IDs Not all devices with ACPI and this combination of sound devices will have the required information provided via ACPI. Reintroduce the I2C device ID to restore sound functionality on on the Chromebook 'Samus' model. [ More background note: the commit a36afb0ab648 ("ASoC: rt5677: Introduce proper table...") moved the i2c ID probed via ACPI ("RT5677CE:00") to a proper acpi_device_id table. Although the action itself is correct per se, the overseen issue is the reference id->driver_data at rt5677_i2c_probe() for retrieving the corresponding chip model for the given id. Since id=NULL is passed for ACPI matching case, we get an Oops now. We already have queued more fixes for 4.14 and they already address the issue, but they are bigger changes that aren't preferable for the late 4.13-rc stage. So, this patch just papers over the bug as a once-off quick fix for a particular ACPI matching. -- tiwai ] Fixes: a36afb0ab648 ("ASoC: rt5677: Introduce proper table for ACPI enumeration") Signed-off-by: Tom Rini Acked-by: Mark Brown Signed-off-by: Takashi Iwai --- sound/soc/codecs/rt5677.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 36e530a36c82..6f629278d982 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -5021,6 +5021,7 @@ static const struct regmap_config rt5677_regmap = { static const struct i2c_device_id rt5677_i2c_id[] = { { "rt5677", RT5677 }, { "rt5676", RT5676 }, + { "RT5677CE:00", RT5677 }, { } }; MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id); -- cgit From 498ca3c82a7b11e152a46c253f6b2087c929ce00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 23 Aug 2017 08:35:40 +0300 Subject: IB/core: Avoid accessing non-allocated memory when inferring port type Commit 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") introduced the concept of type in ah_attr: * During ib_register_device, each port is checked for its type which is stored in ib_device's port_immutable array. * During uverbs' modify_qp, the type is inferred using the port number in ib_uverbs_qp_dest struct (address vector) by accessing the relevant port_immutable array and the type is passed on to providers. IB spec (version 1.3) enforces a valid port value only in Reset to Init. During Init to RTR, the address vector must be valid but port number is not mentioned as a field in the address vector, so its value is not validated, which leads to accesses to a non-allocated memory when inferring the port type. Save the real port number in ib_qp during modify to Init (when the comp_mask indicates that the port number is valid) and use this value to infer the port type. Avoid copying the address vector fields if the matching bit is not set in the attr_mask. Address vector can't be modified before the port, so no valid flow is affected. Fixes: 44c58487d51a ('IB/core: Define 'ib' and 'roce' rdma_ah_attr types') Signed-off-by: Noa Osherovich Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 11 +++++++---- drivers/infiniband/core/verbs.c | 7 ++++++- include/rdma/ib_verbs.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 55822ae71955..739bd69ef1d4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1522,6 +1522,7 @@ static int create_qp(struct ib_uverbs_file *file, qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); + qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) @@ -1962,8 +1963,9 @@ static int modify_qp(struct ib_uverbs_file *file, attr->alt_timeout = cmd->base.alt_timeout; attr->rate_limit = cmd->rate_limit; - attr->ah_attr.type = rdma_ah_find_type(qp->device, - cmd->base.dest.port_num); + if (cmd->base.attr_mask & IB_QP_AV) + attr->ah_attr.type = rdma_ah_find_type(qp->device, + cmd->base.dest.port_num); if (cmd->base.dest.is_global) { rdma_ah_set_grh(&attr->ah_attr, NULL, cmd->base.dest.flow_label, @@ -1981,8 +1983,9 @@ static int modify_qp(struct ib_uverbs_file *file, rdma_ah_set_port_num(&attr->ah_attr, cmd->base.dest.port_num); - attr->alt_ah_attr.type = rdma_ah_find_type(qp->device, - cmd->base.dest.port_num); + if (cmd->base.attr_mask & IB_QP_ALT_PATH) + attr->alt_ah_attr.type = + rdma_ah_find_type(qp->device, cmd->base.dest.port_num); if (cmd->base.alt_dest.is_global) { rdma_ah_set_grh(&attr->alt_ah_attr, NULL, cmd->base.alt_dest.flow_label, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7f8fe443df46..b456e3ca1876 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -838,6 +838,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, spin_lock_init(&qp->mr_lock); INIT_LIST_HEAD(&qp->rdma_mrs); INIT_LIST_HEAD(&qp->sig_mrs); + qp->port = 0; if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) return ib_create_xrc_qp(qp, qp_init_attr); @@ -1297,7 +1298,11 @@ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, if (ret) return ret; } - return ib_security_modify_qp(qp, attr, attr_mask, udata); + ret = ib_security_modify_qp(qp, attr, attr_mask, udata); + if (!ret && (attr_mask & IB_QP_PORT)) + qp->port = attr->port_num; + + return ret; } EXPORT_SYMBOL(ib_modify_qp_with_udata); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..88c32aba32f7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1683,6 +1683,7 @@ struct ib_qp { enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; + u8 port; }; struct ib_mr { -- cgit From 1d31e9c09f41f9ffa53eaf8457b3e77a1d527f1e Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 23 Aug 2017 08:35:41 +0300 Subject: IB/mlx5: Fix Raw Packet QP event handler assignment In case we have SQ and RQ for Raw Packet QP, the SQ's event handler wasn't assigned. Fixing this by assigning event handler for each WQ after creation. [ 1877.145243] Call Trace: [ 1877.148644] [ 1877.150580] [] ? mlx5_rsc_event+0x105/0x210 [mlx5_core] [ 1877.159581] [] ? mlx5_cq_event+0x57/0xd0 [mlx5_core] [ 1877.167137] [] mlx5_eq_int+0x53e/0x6c0 [mlx5_core] [ 1877.174526] [] ? sched_clock+0x9/0x10 [ 1877.180753] [] handle_irq_event_percpu+0x3e/0x1e0 [ 1877.188014] [] handle_irq_event+0x3d/0x60 [ 1877.194567] [] handle_edge_irq+0x77/0x130 [ 1877.201129] [] handle_irq+0xbf/0x150 [ 1877.207244] [] ? atomic_notifier_call_chain+0x1a/0x20 [ 1877.214829] [] do_IRQ+0x4f/0xf0 [ 1877.220498] [] common_interrupt+0x6d/0x6d [ 1877.227025] [ 1877.228967] [] ? cpuidle_enter_state+0x52/0xc0 [ 1877.236990] [] cpuidle_idle_call+0xc5/0x200 [ 1877.243676] [] arch_cpu_idle+0xe/0x30 [ 1877.249831] [] cpu_startup_entry+0xf5/0x290 [ 1877.256513] [] start_secondary+0x265/0x27b [ 1877.263111] Code: Bad RIP value. [ 1877.267296] RIP [< (null)>] (null) [ 1877.273264] RSP [ 1877.277531] CR2: 0000000000000000 Fixes: 19098df2da78 ("IB/mlx5: Refactor mlx5_ib_qp to accommodate other QP types") Signed-off-by: Majd Dibbiny Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0889ff367c86..f58f8f5f3ebe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1238,6 +1238,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err_destroy_tis; sq->base.container_mibqp = qp; + sq->base.mqp.event = mlx5_ib_qp_event; } if (qp->rq.wqe_cnt) { -- cgit From ec2558796d25e6024071b6bcb8e11392538d57bf Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 23 Aug 2017 08:35:42 +0300 Subject: IB/mlx5: Always return success for RoCE modify port CM layer calls ib_modify_port() regardless of the link layer. For the Ethernet ports, qkey violation and Port capabilities are meaningless. Therefore, always return success for ib_modify_port calls on the Ethernet ports. Cc: Selvin Xavier Signed-off-by: Majd Dibbiny Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..f7fcde1ff0aa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1085,6 +1085,12 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND); + /* CM layer calls ib_modify_port() regardless of the link layer. For + * Ethernet ports, qkey violation and Port capabilities are meaningless. + */ + if (!is_ib) + return 0; + if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) { change_mask = props->clr_port_cap_mask | props->set_port_cap_mask; value = ~props->clr_port_cap_mask | props->set_port_cap_mask; -- cgit From 311fc65c9fb9c966bca8e6f3ff8132ce57344ab9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 Aug 2017 15:13:29 -0500 Subject: pty: Repair TIOCGPTPEER The implementation of TIOCGPTPEER has two issues. When /dev/ptmx (as opposed to /dev/pts/ptmx) is opened the wrong vfsmount is passed to dentry_open. Which results in the kernel displaying the wrong pathname for the peer. The second is simply by caching the vfsmount and dentry of the peer it leaves them open, in a way they were not previously Which because of the inreased reference counts can cause unnecessary behaviour differences resulting in regressions. To fix these move the ioctl into tty_io.c at a generic level allowing the ioctl to have access to the struct file on which the ioctl is being called. This allows the path of the slave to be derived when opening the slave through TIOCGPTPEER instead of requiring the path to the slave be cached. Thus removing the need for caching the path. A new function devpts_ptmx_path is factored out of devpts_acquire and used to implement a function devpts_mntget. The new function devpts_mntget takes a filp to perform the lookup on and fsi so that it can confirm that the superblock that is found by devpts_ptmx_path is the proper superblock. v2: Lots of fixes to make the code actually work v3: Suggestions by Linus - Removed the unnecessary initialization of filp in ptm_open_peer - Simplified devpts_ptmx_path as gotos are no longer required [ This is the fix for the issue that was reverted in commit 143c97cc6529, but this time without breaking 'pbuilder' due to increased reference counts - Linus ] Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Reported-by: Christian Brauner Reported-and-tested-by: Stefan Lippers-Hollmann Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 64 ++++++++++++++++++++-------------------------- drivers/tty/tty_io.c | 3 +++ fs/devpts/inode.c | 65 +++++++++++++++++++++++++++++++++++------------ include/linux/devpts_fs.h | 10 ++++++++ 4 files changed, 89 insertions(+), 53 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 284749fb0f6b..a6d5164c33a9 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -69,13 +69,8 @@ static void pty_close(struct tty_struct *tty, struct file *filp) #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { mutex_lock(&devpts_mutex); - if (tty->link->driver_data) { - struct path *path = tty->link->driver_data; - - devpts_pty_kill(path->dentry); - path_put(path); - kfree(path); - } + if (tty->link->driver_data) + devpts_pty_kill(tty->link->driver_data); mutex_unlock(&devpts_mutex); } #endif @@ -607,25 +602,24 @@ static inline void legacy_pty_init(void) { } static struct cdev ptmx_cdev; /** - * pty_open_peer - open the peer of a pty - * @tty: the peer of the pty being opened + * ptm_open_peer - open the peer of a pty + * @master: the open struct file of the ptmx device node + * @tty: the master of the pty being opened + * @flags: the flags for open * - * Open the cached dentry in tty->link, providing a safe way for userspace - * to get the slave end of a pty (where they have the master fd and cannot - * access or trust the mount namespace /dev/pts was mounted inside). + * Provide a race free way for userspace to open the slave end of a pty + * (where they have the master fd and cannot access or trust the mount + * namespace /dev/pts was mounted inside). */ -static struct file *pty_open_peer(struct tty_struct *tty, int flags) -{ - if (tty->driver->subtype != PTY_TYPE_MASTER) - return ERR_PTR(-EIO); - return dentry_open(tty->link->driver_data, flags, current_cred()); -} - -static int pty_get_peer(struct tty_struct *tty, int flags) +int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) { int fd = -1; - struct file *filp = NULL; + struct file *filp; int retval = -EINVAL; + struct path path; + + if (tty->driver != ptm_driver) + return -EIO; fd = get_unused_fd_flags(0); if (fd < 0) { @@ -633,7 +627,16 @@ static int pty_get_peer(struct tty_struct *tty, int flags) goto err; } - filp = pty_open_peer(tty, flags); + /* Compute the slave's path */ + path.mnt = devpts_mntget(master, tty->driver_data); + if (IS_ERR(path.mnt)) { + retval = PTR_ERR(path.mnt); + goto err_put; + } + path.dentry = tty->link->driver_data; + + filp = dentry_open(&path, flags, current_cred()); + mntput(path.mnt); if (IS_ERR(filp)) { retval = PTR_ERR(filp); goto err_put; @@ -662,8 +665,6 @@ static int pty_unix98_ioctl(struct tty_struct *tty, return pty_get_pktmode(tty, (int __user *)arg); case TIOCGPTN: /* Get PT Number */ return put_user(tty->index, (unsigned int __user *)arg); - case TIOCGPTPEER: /* Open the other end */ - return pty_get_peer(tty, (int) arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); } @@ -791,7 +792,6 @@ static int ptmx_open(struct inode *inode, struct file *filp) { struct pts_fs_info *fsi; struct tty_struct *tty; - struct path *pts_path; struct dentry *dentry; int retval; int index; @@ -845,26 +845,16 @@ static int ptmx_open(struct inode *inode, struct file *filp) retval = PTR_ERR(dentry); goto err_release; } - /* We need to cache a fake path for TIOCGPTPEER. */ - pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); - if (!pts_path) - goto err_release; - pts_path->mnt = filp->f_path.mnt; - pts_path->dentry = dentry; - path_get(pts_path); - tty->link->driver_data = pts_path; + tty->link->driver_data = dentry; retval = ptm_driver->ops->open(tty, filp); if (retval) - goto err_path_put; + goto err_release; tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); tty_unlock(tty); return 0; -err_path_put: - path_put(pts_path); - kfree(pts_path); err_release: tty_unlock(tty); // This will also put-ref the fsi diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 974b13d24401..10c4038c0e8d 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2518,6 +2518,9 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCSSERIAL: tty_warn_deprecated_flags(p); break; + case TIOCGPTPEER: + /* Special because the struct file is needed */ + return ptm_open_peer(file, tty, (int)arg); default: retval = tty_jobctrl_ioctl(tty, real_tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..7eae33ffa3fc 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,6 +133,50 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } +static int devpts_ptmx_path(struct path *path) +{ + struct super_block *sb; + int err; + + /* Has the devpts filesystem already been found? */ + if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC) + return 0; + + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(path); + if (err) + return err; + + /* Is the path the root of a devpts filesystem? */ + sb = path->mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path->mnt->mnt_root != sb->s_root)) + return -ENODEV; + + return 0; +} + +struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) +{ + struct path path; + int err; + + path = filp->f_path; + path_get(&path); + + err = devpts_ptmx_path(&path); + dput(path.dentry); + if (err) { + mntput(path.mnt); + path.mnt = ERR_PTR(err); + } + if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { + mntput(path.mnt); + path.mnt = ERR_PTR(-ENODEV); + } + return path.mnt; +} + struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; @@ -143,27 +187,16 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); - /* Has the devpts filesystem already been found? */ - sb = path.mnt->mnt_sb; - if (sb->s_magic != DEVPTS_SUPER_MAGIC) { - /* Is a devpts filesystem at "pts" in the same directory? */ - err = path_pts(&path); - if (err) { - result = ERR_PTR(err); - goto out; - } - - /* Is the path the root of a devpts filesystem? */ - result = ERR_PTR(-ENODEV); - sb = path.mnt->mnt_sb; - if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || - (path.mnt->mnt_root != sb->s_root)) - goto out; + err = devpts_ptmx_path(&path); + if (err) { + result = ERR_PTR(err); + goto out; } /* * pty code needs to hold extra references in case of last /dev/tty close */ + sb = path.mnt->mnt_sb; atomic_inc(&sb->s_active); result = DEVPTS_SB(sb); diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..100cb4343763 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,7 @@ struct pts_fs_info; +struct vfsmount *devpts_mntget(struct file *, struct pts_fs_info *); struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); @@ -32,6 +33,15 @@ void *devpts_get_priv(struct dentry *); /* unlink */ void devpts_pty_kill(struct dentry *); +/* in pty.c */ +int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags); + +#else +static inline int +ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) +{ + return -EIO; +} #endif -- cgit From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:19 -0400 Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE When processing an NFSv4 WRITE operation, argp->end should never point past the end of the data in the final page of the page list. Otherwise, nfsd4_decode_compound can walk into uninitialized memory. More critical, nfsd4_decode_write is failing to increment argp->pagelen when it increments argp->pagelist. This can cause later xdr decoders to assume more data is available than really is, which can cause server crashes on malformed requests. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..5f940d2a136b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); -- cgit From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Mon, 21 Aug 2017 07:23:07 -0400 Subject: net: sunrpc: svcsock: fix NULL-pointer exception While running nfs/connectathon tests kernel NULL-pointer exception has been observed due to races in svcsock.c. Race is appear when kernel accepts connection by kernel_accept (which creates new socket) and start queuing ingress packets to new socket. This happens in ksoftirq context which could run concurrently on a different core while new socket setup is not done yet. The fix is to re-order socket user data init sequence and add write/read barrier calls to be sure that we got proper values for callback pointers before actually calling them. Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations. Crash log: ---<-snip->--- [ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 6708.647093] pgd = ffff0000094e0000 [ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000 [ 6708.660761] Internal error: Oops: 86000005 [#1] SMP [ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops [ 6708.736446] ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021] [ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G W OE 4.11.0-4.el7.aarch64 #1 [ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017 [ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000 [ 6708.773822] PC is at 0x0 [ 6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc] [ 6708.781866] pc : [<0000000000000000>] lr : [] pstate: 60000145 [ 6708.789248] sp : ffff810ffbad3900 [ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58 [ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00 [ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28 [ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000 [ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00 [ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2 [ 6708.824365] x17: 0000000000000000 x16: 0000000000000000 [ 6708.829667] x15: 0000000000000000 x14: 0000000000000001 [ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e [ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000 [ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000 [ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000 [ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000 [ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000 [ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00 [ 6708.872084] [ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000) [ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000) [ 6708.886075] Call trace: [ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840) [ 6708.894942] 3700: ffff810012412400 0001000000000000 [ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000 [ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000 [ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000 [ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d [ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140 [ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000 [ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 0000000000000028 [ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000 [ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000 [ 6708.973117] [< (null)>] (null) [ 6708.977824] [] tcp_data_queue+0x754/0xc5c [ 6708.983386] [] tcp_rcv_established+0x1a0/0x67c [ 6708.989384] [] tcp_v4_do_rcv+0x15c/0x22c [ 6708.994858] [] tcp_v4_rcv+0xaf0/0xb58 [ 6709.000077] [] ip_local_deliver_finish+0x10c/0x254 [ 6709.006419] [] ip_local_deliver+0xf0/0xfc [ 6709.011980] [] ip_rcv_finish+0x208/0x3a4 [ 6709.017454] [] ip_rcv+0x2dc/0x3c8 [ 6709.022328] [] __netif_receive_skb_core+0x2f8/0xa0c [ 6709.028758] [] __netif_receive_skb+0x38/0x84 [ 6709.034580] [] netif_receive_skb_internal+0x68/0xdc [ 6709.041010] [] napi_gro_receive+0xcc/0x1a8 [ 6709.046690] [] nicvf_cq_intr_handler+0x59c/0x730 [nicvf] [ 6709.053559] [] nicvf_poll+0x38/0xb8 [nicvf] [ 6709.059295] [] net_rx_action+0x2f8/0x464 [ 6709.064771] [] __do_softirq+0x11c/0x308 [ 6709.070164] [] irq_exit+0x12c/0x174 [ 6709.075206] [] __handle_domain_irq+0x78/0xc4 [ 6709.081027] [] gic_handle_irq+0x94/0x190 [ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20) [ 6709.092929] fde0: 0000810ff2ec0000 ffff000008c10000 [ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18 [ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80 [ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4 [ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50 [ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000 [ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000 [ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20 [ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145 [ 6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238 [ 6709.171107] [] el1_irq+0xb4/0x140 [ 6709.175976] [] arch_cpu_idle+0x44/0x11c [ 6709.181368] [] default_idle_call+0x20/0x30 [ 6709.187020] [] do_idle+0x158/0x1e4 [ 6709.191973] [] cpu_startup_entry+0x2c/0x30 [ 6709.197624] [] secondary_start_kernel+0x13c/0x160 [ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4 [ 6709.207967] Code: bad PC value [ 6709.211061] SMP: stopping secondary CPUs [ 6709.218830] Starting crashdump kernel... [ 6709.222749] Bye! ---<-snip>--- Signed-off-by: Vadim Lomovtsev Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..e18500151236 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk) dprintk("svc: socket %p(inet %p), busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); @@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk) if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); + } + /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk) if (!svsk) printk("svc: socket %p: no user data\n", sk); else { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_ostate(sk); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return ERR_PTR(err); } - inet->sk_user_data = svsk; svsk->sk_sock = sock; svsk->sk_sk = inet; svsk->sk_ostate = inet->sk_state_change; svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; + /* + * This barrier is necessary in order to prevent race condition + * with svc_data_ready(), svc_listen_data_ready() and others + * when calling callbacks above. + */ + wmb(); + inet->sk_user_data = svsk; /* Initialize the socket */ if (sock->type == SOCK_DGRAM) -- cgit From 4a646580f793d19717f7e034c8d473b509c27d49 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Thu, 24 Aug 2017 15:44:36 -0700 Subject: Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad Fixed the issue that two finger scroll does not work correctly on V8 protocol. The cause is that V8 protocol X-coordinate decode is wrong at SS4 PLUS device. I added SS4 PLUS X decode definition. Mote notes: the problem manifests itself by the commit e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)"), where a fix for the V8+ protocol was applied. Although the culprit must have been present beforehand, the two-finger scroll worked casually even with the wrongly reported values by some reason. It got broken by the commit above just because it changed x_max value, and this made libinput correctly figuring the MT events. Since the X coord is reported as falsely doubled, the events on the right-half side go outside the boundary, thus they are no longer handled. This resulted as a broken two-finger scroll. One finger event is decoded differently, and it didn't suffer from this problem. The problem was only about MT events. --tiwai Fixes: e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)") Signed-off-by: Masaki Ota Tested-by: Takashi Iwai Tested-by: Paul Donohue Cc: Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 41 +++++++++++++++++++++++++++++++---------- drivers/input/mouse/alps.h | 8 ++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 262d1057c1da..850b00e3ad8e 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1215,14 +1215,24 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_TWO: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); + } f->mt[0].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); f->mt[1].y = SS4_BTL_MF_Y_V2(p, 1); } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[0].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); f->mt[1].y = SS4_STD_MF_Y_V2(p, 1); } f->pressure = SS4_MF_Z_V2(p, 0) ? 0x30 : 0; @@ -1239,16 +1249,27 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + } + f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX_BL; no_data_y = SS4_MFPACKET_NO_AY_BL; } else { - f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[3].x = SS4_STD_MF_X_V2(p, 1); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX; no_data_y = SS4_MFPACKET_NO_AY; @@ -2541,8 +2562,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, memset(otp, 0, sizeof(otp)); - if (alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0]) || - alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0])) + if (alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0]) || + alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0])) return -1; alps_update_device_area_ss4_v2(otp, priv); diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index ed2d6879fa52..c80a7c76cb76 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -100,6 +100,10 @@ enum SS4_PACKET_ID { ((_b[1 + _i * 3] << 5) & 0x1F00) \ ) +#define SS4_PLUS_STD_MF_X_V2(_b, _i) (((_b[0 + (_i) * 3] << 4) & 0x0070) | \ + ((_b[1 + (_i) * 3] << 4) & 0x0F80) \ + ) + #define SS4_STD_MF_Y_V2(_b, _i) (((_b[1 + (_i) * 3] << 3) & 0x0010) | \ ((_b[2 + (_i) * 3] << 5) & 0x01E0) | \ ((_b[2 + (_i) * 3] << 4) & 0x0E00) \ @@ -109,6 +113,10 @@ enum SS4_PACKET_ID { ((_b[0 + (_i) * 3] >> 3) & 0x0010) \ ) +#define SS4_PLUS_BTL_MF_X_V2(_b, _i) (SS4_PLUS_STD_MF_X_V2(_b, _i) | \ + ((_b[0 + (_i) * 3] >> 4) & 0x0008) \ + ) + #define SS4_BTL_MF_Y_V2(_b, _i) (SS4_STD_MF_Y_V2(_b, _i) | \ ((_b[0 + (_i) * 3] >> 3) & 0x0008) \ ) -- cgit From b974696da1cfc5aa0c29ed97dc8f6c239899e64b Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 23 Aug 2017 09:03:04 +0200 Subject: mtd: nandsim: remove debugfs entries in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The debugfs entries must be removed before an error is returned in the probe function. Otherwise another try to load the module fails and when the debugfs files are accessed without the module loaded, the kernel still tries to call a function in that module. Fixes: 5346c27c5fed ("mtd: nandsim: Introduce debugfs infrastructure") Signed-off-by: Uwe Kleine-König Reviewed-by: Richard Weinberger Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/nandsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 03a0d057bf2f..e4211c3cc49b 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -2373,6 +2373,7 @@ static int __init ns_init_module(void) return 0; err_exit: + nandsim_debugfs_remove(nand); free_nandsim(nand); nand_release(nsmtd); for (i = 0;i < ARRAY_SIZE(nand->partitions); ++i) -- cgit From be3e83e3471cd0faff2c2d88fe9cfc73d9a9745a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 23 Aug 2017 20:45:01 +0200 Subject: mtd: nand: atmel: Relax tADL_min constraint Version 4 of the ONFI spec mandates that tADL be at least 400 nanoseconds, but, depending on the master clock rate, 400 ns may not fit in the tADL field of the SMC reg. We need to relax the check and accept the -ERANGE return code. Note that previous versions of the ONFI spec had a lower tADL_min (100 or 200 ns). It's not clear why this timing constraint got increased but it seems most NANDs are fine with values lower than 400ns, so we should be safe. Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks") Signed-off-by: Boris Brezillon Tested-by: Quentin Schulz Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel/nand-controller.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 2c8baa0c2c4e..ceec21bd30c4 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1364,7 +1364,18 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand, ret = atmel_smc_cs_conf_set_timing(smcconf, ATMEL_HSMC_TIMINGS_TADL_SHIFT, ncycles); - if (ret) + /* + * Version 4 of the ONFI spec mandates that tADL be at least 400 + * nanoseconds, but, depending on the master clock rate, 400 ns may not + * fit in the tADL field of the SMC reg. We need to relax the check and + * accept the -ERANGE return code. + * + * Note that previous versions of the ONFI spec had a lower tADL_min + * (100 or 200 ns). It's not clear why this timing constraint got + * increased but it seems most NANDs are fine with values lower than + * 400ns, so we should be safe. + */ + if (ret && ret != -ERANGE) return ret; ncycles = DIV_ROUND_UP(conf->timings.sdr.tAR_min, mckperiodps); -- cgit From c469268cd523245cc58255f6696e0c295485cb0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Aug 2017 11:59:31 +0200 Subject: KVM: x86: block guest protection keys unless the host has them enabled If the host has protection keys disabled, we cannot read and write the guest PKRU---RDPKRU and WRPKRU fail with #GP(0) if CR4.PKE=0. Block the PKU cpuid bit in that case. This ensures that guest_CR4.PKE=1 implies host_CR4.PKE=1. Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59ca2eea522c..19adbb418443 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled) + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); -- cgit From b9dd21e104bcd45e124acfe978a79df71259e59b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:14:38 +0200 Subject: KVM: x86: simplify handling of PKRU Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a simple comparison against the host value of the register. The write of PKRU in addition can be skipped if the guest has not enabled the feature. Once we do this, we need not test OSPKE in the host anymore, because guest_CR4.PKE=1 implies host_CR4.PKE=1. The static PKU test is kept to elide the code on older CPUs. Suggested-by: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/kvm_cache_regs.h | 5 ----- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/svm.c | 7 ------- arch/x86/kvm/vmx.c | 25 ++++++++----------------- 5 files changed, 10 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e..f4d120a3e22e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -492,6 +492,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 pkru; u32 hflags; u64 efer; u64 apic_base; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 762cdf2595f9..e1e89ee4af75 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); } -static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu) -{ - return kvm_x86_ops->get_pkru(vcpu); -} - static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d7d248a000dd..4b9a3ae6b725 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, * index of the protection domain, so pte_pkey * 2 is * is the index of the first bit for the domain. */ - pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; + pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ offset = (pfec & ~1) + diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 56ba05312759..af256b786a70 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b21b1223035..c6ef2940119b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -636,8 +636,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - bool guest_pkru_valid; - u32 guest_pkru; u32 host_pkru; /* @@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } -static u32 vmx_get_pkru(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->guest_pkru; -} - static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - if (vmx->guest_pkru_valid) - __write_pkru(vmx->guest_pkru); + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); @@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * back on host, so it is safe to read guest PKRU from current * XSAVE. */ - if (boot_cpu_has(X86_FEATURE_OSPKE)) { - vmx->guest_pkru = __read_pkru(); - if (vmx->guest_pkru != vmx->host_pkru) { - vmx->guest_pkru_valid = true; + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { + vcpu->arch.pkru = __read_pkru(); + if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); - } else - vmx->guest_pkru_valid = false; } /* @@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .get_pkru = vmx_get_pkru, - .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, -- cgit From 38cfd5e3df9c4f88e76b547eee2087ee5c042ae2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:16:29 +0200 Subject: KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state The host pkru is restored right after vcpu exit (commit 1be0e61), so KVM_GET_XSAVE will return the host PKRU value instead. Fix this by using the guest PKRU explicitly in fill_xsave and load_xsave. This part is based on a patch by Junkang Fu. The host PKRU data may also not match the value in vcpu->arch.guest_fpu.state, because it could have been changed by userspace since the last time it was saved, so skip loading it in kvm_load_guest_fpu. Reported-by: Junkang Fu Cc: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/internal.h | 6 +++--- arch/x86/kvm/x86.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 255645f60ca2..554cdb205d17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); @@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate); + __copy_kernel_to_fpregs(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d734aa8c5b4f..05a5e57c6f39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest + offset, src, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(dest + offset, &vcpu->arch.pkru, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest + offset, src, size); + } valid -= feature; @@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest, src + offset, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(&vcpu->arch.pkru, src + offset, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest, src + offset, size); } valid -= feature; @@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); trace_kvm_fpu(1); } -- cgit From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 10:50:29 -0700 Subject: x86/mm: Fix use-after-free of ldt_struct The following commit: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") renamed init_new_context() to init_new_context_ldt() and added a new init_new_context() which calls init_new_context_ldt(). However, the error code of init_new_context_ldt() was ignored. Consequently, if a memory allocation in alloc_ldt_struct() failed during a fork(), the ->context.ldt of the new task remained the same as that of the old task (due to the memcpy() in dup_mm()). ldt_struct's are not intended to be shared, so a use-after-free occurred after one task exited. Fix the bug by making init_new_context() pass through the error code of init_new_context_ldt(). This bug was found by syzkaller, which encountered the following splat: BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] exec_mmap fs/exec.c:1061 [inline] flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 search_binary_handler+0x142/0x6b0 fs/exec.c:1652 exec_binprm fs/exec.c:1694 [inline] do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816 do_execve+0x31/0x40 fs/exec.c:1860 call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Allocated by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 kmalloc include/linux/slab.h:493 [inline] alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] __mmput kernel/fork.c:916 [inline] mmput+0x541/0x6e0 kernel/fork.c:927 copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 copy_process kernel/fork.c:1546 [inline] _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 SYSC_clone kernel/fork.c:2135 [inline] SyS_clone+0x37/0x50 kernel/fork.c:2129 do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 return_from_SYSCALL_64+0x0/0x7a Here is a C reproducer: #include #include #include #include #include #include #include static void *fork_thread(void *_arg) { fork(); } int main(void) { struct user_desc desc = { .entry_number = 8191 }; syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); for (;;) { if (fork() == 0) { pthread_t t; srand(getpid()); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } Note: the reproducer takes advantage of the fact that alloc_ldt_struct() may use vmalloc() to allocate a large ->entries array, and after commit: 5d17a73a2ebe ("vmalloc: back off when the current task is killed") it is possible for userspace to fail a task's vmalloc() by sending a fatal signal, e.g. via exit_group(). It would be more difficult to reproduce this bug on kernels without that commit. This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. Signed-off-by: Eric Biggers Acked-by: Dave Hansen Cc: [v4.6+] Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 265c907d7d4c..7a234be7e298 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif - init_new_context_ldt(tsk, mm); - - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { -- cgit From 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jun 2017 15:41:38 +0100 Subject: perf/core: Fix group {cpu,task} validation Regardless of which events form a group, it does not make sense for the events to target different tasks and/or CPUs, as this leaves the group inconsistent and impossible to schedule. The core perf code assumes that these are consistent across (successfully intialised) groups. Core perf code only verifies this when moving SW events into a HW context. Thus, we can violate this requirement for pure SW groups and pure HW groups, unless the relevant PMU driver happens to perform this verification itself. These mismatched groups subsequently wreak havoc elsewhere. For example, we handle watchpoints as SW events, and reserve watchpoint HW on a per-CPU basis at pmu::event_init() time to ensure that any event that is initialised is guaranteed to have a slot at pmu::add() time. However, the core code only checks the group leader's cpu filter (via event_filter_match()), and can thus install follower events onto CPUs violating thier (mismatched) CPU filters, potentially installing them into a CPU without sufficient reserved slots. This can be triggered with the below test case, resulting in warnings from arch backends. #define _GNU_SOURCE #include #include #include #include #include #include #include static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } char watched_char; struct perf_event_attr wp_attr = { .type = PERF_TYPE_BREAKPOINT, .bp_type = HW_BREAKPOINT_RW, .bp_addr = (unsigned long)&watched_char, .bp_len = 1, .size = sizeof(wp_attr), }; int main(int argc, char *argv[]) { int leader, ret; cpu_set_t cpus; /* * Force use of CPU0 to ensure our CPU0-bound events get scheduled. */ CPU_ZERO(&cpus); CPU_SET(0, &cpus); ret = sched_setaffinity(0, sizeof(cpus), &cpus); if (ret) { printf("Unable to set cpu affinity\n"); return 1; } /* open leader event, bound to this task, CPU0 only */ leader = perf_event_open(&wp_attr, 0, 0, -1, 0); if (leader < 0) { printf("Couldn't open leader: %d\n", leader); return 1; } /* * Open a follower event that is bound to the same task, but a * different CPU. This means that the group should never be possible to * schedule. */ ret = perf_event_open(&wp_attr, 0, 1, leader, 0); if (ret < 0) { printf("Couldn't open mismatched follower: %d\n", ret); return 1; } else { printf("Opened leader/follower with mismastched CPUs\n"); } /* * Open as many independent events as we can, all bound to the same * task, CPU0 only. */ do { ret = perf_event_open(&wp_attr, 0, 0, -1, 0); } while (ret >= 0); /* * Force enable/disble all events to trigger the erronoeous * installation of the follower event. */ printf("Opened all events. Toggling..\n"); for (;;) { prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0); prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0); } return 0; } Fix this by validating this requirement regardless of whether we're moving events. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhou Chengming Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1498142498-15758-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ee20d4c546b5..3504125871d2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10032,28 +10032,27 @@ SYSCALL_DEFINE5(perf_event_open, goto err_context; /* - * Do not allow to attach to a group in a different - * task or CPU context: + * Make sure we're both events for the same CPU; + * grouping events for different CPUs is broken; since + * you can never concurrently schedule them anyhow. */ - if (move_group) { - /* - * Make sure we're both on the same task, or both - * per-cpu events. - */ - if (group_leader->ctx->task != ctx->task) - goto err_context; + if (group_leader->cpu != event->cpu) + goto err_context; - /* - * Make sure we're both events for the same CPU; - * grouping events for different CPUs is broken; since - * you can never concurrently schedule them anyhow. - */ - if (group_leader->cpu != event->cpu) - goto err_context; - } else { - if (group_leader->ctx != ctx) - goto err_context; - } + /* + * Make sure we're both on the same task, or both + * per-CPU events. + */ + if (group_leader->ctx->task != ctx->task) + goto err_context; + + /* + * Do not allow to attach to a group in a different task + * or CPU context. If we're moving SW events, we'll fix + * this up later, so allow that. + */ + if (!move_group && group_leader->ctx != ctx) + goto err_context; /* * Only a group leader can be exclusive or pinned -- cgit From 47c5310a8dbe7c2cb9f0083daa43ceed76c257fa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 24 Aug 2017 19:14:47 +1000 Subject: KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce() Nixiaoming pointed out that there is a memory leak in kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd() fails; the memory allocated for the kvmppc_spapr_tce_table struct is not freed, and nor are the pages allocated for the iommu tables. In addition, we have already incremented the process's count of locked memory pages, and this doesn't get restored on error. David Hildenbrand pointed out that there is a race in that the function checks early on that there is not already an entry in the stt->iommu_tables list with the same LIOBN, but an entry with the same LIOBN could get added between then and when the new entry is added to the list. This fixes all three problems. To simplify things, we now call anon_inode_getfd() before placing the new entry in the list. The check for an existing entry is done while holding the kvm->lock mutex, immediately before adding the new entry to the list. Finally, on failure we now call kvmppc_account_memlimit to decrement the process's count of locked memory pages. Reported-by: Nixiaoming Reported-by: David Hildenbrand Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 56 ++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14304eb..53766e2bc029 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; + int fd = -1; if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; ret = -ENOMEM; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); + ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + if (ret < 0) + goto fail; mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (!ret) + return fd; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + put_unused_fd(fd); - kfree(stt); - } + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } -- cgit From 22d538213ec4fa65b08b1edbf610066d8aab7bbb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Aug 2017 15:52:54 -0700 Subject: blk-mq-debugfs: Add names for recently added flags The symbolic constants QUEUE_FLAG_SCSI_PASSTHROUGH, QUEUE_FLAG_QUIESCED and REQ_NOWAIT are missing from blk-mq-debugfs.c. Add these to blk-mq-debugfs.c such that these appear as names in debugfs instead of as numbers. Reviewed-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9ebc2945f991..4f927a58dff8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -75,6 +75,8 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(POLL_STATS), QUEUE_FLAG_NAME(REGISTERED), + QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), + QUEUE_FLAG_NAME(QUIESCED), }; #undef QUEUE_FLAG_NAME @@ -265,6 +267,7 @@ static const char *const cmd_flag_name[] = { CMD_FLAG_NAME(RAHEAD), CMD_FLAG_NAME(BACKGROUND), CMD_FLAG_NAME(NOUNMAP), + CMD_FLAG_NAME(NOWAIT), }; #undef CMD_FLAG_NAME -- cgit From 1046d304900cf9d4b2c730c6860b8e03cc704377 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 26 Jul 2017 15:32:23 +0100 Subject: virtio_blk: fix incorrect message when disk is resized The message printed on disk resize is incorrect. The following is printed when resizing to 2 GiB: $ truncate -s 1G test.img $ qemu -device virtio-blk-pci,logical_block_size=4096,... (qemu) block_resize drive1 2G virtio_blk virtio0: new size: 4194304 4096-byte logical blocks (17.2 GB/16.0 GiB) The virtio_blk capacity config field is in 512-byte sector units regardless of logical_block_size as per the VIRTIO specification. Therefore the message should read: virtio_blk virtio0: new size: 524288 4096-byte logical blocks (2.15 GB/2.0 GiB) Note that this only affects the printed message. Thankfully the actual block device has the correct size because the block layer expects capacity in sectors. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1498b899a593..d3d5523862c2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct request_queue *q = vblk->disk->queue; char cap_str_2[10], cap_str_10[10]; char *envp[] = { "RESIZE=1", NULL }; + unsigned long long nblocks; u64 capacity; /* Host must always specify the capacity. */ @@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work) capacity = (sector_t)-1; } - string_get_size(capacity, queue_logical_block_size(q), + nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); + + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); - string_get_size(capacity, queue_logical_block_size(q), + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); dev_notice(&vdev->dev, - "new size: %llu %d-byte logical blocks (%s/%s)\n", - (unsigned long long)capacity, - queue_logical_block_size(q), - cap_str_10, cap_str_2); + "new size: %llu %d-byte logical blocks (%s/%s)\n", + nblocks, + queue_logical_block_size(q), + cap_str_10, + cap_str_2); set_capacity(vblk->disk, capacity); revalidate_disk(vblk->disk); -- cgit From ba74b6f7fcc07355d087af6939712eed4a454821 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Aug 2017 18:07:02 +0200 Subject: virtio_pci: fix cpu affinity support Commit 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues"") removed the adjustment of the pre_vectors for the virtio MSI-X vector allocation which was added in commit fb5e31d9 ("virtio: allow drivers to request IRQ affinity when creating VQs"). This will lead to an incorrect assignment of MSI-X vectors, and potential deadlocks when offlining cpus. Signed-off-by: Christoph Hellwig Fixes: 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues") Reported-by: YASUAKI ISHIMATSU Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 007a4f366086..1c4797e53f68 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -107,6 +107,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); + unsigned flags = PCI_IRQ_MSIX; unsigned i, v; int err = -ENOMEM; @@ -126,10 +127,13 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, - nvectors, PCI_IRQ_MSIX | - (desc ? PCI_IRQ_AFFINITY : 0), - desc); + nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; -- cgit From 556b969a1cfe2686aae149137fa1dfcac0eefe54 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 25 Aug 2017 15:55:30 -0700 Subject: PM/hibernate: touch NMI watchdog when creating snapshot There is a problem that when counting the pages for creating the hibernation snapshot will take significant amount of time, especially on system with large memory. Since the counting job is performed with irq disabled, this might lead to NMI lockup. The following warning were found on a system with 1.5TB DRAM: Freezing user space processes ... (elapsed 0.002 seconds) done. OOM killer disabled. PM: Preallocating image memory... NMI watchdog: Watchdog detected hard LOCKUP on cpu 27 CPU: 27 PID: 3128 Comm: systemd-sleep Not tainted 4.13.0-0.rc2.git0.1.fc27.x86_64 #1 task: ffff9f01971ac000 task.stack: ffffb1a3f325c000 RIP: 0010:memory_bm_find_bit+0xf4/0x100 Call Trace: swsusp_set_page_free+0x2b/0x30 mark_free_pages+0x147/0x1c0 count_data_pages+0x41/0xa0 hibernate_preallocate_memory+0x80/0x450 hibernation_snapshot+0x58/0x410 hibernate+0x17c/0x310 state_store+0xdf/0xf0 kobj_attr_store+0xf/0x20 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x11c/0x1a0 __vfs_write+0x37/0x170 vfs_write+0xb1/0x1a0 SyS_write+0x55/0xc0 entry_SYSCALL_64_fastpath+0x1a/0xa5 ... done (allocated 6590003 pages) PM: Allocated 26360012 kbytes in 19.89 seconds (1325.28 MB/s) It has taken nearly 20 seconds(2.10GHz CPU) thus the NMI lockup was triggered. In case the timeout of the NMI watch dog has been set to 1 second, a safe interval should be 6590003/20 = 320k pages in theory. However there might also be some platforms running at a lower frequency, so feed the watchdog every 100k pages. [yu.c.chen@intel.com: simplification] Link: http://lkml.kernel.org/r/1503460079-29721-1-git-send-email-yu.c.chen@intel.com [yu.c.chen@intel.com: use interval of 128k instead of 100k to avoid modulus] Link: http://lkml.kernel.org/r/1503328098-5120-1-git-send-email-yu.c.chen@intel.com Signed-off-by: Chen Yu Reported-by: Jan Filipcewicz Suggested-by: Michal Hocko Reviewed-by: Michal Hocko Acked-by: Rafael J. Wysocki Cc: Mel Gorman Cc: Vlastimil Babka Cc: Len Brown Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1bad301820c7..7a58eb5757e3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -2535,9 +2536,14 @@ void drain_all_pages(struct zone *zone) #ifdef CONFIG_HIBERNATION +/* + * Touch the watchdog for every WD_PAGE_COUNT pages. + */ +#define WD_PAGE_COUNT (128*1024) + void mark_free_pages(struct zone *zone) { - unsigned long pfn, max_zone_pfn; + unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT; unsigned long flags; unsigned int order, t; struct page *page; @@ -2552,6 +2558,11 @@ void mark_free_pages(struct zone *zone) if (pfn_valid(pfn)) { page = pfn_to_page(pfn); + if (!--page_count) { + touch_nmi_watchdog(); + page_count = WD_PAGE_COUNT; + } + if (page_zone(page) != zone) continue; @@ -2565,8 +2576,13 @@ void mark_free_pages(struct zone *zone) unsigned long i; pfn = page_to_pfn(page); - for (i = 0; i < (1UL << order); i++) + for (i = 0; i < (1UL << order); i++) { + if (!--page_count) { + touch_nmi_watchdog(); + page_count = WD_PAGE_COUNT; + } swsusp_set_page_free(pfn_to_page(pfn + i)); + } } } spin_unlock_irqrestore(&zone->lock, flags); -- cgit From 435c0b87d661da83771c30ed775f7c37eed193fb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 25 Aug 2017 15:55:33 -0700 Subject: mm, shmem: fix handling /sys/kernel/mm/transparent_hugepage/shmem_enabled /sys/kernel/mm/transparent_hugepage/shmem_enabled controls if we want to allocate huge pages when allocate pages for private in-kernel shmem mount. Unfortunately, as Dan noticed, I've screwed it up and the only way to make kernel allocate huge page for the mount is to use "force" there. All other values will be effectively ignored. Link: http://lkml.kernel.org/r/20170822144254.66431-1-kirill.shutemov@linux.intel.com Fixes: 5a6e75f8110c ("shmem: prepare huge= mount option and sysfs knob") Signed-off-by: Kirill A. Shutemov Reported-by: Dan Carpenter Cc: stable [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6540e5982444..fbcb3c96a186 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3967,7 +3967,7 @@ int __init shmem_init(void) } #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE - if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY) + if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; else shmem_huge = 0; /* just in case it was patched */ @@ -4028,7 +4028,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj, return -EINVAL; shmem_huge = huge; - if (shmem_huge < SHMEM_HUGE_DENY) + if (shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; return count; } -- cgit From fffa281b48a91ad6dac1a18c5907ece58fa3879b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 25 Aug 2017 15:55:36 -0700 Subject: dax: fix deadlock due to misaligned PMD faults In DAX there are two separate places where the 2MiB range of a PMD is defined. The first is in the page tables, where a PMD mapping inserted for a given address spans from (vmf->address & PMD_MASK) to ((vmf->address & PMD_MASK) + PMD_SIZE - 1). That is, from the 2MiB boundary below the address to the 2MiB boundary above the address. So, for example, a fault at address 3MiB (0x30 0000) falls within the PMD that ranges from 2MiB (0x20 0000) to 4MiB (0x40 0000). The second PMD range is in the mapping->page_tree, where a given file offset is covered by a radix tree entry that spans from one 2MiB aligned file offset to another 2MiB aligned file offset. So, for example, the file offset for 3MiB (pgoff 768) falls within the PMD range for the order 9 radix tree entry that ranges from 2MiB (pgoff 512) to 4MiB (pgoff 1024). This system works so long as the addresses and file offsets for a given mapping both have the same offsets relative to the start of each PMD. Consider the case where the starting address for a given file isn't 2MiB aligned - say our faulting address is 3 MiB (0x30 0000), but that corresponds to the beginning of our file (pgoff 0). Now all the PMDs in the mapping are misaligned so that the 2MiB range defined in the page tables never matches up with the 2MiB range defined in the radix tree. The current code notices this case for DAX faults to storage with the following test in dax_pmd_insert_mapping(): if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) goto unlock_fallback; This test makes sure that the pfn we get from the driver is 2MiB aligned, and relies on the assumption that the 2MiB alignment of the pfn we get back from the driver matches the 2MiB alignment of the faulting address. However, faults to holes were not checked and we could hit the problem described above. This was reported in response to the NVML nvml/src/test/pmempool_sync TEST5: $ cd nvml/src/test/pmempool_sync $ make TEST5 You can grab NVML here: https://github.com/pmem/nvml/ The dmesg warning you see when you hit this error is: WARNING: CPU: 13 PID: 2900 at fs/dax.c:641 dax_insert_mapping_entry+0x2df/0x310 Where we notice in dax_insert_mapping_entry() that the radix tree entry we are about to replace doesn't match the locked entry that we had previously inserted into the tree. This happens because the initial insertion was done in grab_mapping_entry() using a pgoff calculated from the faulting address (vmf->address), and the replacement in dax_pmd_load_hole() => dax_insert_mapping_entry() is done using vmf->pgoff. In our failure case those two page offsets (one calculated from vmf->address, one using vmf->pgoff) point to different order 9 radix tree entries. This failure case can result in a deadlock because the radix tree unlock also happens on the pgoff calculated from vmf->address. This means that the locked radix tree entry that we swapped in to the tree in dax_insert_mapping_entry() using vmf->pgoff is never unlocked, so all future faults to that 2MiB range will block forever. Fix this by validating that the faulting address's PMD offset matches the PMD offset from the start of the file. This check is done at the very beginning of the fault and covers faults that would have mapped to storage as well as faults to holes. I left the COLOUR check in dax_pmd_insert_mapping() in place in case we ever hit the insanity condition where the alignment of the pfn we get from the driver doesn't match the alignment of the userspace address. Link: http://lkml.kernel.org/r/20170822222436.18926-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: "Slusarz, Marcin" Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 306c2b603fb8..865d42c63e23 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1383,6 +1383,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the radix tree. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + goto fallback; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; -- cgit From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:39 -0700 Subject: mm/madvise.c: fix freeing of locked page with MADV_FREE If madvise(..., MADV_FREE) split a transparent hugepage, it called put_page() before unlock_page(). This was wrong because put_page() can free the page, e.g. if a concurrent madvise(..., MADV_DONTNEED) has removed it from the memory mapping. put_page() then rightfully complained about freeing a locked page. Fix this by moving the unlock_page() before put_page(). This bug was found by syzkaller, which encountered the following splat: BUG: Bad page state in process syzkaller412798 pfn:1bd800 page:ffffea0006f60000 count:0 mapcount:0 mapping: (null) index:0x20a00 flags: 0x200000000040019(locked|uptodate|dirty|swapbacked) raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x1(locked) Modules linked in: CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 bad_page+0x230/0x2b0 mm/page_alloc.c:565 free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943 free_pages_check mm/page_alloc.c:952 [inline] free_pages_prepare mm/page_alloc.c:1043 [inline] free_pcp_prepare mm/page_alloc.c:1068 [inline] free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584 __put_single_page mm/swap.c:79 [inline] __put_page+0xfb/0x160 mm/swap.c:113 put_page include/linux/mm.h:814 [inline] madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371 walk_pmd_range mm/pagewalk.c:50 [inline] walk_pud_range mm/pagewalk.c:108 [inline] walk_p4d_range mm/pagewalk.c:134 [inline] walk_pgd_range mm/pagewalk.c:160 [inline] __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249 walk_page_range+0x200/0x470 mm/pagewalk.c:326 madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444 madvise_free_single_vma+0x353/0x580 mm/madvise.c:471 madvise_dontneed_free mm/madvise.c:555 [inline] madvise_vma mm/madvise.c:664 [inline] SYSC_madvise mm/madvise.c:832 [inline] SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760 entry_SYSCALL_64_fastpath+0x1f/0xbe Here is a C reproducer: #define _GNU_SOURCE #include #include #include #define MADV_FREE 8 #define PAGE_SIZE 4096 static void *mapping; static const size_t mapping_size = 0x1000000; static void *madvise_thrproc(void *arg) { madvise(mapping, mapping_size, (long)arg); } int main(void) { pthread_t t[2]; for (;;) { mapping = mmap(NULL, mapping_size, PROT_WRITE, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); munmap(mapping + mapping_size / 2, PAGE_SIZE); pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED); pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE); pthread_join(t[0], NULL); pthread_join(t[1], NULL); munmap(mapping, mapping_size); } } Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and CONFIG_DEBUG_VM=y are needed. Google Bug Id: 64696096 Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)") Signed-off-by: Eric Biggers Acked-by: David Rientjes Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: [v4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 47d8d8a25eae..23ed525bc2bc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -368,8 +368,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, pte_offset_map_lock(mm, pmd, addr, &ptl); goto out; } - put_page(page); unlock_page(page); + put_page(page); pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte--; addr -= PAGE_SIZE; -- cgit From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:43 -0700 Subject: fork: fix incorrect fput of ->exe_file causing use-after-free Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") made it possible to kill a forking task while it is waiting to acquire its ->mmap_sem for write, in dup_mmap(). However, it was overlooked that this introduced an new error path before a reference is taken on the mm_struct's ->exe_file. Since the ->exe_file of the new mm_struct was already set to the old ->exe_file by the memcpy() in dup_mm(), it was possible for the mmput() in the error path of dup_mm() to drop a reference to ->exe_file which was never taken. This caused the struct file to later be freed prematurely. Fix it by updating mm_init() to NULL out the ->exe_file, in the same place it clears other things like the list of mmaps. This bug was found by syzkaller. It can be reproduced using the following C program: #define _GNU_SOURCE #include #include #include #include #include #include static void *mmap_thread(void *_arg) { for (;;) { mmap(NULL, 0x1000000, PROT_READ, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); } } static void *fork_thread(void *_arg) { usleep(rand() % 10000); fork(); } int main(void) { fork(); fork(); fork(); for (;;) { if (fork() == 0) { pthread_t t; pthread_create(&t, NULL, mmap_thread, NULL); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } No special kernel config options are needed. It usually causes a NULL pointer dereference in __remove_shared_vm_struct() during exit, or in dup_mmap() (which is usually inlined into copy_process()) during fork. Both are due to a vm_area_struct's ->vm_file being used after it's already been freed. Google Bug Id: 64772007 Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") Signed-off-by: Eric Biggers Tested-by: Mark Rutland Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Vlastimil Babka Cc: [v4.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/fork.c b/kernel/fork.c index e075b7780421..cbbea277b3fb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -806,6 +806,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS -- cgit From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 25 Aug 2017 15:55:46 -0700 Subject: mm/memblock.c: reversed logic in memblock_discard() In recently introduced memblock_discard() there is a reversed logic bug. Memory is freed of static array instead of dynamically allocated one. Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com Fixes: 3010f876500f ("mm: discard memblock data later") Signed-off-by: Pavel Tatashin Reported-by: Woody Suwalski Tested-by: Woody Suwalski Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index bf14aea6ab70..91205780e6b1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -299,7 +299,7 @@ void __init memblock_discard(void) __memblock_free_late(addr, size); } - if (memblock.memory.regions == memblock_memory_init_regions) { + if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); -- cgit From 8e1101d251647802d0a4ae19eb3d0e1453eaeff4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 18:58:42 -0500 Subject: PCI/MSI: Don't warn when irq_create_affinity_masks() returns NULL irq_create_affinity_masks() can return NULL on non-SMP systems, when there are not enough "free" vectors available to spread, or if memory allocation for the CPU masks fails. Only the allocation failure is of interest, and even then the system will work just fine except for non-optimally spread vectors. Thus remove the warnings. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Acked-by: David S. Miller --- drivers/pci/msi.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 253d92409bb3..2225afc1cbbb 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -538,12 +538,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd) struct msi_desc *entry; u16 control; - if (affd) { + if (affd) masks = irq_create_affinity_masks(nvec, affd); - if (!masks) - dev_err(&dev->dev, "can't allocate MSI affinity masks for %d vectors\n", - nvec); - } + /* MSI Entry Initialization */ entry = alloc_msi_entry(&dev->dev, nvec, masks); @@ -679,12 +676,8 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msi_desc *entry; int ret, i; - if (affd) { + if (affd) masks = irq_create_affinity_masks(nvec, affd); - if (!masks) - dev_err(&dev->dev, "can't allocate MSI-X affinity masks for %d vectors\n", - nvec); - } for (i = 0, curmsk = masks; i < nvec; i++) { entry = alloc_msi_entry(&dev->dev, 1, curmsk); -- cgit From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: Clarify (and fix) MAX_LFS_FILESIZE macros We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do wan tto avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..cbfe127bccf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 -- cgit From 3510ca20ece0150af6b10c77a74ff1b5c198e3e2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 13:55:12 -0700 Subject: Minor page waitqueue cleanups Tim Chen and Kan Liang have been battling a customer load that shows extremely long page wakeup lists. The cause seems to be constant NUMA migration of a hot page that is shared across a lot of threads, but the actual root cause for the exact behavior has not been found. Tim has a patch that batches the wait list traversal at wakeup time, so that we at least don't get long uninterruptible cases where we traverse and wake up thousands of processes and get nasty latency spikes. That is likely 4.14 material, but we're still discussing the page waitqueue specific parts of it. In the meantime, I've tried to look at making the page wait queues less expensive, and failing miserably. If you have thousands of threads waiting for the same page, it will be painful. We'll need to try to figure out the NUMA balancing issue some day, in addition to avoiding the excessive spinlock hold times. That said, having tried to rewrite the page wait queues, I can at least fix up some of the braindamage in the current situation. In particular: (a) we don't want to continue walking the page wait list if the bit we're waiting for already got set again (which seems to be one of the patterns of the bad load). That makes no progress and just causes pointless cache pollution chasing the pointers. (b) we don't want to put the non-locking waiters always on the front of the queue, and the locking waiters always on the back. Not only is that unfair, it means that we wake up thousands of reading threads that will just end up being blocked by the writer later anyway. Also add a comment about the layout of 'struct wait_page_key' - there is an external user of it in the cachefiles code that means that it has to match the layout of 'struct wait_bit_key' in the two first members. It so happens to match, because 'struct page *' and 'unsigned long *' end up having the same values simply because the page flags are the first member in struct page. Cc: Tim Chen Cc: Kan Liang Cc: Mel Gorman Cc: Christopher Lameter Cc: Andi Kleen Cc: Davidlohr Bueso Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/sched/wait.c | 7 ++++--- mm/filemap.c | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 17f11c6b0a9f..d6afed6d0752 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, list_for_each_entry_safe(curr, next, &wq_head->head, entry) { unsigned flags = curr->flags; - - if (curr->func(curr, mode, wake_flags, key) && - (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + int ret = curr->func(curr, mode, wake_flags, key); + if (ret < 0) + break; + if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; } } diff --git a/mm/filemap.c b/mm/filemap.c index a49702445ce0..baba290c276b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -885,6 +885,7 @@ void __init pagecache_init(void) page_writeback_init(); } +/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */ struct wait_page_key { struct page *page; int bit_nr; @@ -909,8 +910,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, if (wait_page->bit_nr != key->bit_nr) return 0; + + /* Stop walking if it's locked */ if (test_bit(key->bit_nr, &key->page->flags)) - return 0; + return -1; return autoremove_wake_function(wait, mode, sync, key); } @@ -964,6 +967,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, int ret = 0; init_wait(wait); + wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0; wait->func = wake_page_function; wait_page.page = page; wait_page.bit_nr = bit_nr; @@ -972,10 +976,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, spin_lock_irq(&q->lock); if (likely(list_empty(&wait->entry))) { - if (lock) - __add_wait_queue_entry_tail_exclusive(q, wait); - else - __add_wait_queue(q, wait); + __add_wait_queue_entry_tail(q, wait); SetPageWaiters(page); } -- cgit From a8b169afbf06a678437632709caac98e16f99263 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 16:25:09 -0700 Subject: Avoid page waitqueue race leaving possible page locker waiting The "lock_page_killable()" function waits for exclusive access to the page lock bit using the WQ_FLAG_EXCLUSIVE bit in the waitqueue entry set. That means that if it gets woken up, other waiters may have been skipped. That, in turn, means that if it sees the page being unlocked, it *must* take that lock and return success, even if a lethal signal is also pending. So instead of checking for lethal signals first, we need to check for them after we've checked the actual bit that we were waiting for. Even if that might then delay the killing of the process. This matches the order of the old "wait_on_bit_lock()" infrastructure that the page locking used to use (and is still used in a few other areas). Note that if we still return an error after having unsuccessfully tried to acquire the page lock, that is ok: that means that some other thread was able to get ahead of us and lock the page, and when that other thread then unlocks the page, the wakeup event will be repeated. So any other pending waiters will now get properly woken up. Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Cc: Nick Piggin Cc: Peter Zijlstra Cc: Mel Gorman Cc: Jan Kara Cc: Davidlohr Bueso Cc: Andi Kleen Signed-off-by: Linus Torvalds --- mm/filemap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index baba290c276b..0b41c8cbeabc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -986,10 +986,6 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, if (likely(test_bit(bit_nr, &page->flags))) { io_schedule(); - if (unlikely(signal_pending_state(state, current))) { - ret = -EINTR; - break; - } } if (lock) { @@ -999,6 +995,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, if (!test_bit(bit_nr, &page->flags)) break; } + + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + break; + } } finish_wait(q, wait); -- cgit From cc4a41fe5541a73019a864883297bd5043aa6d98 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 17:20:40 -0700 Subject: Linux 4.13-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dda88e744d5f..8db6be7dca73 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit